diff --git a/.gitignore b/.gitignore index 194270e2..12e7687d 100755 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,4 @@ isaacgym/ # dev ref/ +*.csv diff --git a/.gitmodules b/.gitmodules index 4e07ec9f..ecf9a2e5 100755 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "third_party/isaacgym"] path = third_party/isaacgym url = ../../kscalelabs/isaacgym.git -[submodule "third_party/kinfer"] - path = third_party/kinfer - url = https://github.com/kscalelabs/kinfer.git diff --git a/examples/new.kinfer b/examples/new.kinfer new file mode 100644 index 00000000..6ca2f58b Binary files /dev/null and b/examples/new.kinfer differ diff --git a/examples/randomization.kinfer b/examples/randomization.kinfer new file mode 100644 index 00000000..bdb35b1e Binary files /dev/null and b/examples/randomization.kinfer differ diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py index 016df468..d4bebfc8 100755 --- a/sim/envs/__init__.py +++ b/sim/envs/__init__.py @@ -14,14 +14,33 @@ from sim.envs.humanoids.g1_env import G1FreeEnv from sim.envs.humanoids.gpr_config import GprCfg, GprCfgPPO, GprStandingCfg from sim.envs.humanoids.gpr_env import GprFreeEnv +from sim.envs.humanoids.gpr_headless_config import GprHeadlessCfg, GprHeadlessCfgPPO +from sim.envs.humanoids.gpr_headless_env import GprHeadlessEnv +from sim.envs.humanoids.gpr_headless_latency_config import ( + GprHeadlessLatencyCfg, + GprHeadlessLatencyCfgPPO, +) +from sim.envs.humanoids.gpr_headless_latency_env import GprHeadlessLatencyEnv +from sim.envs.humanoids.gpr_headless_pos_config import ( + GprHeadlessPosCfg, + GprHeadlessPosCfgPPO, + GprHeadlessPosStandingCfg, +) +from sim.envs.humanoids.gpr_headless_pos_env import GprHeadlessPosEnv +from sim.envs.humanoids.gpr_latency_config import ( + GprLatencyCfg, + GprLatencyCfgPPO, + GprLatencyStandingCfg, +) +from sim.envs.humanoids.gpr_latency_env import GprLatencyEnv +from sim.envs.humanoids.gpr_vel_config import GprVelCfg, GprVelCfgPPO +from sim.envs.humanoids.gpr_vel_env import GprVelEnv from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO from sim.envs.humanoids.h1_env import H1FreeEnv from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO from sim.envs.humanoids.xbot_env import XBotLFreeEnv from sim.envs.humanoids.zbot2_config import ZBot2Cfg, ZBot2CfgPPO, ZBot2StandingCfg from sim.envs.humanoids.zbot2_env import ZBot2Env -from sim.envs.humanoids.zeroth_config import ZerothCfg, ZerothCfgPPO -from sim.envs.humanoids.zeroth_env import ZerothEnv from sim.utils.task_registry import TaskRegistry # noqa: E402 task_registry = TaskRegistry() @@ -33,4 +52,14 @@ task_registry.register("XBotL_free", XBotLFreeEnv, XBotCfg(), XBotCfgPPO()) task_registry.register("zbot2", ZBot2Env, ZBot2Cfg(), ZBot2CfgPPO()) task_registry.register("zbot2_standing", ZBot2Env, ZBot2StandingCfg(), ZBot2CfgPPO()) -task_registry.register("zeroth", ZerothEnv, ZerothCfg(), ZerothCfgPPO()) +task_registry.register("gpr_headless", GprHeadlessEnv, GprHeadlessCfg(), GprHeadlessCfgPPO()) +task_registry.register("gpr_latency", GprLatencyEnv, GprLatencyCfg(), GprLatencyCfgPPO()) +task_registry.register("gpr_latency_standing", GprLatencyEnv, GprLatencyStandingCfg(), GprLatencyCfgPPO()) +task_registry.register( + "gpr_headless_latency", GprHeadlessLatencyEnv, GprHeadlessLatencyCfg(), GprHeadlessLatencyCfgPPO() +) +task_registry.register("gpr_vel", GprVelEnv, GprVelCfg(), GprVelCfgPPO()) +task_registry.register("gpr_headless_pos", GprHeadlessPosEnv, GprHeadlessPosCfg(), GprHeadlessPosCfgPPO()) +task_registry.register( + 
"gpr_headless_pos_standing", GprHeadlessPosEnv, GprHeadlessPosStandingCfg(), GprHeadlessPosCfgPPO() +) diff --git a/sim/envs/base/base_task.py b/sim/envs/base/base_task.py index fe9f4874..a8356aa0 100644 --- a/sim/envs/base/base_task.py +++ b/sim/envs/base/base_task.py @@ -64,6 +64,7 @@ def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): self.num_obs = cfg.env.num_observations self.num_privileged_obs = cfg.env.num_privileged_obs self.num_actions = cfg.env.num_actions + self.num_joints = cfg.env.num_joints # optimization flags for pytorch JIT torch._C._jit_set_profiling_mode(False) diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py index 4d121ebf..072ce44a 100644 --- a/sim/envs/base/legged_robot.py +++ b/sim/envs/base/legged_robot.py @@ -82,7 +82,7 @@ def reset(self): """Reset all robots""" self.reset_idx(torch.arange(self.num_envs, device=self.device)) - + obs, privileged_obs, _, _, _ = self.step( torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False) ) @@ -412,7 +412,9 @@ def _reset_root_states(self, env_ids): self.root_states[env_ids] = self.base_init_state self.root_states[env_ids, :3] += self.env_origins[env_ids] # base velocities - # self.root_states[env_ids, 7:13] = torch_rand_float(-0.05, 0.05, (len(env_ids), 6), device=self.device) # [7:10]: lin vel, [10:13]: ang vel + self.root_states[env_ids, 7:13] = torch_rand_float( + -0.05, 0.05, (len(env_ids), 6), device=self.device + ) # [7:10]: lin vel, [10:13]: ang vel if self.cfg.asset.fix_base_link: self.root_states[env_ids, 7:13] = 0 self.root_states[env_ids, 2] += 1.8 @@ -521,13 +523,13 @@ def _init_buffers(self): ) self.forward_vec = to_torch([1.0, 0.0, 0.0], device=self.device).repeat((self.num_envs, 1)) self.torques = torch.zeros( - self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False ) self.p_gains = torch.zeros( - self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False ) self.d_gains = torch.zeros( - self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False ) self.actions = torch.zeros( self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False @@ -612,11 +614,13 @@ def _prepare_reward_function(self): # remove zero scales + multiply non-zero ones by dt for key in list(self.reward_scales.keys()): - scale = self.reward_scales[key] - if scale == 0: - self.reward_scales.pop(key) - else: - self.reward_scales[key] *= self.dt + if key != "termination": + scale = self.reward_scales[key] + if scale == 0: + self.reward_scales.pop(key) + else: + self.reward_scales[key] *= self.dt + # prepare list of functions self.reward_functions = [] self.reward_names = [] diff --git a/sim/envs/base/legged_robot_latency.py b/sim/envs/base/legged_robot_latency.py new file mode 100644 index 00000000..c4069d65 --- /dev/null +++ b/sim/envs/base/legged_robot_latency.py @@ -0,0 +1,1172 @@ +import os +from collections import deque + +import numpy as np + +from sim import ROOT_DIR +from sim.envs.base.base_task import BaseTask +from sim.utils.helpers import class_to_dict +from sim.utils.math import get_euler_xyz_tensor, quat_apply_yaw, wrap_to_pi + +# fmt: off +from isaacgym 
import gymapi, gymtorch, gymutil # isort: skip +from isaacgym.torch_utils import * # isort: skip +import torch # isort: skip +# fmt: on + + +class LeggedRobotLatency(BaseTask): + def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): + """Parses the provided config file, + initilizes pytorch buffers used during training + + Args: + cfg (Dict): Environment config file + sim_params (gymapi.SimParams): simulation parameters + physics_engine (gymapi.SimType): gymapi.SIM_PHYSX (must be PhysX) + device_type (string): 'cuda' or 'cpu' + device_id (int): 0, 1, ... + headless (bool): Run without rendering if True + """ + self.cfg = cfg + self.sim_params = sim_params + self.height_samples = None + self.debug_viz = False + self.init_done = False + self._parse_cfg(self.cfg) + super().__init__(self.cfg, sim_params, physics_engine, sim_device, headless) + if not self.headless: + self.set_camera(self.cfg.viewer.pos, self.cfg.viewer.lookat) + self._init_buffers() + self._prepare_reward_function() + self.init_done = True + + def step(self, actions): + """Apply actions, simulate, call self.post_physics_step() + + Args: + actions (torch.Tensor): Tensor of shape (num_envs, num_actions_per_env) + """ + if self.cfg.env.use_ref_actions: + actions += self.ref_action + actions = torch.clip(actions, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions) + + # dynamic randomization + delay = torch.rand((self.num_envs, 1), device=self.device) * self.cfg.domain_rand.action_delay + actions = (1 - delay) * actions + delay * self.actions + actions += self.cfg.domain_rand.action_noise * torch.randn_like(actions) * actions + + clip_actions = self.cfg.normalization.clip_actions + self.actions = torch.clip(actions, -clip_actions, clip_actions).to(self.device) + + # step physics and render each frame + self.render() + + # Number of PD cycles within one policy step + num_pd_cycles = self.cfg.control.decimation // self.cfg.control.pd_decimation + + # For each PD cycle + for _ in range(num_pd_cycles): + # Compute torques at start of PD cycle + action_delayed = self.update_cmd_action_latency_buffer() + self.torques = self._compute_torques(action_delayed).view(self.torques.shape) + + # Run simulation steps until next PD update + for _ in range(self.cfg.control.pd_decimation): + self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(self.torques)) + self.gym.simulate(self.sim) + if self.device == "cpu": + self.gym.fetch_results(self.sim, True) + self.gym.refresh_dof_state_tensor(self.sim) + self.update_obs_latency_buffer() + + self.post_physics_step() + + # return clipped obs, clipped states (None), rewards, dones and infos + clip_obs = self.cfg.normalization.clip_observations + self.obs_buf = torch.clip(self.obs_buf, -clip_obs, clip_obs) + if self.privileged_obs_buf is not None: + self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs) + return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras + + def reset(self): + """Reset all robots""" + + self.reset_idx(torch.arange(self.num_envs, device=self.device)) + + obs, privileged_obs, _, _, _ = self.step( + torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False) + ) + return obs, privileged_obs + + def post_physics_step(self): + """Check terminations, compute observations and rewards + calls self._post_physics_step_callback() for common computations + calls self._draw_debug_vis() if needed + """ + 
self.gym.refresh_actor_root_state_tensor(self.sim) + self.gym.refresh_net_contact_force_tensor(self.sim) + self.gym.refresh_rigid_body_state_tensor(self.sim) + + self.episode_length_buf += 1 + self.common_step_counter += 1 + + if self.imu_indices: + self.base_quat = self.rigid_state[:, self.imu_indices, 3:7] + self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 7:10]) + self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 10:13]) + else: + self.base_quat = self.root_states[:, 3:7] + self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10]) + self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13]) + + self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat) + self.projected_gravity[:] = quat_rotate_inverse(self.base_quat, self.gravity_vec) + + self._post_physics_step_callback() + + # compute observations, rewards, resets, ... + self.check_termination() + self.compute_reward() + env_ids = self.reset_buf.nonzero(as_tuple=False).flatten() + self.reset_idx(env_ids) + self.compute_observations() # in some cases a simulation step might be required to refresh some obs (for example body positions) + + self.last_last_actions[:] = torch.clone(self.last_actions[:]) + self.last_actions[:] = self.actions[:] + self.last_dof_vel[:] = self.dof_vel[:] + self.last_root_vel[:] = self.root_states[:, 7:13] + self.last_rigid_state[:] = self.rigid_state[:] + + if self.viewer and self.enable_viewer_sync and self.debug_viz: + self._draw_debug_vis() + + def check_termination(self): + """Check if environments need to be reset""" + self.reset_buf = torch.any( + torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1.0, dim=1 + ) + self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs + self.reset_buf |= self.time_out_buf + + def reset_idx(self, env_ids): + """Reset some environments. 
+ Calls self._reset_dofs(env_ids), self._reset_root_states(env_ids), and self._resample_commands(env_ids) + [Optional] calls self._update_terrain_curriculum(env_ids), self.update_command_curriculum(env_ids) and + Logs episode info + Resets some buffers + + Args: + env_ids (list[int]): List of environment ids which must be reset + """ + if len(env_ids) == 0: + return + # update curriculum + if self.cfg.terrain.curriculum: + self._update_terrain_curriculum(env_ids) + # avoid updating command curriculum at each step since the maximum command is common to all envs + if self.cfg.commands.curriculum and (self.common_step_counter % self.max_episode_length == 0): + self.update_command_curriculum(env_ids) + + # # Add noise to the PD gains + # if self.cfg.domain_rand.randomize_pd_gains: + # self.p_gains[env_ids] = self.original_p_gains[env_ids] + torch.randn_like(self.p_gains[env_ids]) * self.cfg.domain_rand.stiffness_multiplier_range[1] + # self.d_gains[env_ids] = self.original_d_gains[env_ids] + torch.randn_like(self.d_gains[env_ids]) * self.cfg.domain_rand.damping_multiplier_range[1] + + # reset robot states + self._reset_dofs(env_ids) + + self._reset_root_states(env_ids) + + self._resample_commands(env_ids) + + # reset buffers + self.last_last_actions[env_ids] = 0.0 + self.actions[env_ids] = 0.0 + self.last_actions[env_ids] = 0.0 + self.last_rigid_state[env_ids] = 0.0 + self.last_dof_vel[env_ids] = 0.0 + self.feet_air_time[env_ids] = 0.0 + self.episode_length_buf[env_ids] = 0 + self.reset_buf[env_ids] = 1 + # fill extras + self.extras["episode"] = {} + for key in self.episode_sums.keys(): + self.extras["episode"]["rew_" + key] = ( + torch.mean(self.episode_sums[key][env_ids]) / self.max_episode_length_s + ) + self.episode_sums[key][env_ids] = 0.0 + # log additional curriculum info + if self.cfg.terrain.mesh_type == "trimesh": + self.extras["episode"]["terrain_level"] = torch.mean(self.terrain_levels.float()) + if self.cfg.commands.curriculum: + self.extras["episode"]["max_command_x"] = self.command_ranges["lin_vel_x"][1] + # send timeout info to the algorithm + if self.cfg.env.send_timeouts: + self.extras["time_outs"] = self.time_out_buf + + if self.imu_indices: + self.base_quat[env_ids] = self.rigid_state[env_ids, self.imu_indices, 3:7] + else: + self.base_quat[env_ids] = self.root_states[env_ids, 3:7] + + self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat) + self.projected_gravity[env_ids] = quat_rotate_inverse(self.base_quat[env_ids], self.gravity_vec[env_ids]) + + self._reset_latency_buffer(env_ids) + + def compute_reward(self): + """Compute rewards + Calls each reward function which had a non-zero scale (processed in self._prepare_reward_function()) + adds each terms to the episode sums and to the total reward + """ + self.rew_buf[:] = 0.0 + + for i in range(len(self.reward_functions)): + name = self.reward_names[i] + rew = self.reward_functions[i]() * self.reward_scales[name] + self.rew_buf += rew + self.episode_sums[name] += rew + if self.cfg.rewards.only_positive_rewards: + self.rew_buf[:] = torch.clip(self.rew_buf[:], min=0.0) + # add termination reward after clipping + if "termination" in self.reward_scales: + rew = self._reward_termination() * self.reward_scales["termination"] + self.rew_buf += rew + self.episode_sums["termination"] += rew + + def set_camera(self, position, lookat): + """Set camera position and direction""" + cam_pos = gymapi.Vec3(position[0], position[1], position[2]) + cam_target = gymapi.Vec3(lookat[0], lookat[1], lookat[2]) + 
self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target) + + # ------------- Callbacks -------------- + def _process_rigid_shape_props(self, props, env_id): + """Callback allowing to store/change/randomize the rigid shape properties of each environment. + Called During environment creation. + Base behavior: randomizes the friction of each environment + + Args: + props (List[gymapi.RigidShapeProperties]): Properties of each shape of the asset + env_id (int): Environment id + + Returns: + [List[gymapi.RigidShapeProperties]]: Modified rigid shape properties + """ + if self.cfg.domain_rand.randomize_friction: + if env_id == 0: + # prepare friction randomization + friction_range = self.cfg.domain_rand.friction_range + num_buckets = 256 + bucket_ids = torch.randint(0, num_buckets, (self.num_envs, 1)) + friction_buckets = torch_rand_float( + friction_range[0], friction_range[1], (num_buckets, 1), device="cpu" + ) + self.friction_coeffs = friction_buckets[bucket_ids] + + for s in range(len(props)): + props[s].friction = self.friction_coeffs[env_id] + + self.env_frictions[env_id] = self.friction_coeffs[env_id] + + return props + + def _process_dof_props(self, props, env_id): + """Callback allowing to store/change/randomize the DOF properties of each environment. + Called During environment creation. + Base behavior: stores position, velocity and torques limits defined in the URDF + + Args: + props (numpy.array): Properties of each DOF of the asset + env_id (int): Environment id + + Returns: + [numpy.array]: Modified DOF properties + """ + if env_id == 0: + self.dof_pos_limits = torch.zeros( + self.num_dof, 2, dtype=torch.float, device=self.device, requires_grad=False + ) + self.dof_vel_limits = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) + self.torque_limits = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) + for i in range(len(props)): + self.dof_pos_limits[i, 0] = props["lower"][i].item() * self.cfg.safety.pos_limit + self.dof_pos_limits[i, 1] = props["upper"][i].item() * self.cfg.safety.pos_limit + self.dof_vel_limits[i] = props["velocity"][i].item() * self.cfg.safety.vel_limit + self.torque_limits[i] = props["effort"][i].item() * self.cfg.safety.torque_limit + + # torque randomization + if self.cfg.domain_rand.randomize_calculated_torque: + self.torque_multiplier[env_id, :] = torch_rand_float( + self.cfg.domain_rand.torque_multiplier_range[0], + self.cfg.domain_rand.torque_multiplier_range[1], + (1, self.num_joints), + device=self.device, + ) + + # motor zero calibration randomization + if self.cfg.domain_rand.randomize_motor_zero_offset: + self.motor_zero_offsets[env_id, :] = torch_rand_float( + self.cfg.domain_rand.motor_zero_offset_range[0], + self.cfg.domain_rand.motor_zero_offset_range[1], + (1, self.num_joints), + device=self.device, + ) + + # motor pd gains randomization + if self.cfg.domain_rand.randomize_pd_gains: + self.p_gains_multiplier[env_id, :] = torch_rand_float( + self.cfg.domain_rand.stiffness_multiplier_range[0], + self.cfg.domain_rand.stiffness_multiplier_range[1], + (1, self.num_joints), + device=self.device, + ) + self.d_gains_multiplier[env_id, :] = torch_rand_float( + self.cfg.domain_rand.damping_multiplier_range[0], + self.cfg.domain_rand.damping_multiplier_range[1], + (1, self.num_joints), + device=self.device, + ) + + # motor frictions randomization + if self.cfg.domain_rand.randomize_joint_friction: + self.joint_friction_coeffs[env_id, 0] = torch_rand_float( + 
self.cfg.domain_rand.joint_friction_range[0], + self.cfg.domain_rand.joint_friction_range[1], + (1, 1), + device=self.device, + ) + + # motor dampings randomization + if self.cfg.domain_rand.randomize_joint_damping: + self.joint_damping_coeffs[env_id, 0] = torch_rand_float( + self.cfg.domain_rand.joint_damping_range[0], + self.cfg.domain_rand.joint_damping_range[1], + (1, 1), + device=self.device, + ) + + # motor armature randomization + if self.cfg.domain_rand.randomize_joint_armature: + self.joint_armatures[env_id, 0] = torch_rand_float( + self.cfg.domain_rand.joint_armature_range[0], + self.cfg.domain_rand.joint_armature_range[1], + (1, 1), + device=self.device, + ) + + for i in range(len(props)): + props["friction"][i] *= self.joint_friction_coeffs[env_id, 0] + props["damping"][i] *= self.joint_damping_coeffs[env_id, 0] + props["armature"][i] = self.joint_armatures[env_id, 0] + + return props + + def _process_rigid_body_props(self, props, env_id): + # randomize base mass + if self.cfg.domain_rand.randomize_base_mass: + rng = self.cfg.domain_rand.added_mass_range + props[0].mass += np.random.uniform(rng[0], rng[1]) + + # randomize link masses + if self.cfg.domain_rand.randomize_link_mass: + self.link_mass_multiplier = torch_rand_float( + self.cfg.domain_rand.link_mass_multiplier_range[0], + self.cfg.domain_rand.link_mass_multiplier_range[1], + (1, self.num_bodies - 1), + device=self.device, + ) + + for i in range(1, len(props)): + props[i].mass *= self.link_mass_multiplier[0, i - 1] + + for prop in props: + self.body_mass[env_id] += prop.mass + + return props + + def _post_physics_step_callback(self): + """Callback called before computing terminations, rewards, and observations + Default behaviour: Compute ang vel command based on target and heading, compute measured terrain heights and randomly push robots + """ + env_ids = ( + (self.episode_length_buf % int(self.cfg.commands.resampling_time / self.dt) == 0) + .nonzero(as_tuple=False) + .flatten() + ) + self._resample_commands(env_ids) + if self.cfg.commands.heading_command: + forward = quat_apply(self.base_quat, self.forward_vec) + heading = torch.atan2(forward[:, 1], forward[:, 0]) + self.commands[:, 2] = torch.clip(0.5 * wrap_to_pi(self.commands[:, 3] - heading), -1.0, 1.0) + + if self.cfg.terrain.measure_heights: + self.measured_heights = self._get_heights() + + if self.cfg.domain_rand.push_robots and (self.common_step_counter % self.cfg.domain_rand.push_interval == 0): + self._push_robots() + + def _resample_commands(self, env_ids): + """Randommly select commands of some environments + + Args: + env_ids (List[int]): Environments ids for which new commands are needed + """ + self.commands[env_ids, 0] = torch_rand_float( + self.command_ranges["lin_vel_x"][0], + self.command_ranges["lin_vel_x"][1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + self.commands[env_ids, 1] = torch_rand_float( + self.command_ranges["lin_vel_y"][0], + self.command_ranges["lin_vel_y"][1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + if self.cfg.commands.heading_command: + self.commands[env_ids, 3] = torch_rand_float( + self.command_ranges["heading"][0], + self.command_ranges["heading"][1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + else: + self.commands[env_ids, 2] = torch_rand_float( + self.command_ranges["ang_vel_yaw"][0], + self.command_ranges["ang_vel_yaw"][1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + + # set small commands to zero + self.commands[env_ids, :2] *= 
(torch.norm(self.commands[env_ids, :2], dim=1) > 0.2).unsqueeze(1) + + def _compute_torques(self, actions): + """Compute torques from actions. + Actions can be interpreted as position or velocity targets given to a PD controller, or directly as scaled torques. + [NOTE]: torques must have the same dimension as the number of DOFs, even if some DOFs are not actuated. + + Args: + actions (torch.Tensor): Actions + + Returns: + [torch.Tensor]: Torques sent to the simulation + """ + actions_scaled = actions * self.cfg.control.action_scale + p_gains = self.p_gains * self.p_gains_multiplier + d_gains = self.d_gains * self.d_gains_multiplier + torques = ( + p_gains * (actions_scaled + self.default_dof_pos - self.dof_pos + self.motor_zero_offsets) + - d_gains * self.dof_vel + ) + torques = torques * self.torque_multiplier + res = torch.clip(torques, -self.torque_limits, self.torque_limits) + return res + + def _reset_dofs(self, env_ids): + """Resets DOF position and velocities of selected environments + Positions are randomly selected within 0.5:1.5 x default positions. + Velocities are set to zero. + + Args: + env_ids (List[int]): Environment ids + """ + self.dof_pos[env_ids] = self.default_dof_pos + torch_rand_float( + -self.cfg.domain_rand.start_pos_noise, + self.cfg.domain_rand.start_pos_noise, + (len(env_ids), self.num_dof), + device=self.device, + ) + self.dof_vel[env_ids] = 0.0 + + env_ids_int32 = env_ids.to(dtype=torch.int32) + self.gym.set_dof_state_tensor_indexed( + self.sim, gymtorch.unwrap_tensor(self.dof_state), gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32) + ) + + def _reset_root_states(self, env_ids): + """Resets ROOT states position and velocities of selected environments + Sets base position based on the curriculum + Selects randomized base velocities within -0.5:0.5 [m/s, rad/s] + + Args: + env_ids (List[int]): Environment ids + """ + # base position + if self.custom_origins: + self.root_states[env_ids] = self.base_init_state + self.root_states[env_ids, :3] += self.env_origins[env_ids] + self.root_states[env_ids, :2] += torch_rand_float( + -1.0, 1.0, (len(env_ids), 2), device=self.device + ) # xy position within 1m of the center + else: + self.root_states[env_ids] = self.base_init_state + self.root_states[env_ids, :3] += self.env_origins[env_ids] + # base velocities + # self.root_states[env_ids, 7:13] = torch_rand_float(-0.05, 0.05, (len(env_ids), 6), device=self.device) # [7:10]: lin vel, [10:13]: ang vel + if self.cfg.asset.fix_base_link: + self.root_states[env_ids, 7:13] = 0 + self.root_states[env_ids, 2] += 1.8 + env_ids_int32 = env_ids.to(dtype=torch.int32) + self.gym.set_actor_root_state_tensor_indexed( + self.sim, + gymtorch.unwrap_tensor(self.root_states), + gymtorch.unwrap_tensor(env_ids_int32), + len(env_ids_int32), + ) + + def _update_terrain_curriculum(self, env_ids): + """Implements the game-inspired curriculum.
+ + Args: + env_ids (List[int]): ids of environments being reset + """ + # Implement Terrain curriculum + if not self.init_done: + # don't change on initial reset + return + distance = torch.norm(self.root_states[env_ids, :2] - self.env_origins[env_ids, :2], dim=1) + # robots that walked far enough progress to harder terrains + move_up = distance > self.terrain.env_length / 2 + # robots that walked less than half of their required distance go to simpler terrains + move_down = ( + distance < torch.norm(self.commands[env_ids, :2], dim=1) * self.max_episode_length_s * 0.5 + ) * ~move_up + self.terrain_levels[env_ids] += 1 * move_up - 1 * move_down + # Robots that solve the last level are sent to a random one + self.terrain_levels[env_ids] = torch.where( + self.terrain_levels[env_ids] >= self.max_terrain_level, + torch.randint_like(self.terrain_levels[env_ids], self.max_terrain_level), + torch.clip(self.terrain_levels[env_ids], 0), + ) # (the minimum level is zero) + self.env_origins[env_ids] = self.terrain_origins[self.terrain_levels[env_ids], self.terrain_types[env_ids]] + + def update_command_curriculum(self, env_ids): + """Implements a curriculum of increasing commands + + Args: + env_ids (List[int]): ids of environments being reset + """ + # If the tracking reward is above 80% of the maximum, increase the range of commands + if ( + torch.mean(self.episode_sums["tracking_lin_vel"][env_ids]) / self.max_episode_length + > 0.8 * self.reward_scales["tracking_lin_vel"] + ): + self.command_ranges["lin_vel_x"][0] = np.clip( + self.command_ranges["lin_vel_x"][0] - 0.5, -self.cfg.commands.max_curriculum, 0.0 + ) + self.command_ranges["lin_vel_x"][1] = np.clip( + self.command_ranges["lin_vel_x"][1] + 0.5, 0.0, self.cfg.commands.max_curriculum + ) + + def _resample_default_positions(self): + self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) + for i in range(self.num_dofs): + name = self.dof_names[i] + self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name] + + if self.add_noise: + self.default_dof_pos[i] += torch.randn(1).item() * self.cfg.noise.noise_ranges.default_pos + + # ---------------------------------------- + def _init_buffers(self): + """Initialize torch tensors which will contain simulation states and processed quantities""" + # get gym GPU state tensors + actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim) + dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) + net_contact_forces = self.gym.acquire_net_contact_force_tensor(self.sim) + rigid_body_state = self.gym.acquire_rigid_body_state_tensor(self.sim) + + self.gym.refresh_dof_state_tensor(self.sim) + self.gym.refresh_actor_root_state_tensor(self.sim) + self.gym.refresh_net_contact_force_tensor(self.sim) + self.gym.refresh_rigid_body_state_tensor(self.sim) + + # create some wrapper tensors for different slices + self.root_states = gymtorch.wrap_tensor(actor_root_state) + self.dof_state = gymtorch.wrap_tensor(dof_state_tensor) + self.dof_pos = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 0] + self.dof_vel = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 1] + + self.rigid_state = gymtorch.wrap_tensor(rigid_body_state) # .view(self.num_envs, -1, 13) + self.rigid_state = self.rigid_state.view(self.num_envs, -1, 13) + + if self.imu_indices: + self.base_quat = self.rigid_state[:, self.imu_indices, 3:7] + else: + self.base_quat = self.root_states[:, 3:7] + + self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat) + +
self.contact_forces = gymtorch.wrap_tensor(net_contact_forces).view( + self.num_envs, -1, 3 + ) # shape: num_envs, num_bodies, xyz axis + + # initialize some data used later on + self.common_step_counter = 0 + self.extras = {} + + self.noise_scale_vec = self._get_noise_scale_vec(self.cfg) + self.gravity_vec = to_torch(get_axis_params(-1.0, self.up_axis_idx), device=self.device).repeat( + (self.num_envs, 1) + ) + self.forward_vec = to_torch([1.0, 0.0, 0.0], device=self.device).repeat((self.num_envs, 1)) + self.torques = torch.zeros( + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False + ) + self.p_gains = torch.zeros( + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False + ) + self.d_gains = torch.zeros( + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False + ) + + self.torque_multiplier = torch.ones( + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False + ) + self.motor_zero_offsets = torch.zeros( + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False + ) + self.p_gains_multiplier = torch.ones( + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False + ) + self.d_gains_multiplier = torch.ones( + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False + ) + + self.actions = torch.zeros( + self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False + ) + self.last_actions = torch.zeros( + self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False + ) + self.last_last_actions = torch.zeros( + self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False + ) + self.last_rigid_state = torch.zeros_like(self.rigid_state) + self.last_dof_vel = torch.zeros_like(self.dof_vel) + self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13]) + self.commands = torch.zeros( + self.num_envs, self.cfg.commands.num_commands, dtype=torch.float, device=self.device, requires_grad=False + ) # x vel, y vel, yaw vel, heading + self.commands_scale = torch.tensor( + [self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel], + device=self.device, + requires_grad=False, + ) # TODO change this + self.feet_air_time = torch.zeros( + self.num_envs, self.feet_indices.shape[0], dtype=torch.float, device=self.device, requires_grad=False + ) + self.last_contacts = torch.zeros( + self.num_envs, len(self.feet_indices), dtype=torch.bool, device=self.device, requires_grad=False + ) + if self.imu_indices: + self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 7:10]) + self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 10:13]) + else: + self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10]) + self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13]) + + self.projected_gravity = quat_rotate_inverse(self.base_quat, self.gravity_vec) + if self.cfg.terrain.measure_heights: + self.height_points = self._init_height_points() + self.measured_heights = 0 + # joint positions offsets and PD gains + self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) + for i in range(self.num_dofs): + name = self.dof_names[i] + print(i, name) + self.default_dof_pos[i] = 
self.cfg.init_state.default_joint_angles[name] + found = False + + for dof_name in self.cfg.control.stiffness.keys(): + if dof_name in name: + self.p_gains[:, i] = self.cfg.control.stiffness[dof_name] + self.d_gains[:, i] = self.cfg.control.damping[dof_name] + found = True + if not found: + self.p_gains[:, i] = 0.0 + self.d_gains[:, i] = 0.0 + raise ValueError(f"PD gain of joint {name} were not defined, setting them to zero") + + self.original_p_gains = self.p_gains.clone() + self.original_d_gains = self.d_gains.clone() + + self.rand_push_force = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device) + self.rand_push_torque = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device) + self.default_dof_pos = self.default_dof_pos.unsqueeze(0) + + self.default_joint_pd_target = self.default_dof_pos.clone() + self.obs_history = deque(maxlen=self.cfg.env.frame_stack) + self.critic_history = deque(maxlen=self.cfg.env.c_frame_stack) + for _ in range(self.cfg.env.frame_stack): + self.obs_history.append( + torch.zeros(self.num_envs, self.cfg.env.num_single_obs, dtype=torch.float, device=self.device) + ) + for _ in range(self.cfg.env.c_frame_stack): + self.critic_history.append( + torch.zeros( + self.num_envs, self.cfg.env.single_num_privileged_obs, dtype=torch.float, device=self.device + ) + ) + + # Latency buffers + self.cmd_action_latency_buffer = torch.zeros( + self.num_envs, self.num_actions, self.cfg.domain_rand.range_cmd_action_latency[1] + 1, device=self.device + ) + self.obs_motor_latency_buffer = torch.zeros( + self.num_envs, self.num_joints * 2, self.cfg.domain_rand.range_obs_motor_latency[1] + 1, device=self.device + ) + self.obs_imu_latency_buffer = torch.zeros( + self.num_envs, 6, self.cfg.domain_rand.range_obs_imu_latency[1] + 1, device=self.device + ) + self.cmd_action_latency_simstep = torch.zeros(self.num_envs, dtype=torch.long, device=self.device) + self.obs_motor_latency_simstep = torch.zeros(self.num_envs, dtype=torch.long, device=self.device) + self.obs_imu_latency_simstep = torch.zeros(self.num_envs, dtype=torch.long, device=self.device) + + self.p_gains_multiplier = torch.ones((self.num_envs, self.num_joints), device=self.device) + self.d_gains_multiplier = torch.ones((self.num_envs, self.num_joints), device=self.device) + self.torque_multiplier = torch.ones((self.num_envs, self.num_joints), device=self.device) + self.motor_zero_offsets = torch.zeros((self.num_envs, self.num_joints), device=self.device) + self.joint_friction_coeffs = torch.ones((self.num_envs, 1), device=self.device) + self.joint_damping_coeffs = torch.ones((self.num_envs, 1), device=self.device) + self.joint_armatures = torch.zeros((self.num_envs, 1), device=self.device) + self.env_frictions = torch.ones((self.num_envs, 1), device=self.device) + + def _prepare_reward_function(self): + """Prepares a list of reward functions, which will be called to compute the total reward. + Looks for self._reward_<REWARD_NAME>, where <REWARD_NAME> are names of all non zero reward scales in the cfg.
+ """ + + # remove zero scales + multiply non-zero ones by dt + for key in list(self.reward_scales.keys()): + if key != "termination": + scale = self.reward_scales[key] + if scale == 0: + self.reward_scales.pop(key) + else: + self.reward_scales[key] *= self.dt + # prepare list of functions + self.reward_functions = [] + self.reward_names = [] + + for name, scale in self.reward_scales.items(): + if name == "termination": + continue + self.reward_names.append(name) + name = "_reward_" + name + self.reward_functions.append(getattr(self, name)) + + # reward episode sums + self.episode_sums = { + name: torch.zeros(self.num_envs, dtype=torch.float, device=self.device, requires_grad=False) + for name in self.reward_scales.keys() + } + + def _create_ground_plane(self): + """Adds a ground plane to the simulation, sets friction and restitution based on the cfg.""" + plane_params = gymapi.PlaneParams() + plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) + plane_params.static_friction = self.cfg.terrain.static_friction + plane_params.dynamic_friction = self.cfg.terrain.dynamic_friction + plane_params.restitution = self.cfg.terrain.restitution + self.gym.add_ground(self.sim, plane_params) + + def _create_heightfield(self): + """Adds a heightfield terrain to the simulation, sets parameters based on the cfg.""" + hf_params = gymapi.HeightFieldParams() + hf_params.column_scale = self.terrain.cfg.horizontal_scale + hf_params.row_scale = self.terrain.cfg.horizontal_scale + hf_params.vertical_scale = self.terrain.cfg.vertical_scale + hf_params.nbRows = self.terrain.tot_cols + hf_params.nbColumns = self.terrain.tot_rows + hf_params.transform.p.x = -self.terrain.cfg.border_size + hf_params.transform.p.y = -self.terrain.cfg.border_size + hf_params.transform.p.z = 0.0 + hf_params.static_friction = self.cfg.terrain.static_friction + hf_params.dynamic_friction = self.cfg.terrain.dynamic_friction + hf_params.restitution = self.cfg.terrain.restitution + + self.gym.add_heightfield(self.sim, self.terrain.heightsamples, hf_params) + self.height_samples = ( + torch.tensor(self.terrain.heightsamples).view(self.terrain.tot_rows, self.terrain.tot_cols).to(self.device) + ) + + def _create_trimesh(self): + """Adds a triangle mesh terrain to the simulation, sets parameters based on the cfg. + # + """ + tm_params = gymapi.TriangleMeshParams() + tm_params.nb_vertices = self.terrain.vertices.shape[0] + tm_params.nb_triangles = self.terrain.triangles.shape[0] + + tm_params.transform.p.x = -self.terrain.cfg.border_size + tm_params.transform.p.y = -self.terrain.cfg.border_size + tm_params.transform.p.z = 0.0 + tm_params.static_friction = self.cfg.terrain.static_friction + tm_params.dynamic_friction = self.cfg.terrain.dynamic_friction + tm_params.restitution = self.cfg.terrain.restitution + self.gym.add_triangle_mesh( + self.sim, self.terrain.vertices.flatten(order="C"), self.terrain.triangles.flatten(order="C"), tm_params + ) + self.height_samples = ( + torch.tensor(self.terrain.heightsamples).view(self.terrain.tot_rows, self.terrain.tot_cols).to(self.device) + ) + + def _create_envs(self): + """Creates environments: + 1. loads the robot URDF/MJCF asset, + 2. For each environment + 2.1 creates the environment, + 2.2 calls DOF and Rigid shape properties callbacks, + 2.3 create actor with these properties and add them to the env + 3. 
Store indices of different bodies of the robot + """ + asset_path = self.cfg.asset.file.format(ROOT_DIR=ROOT_DIR) + asset_root = os.path.dirname(asset_path) + asset_file = os.path.basename(asset_path) + + asset_options = gymapi.AssetOptions() + asset_options.default_dof_drive_mode = self.cfg.asset.default_dof_drive_mode + asset_options.collapse_fixed_joints = self.cfg.asset.collapse_fixed_joints + asset_options.replace_cylinder_with_capsule = self.cfg.asset.replace_cylinder_with_capsule + asset_options.flip_visual_attachments = self.cfg.asset.flip_visual_attachments + asset_options.fix_base_link = self.cfg.asset.fix_base_link + asset_options.density = self.cfg.asset.density + asset_options.angular_damping = self.cfg.asset.angular_damping + asset_options.linear_damping = self.cfg.asset.linear_damping + asset_options.max_angular_velocity = self.cfg.asset.max_angular_velocity + asset_options.max_linear_velocity = self.cfg.asset.max_linear_velocity + asset_options.armature = self.cfg.asset.armature + asset_options.thickness = self.cfg.asset.thickness + asset_options.disable_gravity = self.cfg.asset.disable_gravity + + robot_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) + self.num_dof = self.gym.get_asset_dof_count(robot_asset) + self.num_bodies = self.gym.get_asset_rigid_body_count(robot_asset) + dof_props_asset = self.gym.get_asset_dof_properties(robot_asset) + + self.dof_props_asset = dof_props_asset + + rigid_shape_props_asset = self.gym.get_asset_rigid_shape_properties(robot_asset) + + # save body names from the asset + body_names = self.gym.get_asset_rigid_body_names(robot_asset) + self.dof_names = self.gym.get_asset_dof_names(robot_asset) + self.num_bodies = len(body_names) + self.num_dofs = len(self.dof_names) + feet_names = [s for s in body_names if s in self.cfg.asset.foot_name] + knee_names = [s for s in body_names if s in self.cfg.asset.knee_name] + + penalized_contact_names = [] + for name in self.cfg.asset.penalize_contacts_on: + penalized_contact_names.extend([s for s in body_names if name in s]) + termination_contact_names = [] + for name in self.cfg.asset.terminate_after_contacts_on: + termination_contact_names.extend([s for s in body_names if name in s]) + safety_termination_contact_names = [] + for name in self.cfg.safety.terminate_after_contacts_on: + safety_termination_contact_names.extend([s for s in body_names if name in s]) + base_init_state_list = ( + self.cfg.init_state.pos + + self.cfg.init_state.rot + + self.cfg.init_state.lin_vel + + self.cfg.init_state.ang_vel + ) + self.base_init_state = to_torch(base_init_state_list, device=self.device, requires_grad=False) + start_pose = gymapi.Transform() + start_pose.p = gymapi.Vec3(*self.base_init_state[:3]) + + self._get_env_origins() + env_lower = gymapi.Vec3(0.0, 0.0, 0.0) + env_upper = gymapi.Vec3(0.0, 0.0, 0.0) + self.actor_handles = [] + self.envs = [] + self.env_frictions = torch.zeros(self.num_envs, 1, dtype=torch.float32, device=self.device) + + self.body_mass = torch.zeros(self.num_envs, 1, dtype=torch.float32, device=self.device, requires_grad=False) + + self.joint_friction_coeffs = torch.ones( + self.num_envs, 1, dtype=torch.float, device=self.device, requires_grad=False + ) + + self.joint_damping_coeffs = torch.ones( + self.num_envs, 1, dtype=torch.float, device=self.device, requires_grad=False + ) + + self.joint_armatures = torch.zeros(self.num_envs, 1, dtype=torch.float, device=self.device, requires_grad=False) + + self.torque_multiplier = torch.ones( + self.num_envs, 
self.num_joints, dtype=torch.float, device=self.device, requires_grad=False + ) + self.motor_zero_offsets = torch.zeros( + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False + ) + self.p_gains_multiplier = torch.ones( + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False + ) + self.d_gains_multiplier = torch.ones( + self.num_envs, self.num_joints, dtype=torch.float, device=self.device, requires_grad=False + ) + + for i in range(self.num_envs): + # create env instance + env_handle = self.gym.create_env(self.sim, env_lower, env_upper, int(np.sqrt(self.num_envs))) + pos = self.env_origins[i].clone() + pos[:2] += torch_rand_float(-1.0, 1.0, (2, 1), device=self.device).squeeze(1) + start_pose.p = gymapi.Vec3(*pos) + + rigid_shape_props = self._process_rigid_shape_props(rigid_shape_props_asset, i) + self.gym.set_asset_rigid_shape_properties(robot_asset, rigid_shape_props) + actor_handle = self.gym.create_actor( + env_handle, robot_asset, start_pose, self.cfg.asset.name, i, self.cfg.asset.self_collisions, 0 + ) + dof_props = self._process_dof_props(dof_props_asset, i) + self.gym.set_actor_dof_properties(env_handle, actor_handle, dof_props) + body_props = self.gym.get_actor_rigid_body_properties(env_handle, actor_handle) + body_props = self._process_rigid_body_props(body_props, i) + + self.gym.set_actor_rigid_body_properties(env_handle, actor_handle, body_props, recomputeInertia=False) + self.envs.append(env_handle) + self.actor_handles.append(actor_handle) + + self.feet_indices = torch.zeros(len(feet_names), dtype=torch.long, device=self.device, requires_grad=False) + for i in range(len(feet_names)): + self.feet_indices[i] = self.gym.find_actor_rigid_body_handle( + self.envs[0], self.actor_handles[0], feet_names[i] + ) + print("feet", self.feet_indices) + print(f"Processed body properties for env {i}:") + for j, prop in enumerate(body_props): + print(f" Body {j} mass: {prop.mass}") + print(f" Body {j} inertia: {prop.inertia.x}, {prop.inertia.y}, {prop.inertia.z}") + + self.knee_indices = torch.zeros(len(knee_names), dtype=torch.long, device=self.device, requires_grad=False) + for i in range(len(knee_names)): + self.knee_indices[i] = self.gym.find_actor_rigid_body_handle( + self.envs[0], self.actor_handles[0], knee_names[i] + ) + self.imu_indices = self.gym.find_actor_rigid_body_handle( + self.envs[0], self.actor_handles[0], self.cfg.asset.imu_name + ) + + if self.imu_indices == -1: + self.imu_indices = None + print(f"Warning: IMU {self.cfg.asset.imu_name} not found in the asset. 
Defaulting to base link.") + self.penalised_contact_indices = torch.zeros( + len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False + ) + for i in range(len(penalized_contact_names)): + self.penalised_contact_indices[i] = self.gym.find_actor_rigid_body_handle( + self.envs[0], self.actor_handles[0], penalized_contact_names[i] + ) + + self.termination_contact_indices = torch.zeros( + len(termination_contact_names), dtype=torch.long, device=self.device, requires_grad=False + ) + for i in range(len(termination_contact_names)): + self.termination_contact_indices[i] = self.gym.find_actor_rigid_body_handle( + self.envs[0], self.actor_handles[0], termination_contact_names[i] + ) + + self.safety_termination_contact_indices = torch.zeros( + len(safety_termination_contact_names), dtype=torch.long, device=self.device, requires_grad=False + ) + for i in range(len(safety_termination_contact_names)): + self.safety_termination_contact_indices[i] = self.gym.find_actor_rigid_body_handle( + self.envs[0], self.actor_handles[0], safety_termination_contact_names[i] + ) + + def _get_env_origins(self): + """Sets environment origins. On rough terrain the origins are defined by the terrain platforms. + Otherwise create a grid. + """ + if self.cfg.terrain.mesh_type in ["heightfield", "trimesh"]: + self.custom_origins = True + self.env_origins = torch.zeros(self.num_envs, 3, device=self.device, requires_grad=False) + # put robots at the origins defined by the terrain + max_init_level = self.cfg.terrain.max_init_terrain_level + if not self.cfg.terrain.curriculum: + max_init_level = self.cfg.terrain.num_rows - 1 + self.terrain_levels = torch.randint(0, max_init_level + 1, (self.num_envs,), device=self.device) + self.terrain_types = torch.div( + torch.arange(self.num_envs, device=self.device), + (self.num_envs / self.cfg.terrain.num_cols), + rounding_mode="floor", + ).to(torch.long) + self.max_terrain_level = self.cfg.terrain.num_rows + self.terrain_origins = torch.from_numpy(self.terrain.env_origins).to(self.device).to(torch.float) + self.env_origins[:] = self.terrain_origins[self.terrain_levels, self.terrain_types] + else: + self.custom_origins = False + self.env_origins = torch.zeros(self.num_envs, 3, device=self.device, requires_grad=False) + # create a grid of robots + num_cols = np.floor(np.sqrt(self.num_envs)) + num_rows = np.ceil(self.num_envs / num_cols) + xx, yy = torch.meshgrid(torch.arange(num_rows), torch.arange(num_cols)) + spacing = self.cfg.env.env_spacing + self.env_origins[:, 0] = spacing * xx.flatten()[: self.num_envs] + self.env_origins[:, 1] = spacing * yy.flatten()[: self.num_envs] + self.env_origins[:, 2] = 0.0 + + def _parse_cfg(self, cfg): + self.dt = self.cfg.control.decimation * self.sim_params.dt + self.obs_scales = self.cfg.normalization.obs_scales + self.reward_scales = class_to_dict(self.cfg.rewards.scales) + self.command_ranges = class_to_dict(self.cfg.commands.ranges) + if self.cfg.terrain.mesh_type not in ["heightfield", "trimesh"]: + self.cfg.terrain.curriculum = False + self.max_episode_length_s = self.cfg.env.episode_length_s + self.max_episode_length = np.ceil(self.max_episode_length_s / self.dt) + + self.cfg.domain_rand.push_interval = np.ceil(self.cfg.domain_rand.push_interval_s / self.dt) + + def _draw_debug_vis(self): + """Draws visualizations for dubugging (slows down simulation a lot). 
+ Default behaviour: draws height measurement points + """ + # draw height lines + if not self.cfg.terrain.measure_heights: + return + self.gym.clear_lines(self.viewer) + self.gym.refresh_rigid_body_state_tensor(self.sim) + sphere_geom = gymutil.WireframeSphereGeometry(0.02, 4, 4, None, color=(1, 1, 0)) + for i in range(self.num_envs): + base_pos = (self.root_states[i, :3]).cpu().numpy() + heights = self.measured_heights[i].cpu().numpy() + height_points = ( + quat_apply_yaw(self.base_quat[i].repeat(heights.shape[0]), self.height_points[i]).cpu().numpy() + ) + for j in range(heights.shape[0]): + x = height_points[j, 0] + base_pos[0] + y = height_points[j, 1] + base_pos[1] + z = heights[j] + sphere_pose = gymapi.Transform(gymapi.Vec3(x, y, z), r=None) + gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[i], sphere_pose) + + def _init_height_points(self): + """Returns points at which the height measurments are sampled (in base frame) + + Returns: + [torch.Tensor]: Tensor of shape (num_envs, self.num_height_points, 3) + """ + y = torch.tensor(self.cfg.terrain.measured_points_y, device=self.device, requires_grad=False) + x = torch.tensor(self.cfg.terrain.measured_points_x, device=self.device, requires_grad=False) + grid_x, grid_y = torch.meshgrid(x, y) + + self.num_height_points = grid_x.numel() + points = torch.zeros(self.num_envs, self.num_height_points, 3, device=self.device, requires_grad=False) + points[:, :, 0] = grid_x.flatten() + points[:, :, 1] = grid_y.flatten() + return points + + def _get_heights(self, env_ids=None): + """Samples heights of the terrain at required points around each robot. + The points are offset by the base's position and rotated by the base's yaw + + Args: + env_ids (List[int], optional): Subset of environments for which to return the heights. Defaults to None. 
+ + Raises: + NameError: [description] + + Returns: + [type]: [description] + """ + if self.cfg.terrain.mesh_type == "plane": + return torch.zeros(self.num_envs, self.num_height_points, device=self.device, requires_grad=False) + elif self.cfg.terrain.mesh_type == "none": + raise NameError("Can't measure height with terrain mesh type 'none'") + + if env_ids: + points = quat_apply_yaw( + self.base_quat[env_ids].repeat(1, self.num_height_points), self.height_points[env_ids] + ) + (self.root_states[env_ids, :3]).unsqueeze(1) + else: + points = quat_apply_yaw(self.base_quat.repeat(1, self.num_height_points), self.height_points) + ( + self.root_states[:, :3] + ).unsqueeze(1) + + points += self.terrain.cfg.border_size + points = (points / self.terrain.cfg.horizontal_scale).long() + px = points[:, :, 0].view(-1) + py = points[:, :, 1].view(-1) + px = torch.clip(px, 0, self.height_samples.shape[0] - 2) + py = torch.clip(py, 0, self.height_samples.shape[1] - 2) + + heights1 = self.height_samples[px, py] + heights2 = self.height_samples[px + 1, py] + heightXBotL = self.height_samples[px, py + 1] + heights = torch.min(heights1, heights2) + heights = torch.min(heights, heightXBotL) + + return heights.view(self.num_envs, -1) * self.terrain.cfg.vertical_scale + + def update_cmd_action_latency_buffer(self): + """Updates command action latency buffer and returns delayed actions""" + if self.cfg.domain_rand.add_cmd_action_latency: + self.cmd_action_latency_buffer[:, :, 1:] = self.cmd_action_latency_buffer[ + :, :, : self.cfg.domain_rand.range_cmd_action_latency[1] + ].clone() + self.cmd_action_latency_buffer[:, :, 0] = self.actions.clone() + action_delayed = self.cmd_action_latency_buffer[ + torch.arange(self.num_envs), :, self.cmd_action_latency_simstep.long() + ] + else: + action_delayed = self.actions + return action_delayed + + def update_obs_latency_buffer(self): + """Updates observation latency buffers for motor and IMU data""" + if self.cfg.domain_rand.randomize_obs_motor_latency: + q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos + dq = self.dof_vel * self.obs_scales.dof_vel + self.obs_motor_latency_buffer[:, :, 1:] = self.obs_motor_latency_buffer[ + :, :, : self.cfg.domain_rand.range_obs_motor_latency[1] + ].clone() + self.obs_motor_latency_buffer[:, :, 0] = torch.cat((q, dq), 1).clone() + + if self.cfg.domain_rand.randomize_obs_imu_latency: + if self.imu_indices: + self.base_quat[:] = self.rigid_state[:, self.imu_indices, 3:7] + self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 10:13]) + else: + self.base_quat[:] = self.root_states[:, 3:7] + self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13]) + + self.projected_gravity[:] = quat_rotate_inverse(self.base_quat, self.gravity_vec) + self.obs_imu_latency_buffer[:, :, 1:] = self.obs_imu_latency_buffer[ + :, :, : self.cfg.domain_rand.range_obs_imu_latency[1] + ].clone() + self.obs_imu_latency_buffer[:, :, 0] = torch.cat( + (self.projected_gravity, self.base_ang_vel * self.obs_scales.ang_vel), 1 + ).clone() + + def _reset_latency_buffer(self, env_ids): + """Resets latency buffers for specified environments""" + if self.cfg.domain_rand.randomize_obs_motor_latency: + self.obs_motor_latency_buffer[env_ids] = 0 + self.obs_motor_latency_simstep[env_ids] = torch.randint_like( + self.obs_motor_latency_simstep[env_ids], + self.cfg.domain_rand.range_obs_motor_latency[0], + self.cfg.domain_rand.range_obs_motor_latency[1], + ) + + if 
self.cfg.domain_rand.randomize_obs_imu_latency: + self.obs_imu_latency_buffer[env_ids] = 0 + self.obs_imu_latency_simstep[env_ids] = torch.randint_like( + self.obs_imu_latency_simstep[env_ids], + self.cfg.domain_rand.range_obs_imu_latency[0], + self.cfg.domain_rand.range_obs_imu_latency[1], + ) + + if self.cfg.domain_rand.add_cmd_action_latency: + self.cmd_action_latency_buffer[env_ids] = 0 + self.cmd_action_latency_simstep[env_ids] = torch.randint( + self.cfg.domain_rand.range_cmd_action_latency[0], + self.cfg.domain_rand.range_cmd_action_latency[1] + 1, + (len(env_ids),), + device=self.device, + ) diff --git a/sim/envs/humanoids/gpr_config.py b/sim/envs/humanoids/gpr_config.py index 3660736b..f7115829 100644 --- a/sim/envs/humanoids/gpr_config.py +++ b/sim/envs/humanoids/gpr_config.py @@ -17,9 +17,9 @@ class GprCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): # change the observation dim - frame_stack = 15 - c_frame_stack = 3 - num_single_obs = 11 + NUM_JOINTS * 3 + frame_stack = 15 # Actor + c_frame_stack = 3 # Critic + num_single_obs = 8 + NUM_JOINTS * 3 + 3 # Add ang vel num_observations = int(frame_stack * num_single_obs) single_num_privileged_obs = 25 + NUM_JOINTS * 4 num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) @@ -28,93 +28,6 @@ class env(LeggedRobotCfg.env): episode_length_s = 24 # episode length in seconds use_ref_actions = False - # input_schema = P.IOSchema( - # values=[ - # P.ValueSchema( - # value_name="vector_command", - # vector_command=P.VectorCommandSchema( - # dimensions=3, # x_vel, y_vel, rot - # ), - # ), - # P.ValueSchema( - # value_name="timestamp", - # timestamp=P.TimestampSchema( - # start_seconds=0, - # ), - # ), - # P.ValueSchema( - # value_name="dof_pos", - # joint_positions=P.JointPositionsSchema( - # joint_names=Robot.joint_names(), - # unit=P.JointPositionUnit.RADIANS, - # ), - # ), - # P.ValueSchema( - # value_name="dof_vel", - # joint_velocities=P.JointVelocitiesSchema( - # joint_names=Robot.joint_names(), - # unit=P.JointVelocityUnit.RADIANS_PER_SECOND, - # ), - # ), - # P.ValueSchema( - # value_name="prev_actions", - # joint_positions=P.JointPositionsSchema( - # joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS - # ), - # ), - # # Abusing the IMU schema to pass in euler and angular velocity instead of raw sensor data - # P.ValueSchema( - # value_name="imu_ang_vel", - # imu=P.ImuSchema( - # use_accelerometer=False, - # use_gyroscope=True, - # use_magnetometer=False, - # ), - # ), - # P.ValueSchema( - # value_name="imu_euler_xyz", - # imu=P.ImuSchema( - # use_accelerometer=True, - # use_gyroscope=False, - # use_magnetometer=False, - # ), - # ), - # P.ValueSchema( - # value_name="hist_obs", - # state_tensor=P.StateTensorSchema( - # # 11 is the number of single observation features - 6 from IMU, 5 from command input - # # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) - # shape=[frame_stack * (11 + NUM_JOINTS * 3)], - # dtype=P.DType.FP32, - # ), - # ), - # ] - # ) - - # output_schema = P.IOSchema( - # values=[ - # P.ValueSchema( - # value_name="actions", - # joint_positions=P.JointPositionsSchema( - # joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS - # ), - # ), - # P.ValueSchema( - # value_name="actions_raw", - # joint_positions=P.JointPositionsSchema( - # joint_names=Robot.joint_names(), unit=P.JointPositionUnit.RADIANS - # ), - # ), - # P.ValueSchema( - # value_name="new_x", - # state_tensor=P.StateTensorSchema( - # shape=[frame_stack 
* (11 + NUM_JOINTS * 3)], - # dtype=P.DType.FP32, - # ), - # ), - # ] - # ) - class safety(LeggedRobotCfg.safety): # safety factors pos_limit = 1.0 @@ -128,7 +41,7 @@ class asset(LeggedRobotCfg.asset): foot_name = ["foot1", "foot3"] knee_name = ["leg3_shell2", "leg3_shell22"] - imu_name = "imu_link" + imu_name = "imu" termination_height = 0.2 default_feet_height = 0.0 @@ -140,8 +53,8 @@ class asset(LeggedRobotCfg.asset): fix_base_link = False class terrain(LeggedRobotCfg.terrain): - # mesh_type = "plane" - mesh_type = "trimesh" + mesh_type = "plane" + # mesh_type = "trimesh" curriculum = False # rough terrain only: measure_heights = False @@ -168,10 +81,6 @@ class noise_scales: quat = 0.03 height_measurements = 0.1 - # Currently unused - # class noise_ranges: - # default_pos = 0.03 # +- 0.05 rad - class init_state(LeggedRobotCfg.init_state): pos = [0.0, 0.0, Robot.height] rot = Robot.rotation @@ -223,6 +132,7 @@ class domain_rand(LeggedRobotCfg.domain_rand): # dynamic randomization action_noise = 0.02 action_delay = 0.5 + randomize_pd_gains = False class commands(LeggedRobotCfg.commands): # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) @@ -231,9 +141,9 @@ class commands(LeggedRobotCfg.commands): heading_command = True # if true: compute ang vel command from heading error class ranges: - lin_vel_x = [-0.3, 0.6] # min max [m/s] - lin_vel_y = [-0.3, 0.3] # min max [m/s] - ang_vel_yaw = [-0.3, 0.3] # min max [rad/s] + lin_vel_x = [-0.5, 1.0] # min max [m/s] + lin_vel_y = [-0.5, 0.5] # min max [m/s] + ang_vel_yaw = [-1.5, 1.5] # min max [rad/s] heading = [-3.14, 3.14] class rewards: @@ -242,8 +152,8 @@ class rewards: min_dist = 0.2 max_dist = 0.5 # put some settings here for LLM parameter tuning - target_joint_pos_scale = 0.17 # rad - target_feet_height = 0.05 # m + target_joint_pos_scale = 0.24 # rad + target_feet_height = 0.06 # m cycle_time = 0.4 # sec # if true negative total rewards are clipped at zero (avoids early termination problems) @@ -316,8 +226,8 @@ class rewards: max_dist = 0.5 # put some settings here for LLM parameter tuning target_joint_pos_scale = 0.17 # rad - target_feet_height = 0.05 # m - cycle_time = 0.5 # sec + target_feet_height = 0.06 # m + cycle_time = 0.4 # sec # if true negative total rewards are clipped at zero (avoids early termination problems) only_positive_rewards = False # tracking reward = exp(error*sigma) diff --git a/sim/envs/humanoids/gpr_env.py b/sim/envs/humanoids/gpr_env.py index e309e3bb..e25c1fd4 100644 --- a/sim/envs/humanoids/gpr_env.py +++ b/sim/envs/humanoids/gpr_env.py @@ -124,15 +124,17 @@ def compute_ref_state(self): scale_2 = 2 * scale_1 # left foot stance phase set to default joint pos sin_pos_l[sin_pos_l > 0] = 0 - self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1 + + self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += -sin_pos_l * scale_1 self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2 - self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1 + self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1 * -1 # I negated this # right foot stance phase set to default joint pos sin_pos_r[sin_pos_r < 0] = 0 - self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1 - self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2 - self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1 + # pfb30 last 
one should be -1 + self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += -sin_pos_r * scale_1 + self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += -sin_pos_r * scale_2 * -1 # I negated this + self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += -sin_pos_r * scale_1 * -1 # I negated this # Double support phase self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0 @@ -180,11 +182,11 @@ def _get_noise_scale_vec(self, cfg): noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0 # previous actions noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = ( + noise_scales.quat * self.obs_scales.quat + ) # projected_gravity + noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5) + 6] = ( noise_scales.ang_vel * self.obs_scales.ang_vel ) # ang vel - noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5)] = ( - noise_scales.quat * self.obs_scales.quat - ) # euler x,y return noise_vec def compute_observations(self): @@ -228,8 +230,8 @@ def compute_observations(self): q, # 12D dq, # 12D self.actions, # 12D + self.projected_gravity, # 3 self.base_ang_vel * self.obs_scales.ang_vel, # 3 - self.base_euler_xyz * self.obs_scales.quat, # 3 ), dim=-1, ) diff --git a/sim/envs/humanoids/zeroth_config.py b/sim/envs/humanoids/gpr_headless_config.py similarity index 62% rename from sim/envs/humanoids/zeroth_config.py rename to sim/envs/humanoids/gpr_headless_config.py index 7926cef5..3ae70147 100644 --- a/sim/envs/humanoids/zeroth_config.py +++ b/sim/envs/humanoids/gpr_headless_config.py @@ -5,55 +5,63 @@ LeggedRobotCfg, LeggedRobotCfgPPO, ) -from sim.resources.zeroth.joints import Robot +from sim.resources.gpr_headless.joints import Robot -NUM_JOINTS = len(Robot.all_joints()) # 20 +NUM_JOINTS = len(Robot.all_joints()) -class ZerothCfg(LeggedRobotCfg): +class GprHeadlessCfg(LeggedRobotCfg): """Configuration class for the Legs humanoid robot.""" class env(LeggedRobotCfg.env): # change the observation dim frame_stack = 15 c_frame_stack = 3 - num_single_obs = 11 + NUM_JOINTS * 3 + num_single_obs = 8 + NUM_JOINTS * 4 # 2 for past actions, 1 for dof pos, 1 for dof vel num_observations = int(frame_stack * num_single_obs) - single_num_privileged_obs = 25 + NUM_JOINTS * 4 + single_num_privileged_obs = 25 + NUM_JOINTS * 5 # 2 for past actions, 1 for dof pos, 1 for dof vel, 1 for diff num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) - num_actions = NUM_JOINTS + num_joints = NUM_JOINTS + num_actions = NUM_JOINTS * 2 num_envs = 4096 episode_length_s = 24 # episode length in seconds use_ref_actions = False - class safety: + class safety(LeggedRobotCfg.safety): # safety factors pos_limit = 1.0 vel_limit = 1.0 - torque_limit = 0.85 - terminate_after_contacts_on = [] + torque_limit = 1.0 class asset(LeggedRobotCfg.asset): - name = "zeroth" + name = "gpr_headless" + file = str(robot_urdf_path(name)) - foot_name = ["foot_bracket_for_5dof_leg_v9", "foot_bracket_for_5dof_leg_v9_2"] - knee_name = ["leg_top_bracket_v8_1", "leg_top_bracket_v8_1_2"] + foot_name = ["foot1", "foot3"] + knee_name = ["leg3_shell1", "leg3_shell11"] + imu_name = "imu" - termination_height = 0.05 - default_feet_height = 0.01 + # foot_name = ["foot1", "foot3"] + # knee_name = ["leg3_shell1", "leg3_shell11"] + # imu_name = "imu" - terminate_after_contacts_on = ["torso"] + # terminate_after_contacts_on = ["arm1_top_2", "arm1_top", "shoulder", "shoulder_2", + # "arm2_shell_2", "arm2_shell", "arm3_shell2", 
"arm3_shell", + # "hand_shell", "hand_shell_2"] + terminate_after_contacts_on = ["shoulder", "shoulder_2", "arm1_top", "arm1_top_2"] + termination_height = 0.2 + default_feet_height = 0.0 penalize_contacts_on = [] - self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter + self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter flip_visual_attachments = False replace_cylinder_with_capsule = False fix_base_link = False class terrain(LeggedRobotCfg.terrain): mesh_type = "plane" - # mesh_type = 'trimesh' + # mesh_type = "trimesh" curriculum = False # rough terrain only: measure_heights = False @@ -74,16 +82,15 @@ class noise: class noise_scales: dof_pos = 0.05 - dof_vel = 0.5 + dof_vel = 2.5 ang_vel = 0.1 lin_vel = 0.05 quat = 0.03 height_measurements = 0.1 class init_state(LeggedRobotCfg.init_state): - pos = [0.0, 0.0, Robot.height] + pos = [0.0, 0.0, Robot.height + 0.025] rot = Robot.rotation - default_joint_angles = {k: 0.0 for k in Robot.all_joints()} default_positions = Robot.default_standing() @@ -97,7 +104,8 @@ class control(LeggedRobotCfg.control): # action scale: target angle = actionScale * action + defaultAngle action_scale = 0.25 # decimation: Number of control action updates @ sim DT per policy DT - decimation = 10 # 100hz + decimation = 20 # 50hz + pd_decimation = 1 class sim(LeggedRobotCfg.sim): dt = 0.001 # 1000 Hz @@ -122,15 +130,18 @@ class domain_rand(LeggedRobotCfg.domain_rand): start_pos_noise = 0.1 randomize_friction = True friction_range = [0.1, 2.0] - randomize_base_mass = True # True - added_mass_range = [-0.25, 0.25] - push_robots = True # True - push_interval_s = 4 - max_push_vel_xy = 0.05 - max_push_ang_vel = 0.1 + + randomize_base_mass = True + added_mass_range = [-2.0, 2.0] + push_robots = True + push_random_interval_min = 1.0 + push_random_interval_max = 4.0 + max_push_vel_xy = 1.0 # 1.2 # 0.4 + max_push_ang_vel = 1.0 # 1.2 # 0.4 # dynamic randomization - action_delay = 0.5 action_noise = 0.02 + action_delay = 0.5 + randomize_pd_gains = False class commands(LeggedRobotCfg.commands): # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) @@ -139,20 +150,21 @@ class commands(LeggedRobotCfg.commands): heading_command = True # if true: compute ang vel command from heading error class ranges: - lin_vel_x = [-0.3, 0.6] # min max [m/s] - lin_vel_y = [-0.3, 0.3] # min max [m/s] - ang_vel_yaw = [-0.3, 0.3] # min max [rad/s] + lin_vel_x = [-0.0, 1.0] # min max [m/s] + lin_vel_y = [-0.5, 0.5] # min max [m/s] + ang_vel_yaw = [-1.5, 1.5] # min max [rad/s] heading = [-3.14, 3.14] class rewards: + # quite important to keep it right base_height_target = Robot.height - min_dist = 0.07 - max_dist = 0.14 - + min_dist = 0.2 + max_dist = 0.5 # put some settings here for LLM parameter tuning - target_joint_pos_scale = 0.17 # rad - target_feet_height = 0.02 # m - cycle_time = 0.2 # sec + target_joint_pos_scale = 0.24 # rad + target_feet_height = 0.06 # m + + cycle_time = 0.4 # sec # if true negative total rewards are clipped at zero (avoids early termination problems) only_positive_rewards = True # tracking reward = exp(error*sigma) @@ -160,13 +172,14 @@ class rewards: max_contact_force = 400 # forces above this value are penalized class scales: + termination = -10.0 # reference motion tracking - joint_pos = 1.6 - feet_clearance = 1.2 - feet_contact_number = 1.2 + joint_pos = 1.2 # 1.6 + feet_clearance = 0.8 # 1.2 + feet_contact_number = 0.8 # 0.8 # 1.4 + # gait feet_air_time = 1.2 foot_slip = -0.05 - 
feet_distance = 0.2 knee_distance = 0.2 # contact feet_contact_forces = -0.01 @@ -174,19 +187,19 @@ class scales: tracking_lin_vel = 1.2 tracking_ang_vel = 1.1 vel_mismatch_exp = 0.5 # lin_z; ang x,y - low_speed = 0.4 + low_speed = 0.2 track_vel_hard = 0.5 # base pos - default_joint_pos = 1.0 - orientation = 1 + default_joint_pos = 0.5 + orientation = 1.0 base_height = 0.2 base_acc = 0.2 # energy action_smoothness = -0.002 torques = -1e-5 - dof_vel = -5e-4 - dof_acc = -1e-7 + dof_vel = -5e-4 # -1e-3 + dof_acc = -2.5e-7 # -1e-7 # -2.5e-7 collision = -1.0 class normalization: @@ -207,7 +220,44 @@ class viewer: lookat = [0, -2, 0] -class ZerothCfgPPO(LeggedRobotCfgPPO): +class GprHeadlessStandingCfg(GprHeadlessCfg): + """Configuration class for the GPR humanoid robot.""" + + class init_state(LeggedRobotCfg.init_state): + pos = [0.0, 0.0, Robot.standing_height + 0.025] + rot = Robot.rotation + default_joint_angles = {k: 0.0 for k in Robot.all_joints()} + + class rewards: + # quite important to keep it right + base_height_target = Robot.standing_height + min_dist = 0.2 + max_dist = 0.5 + # put some settings here for LLM parameter tuning + target_joint_pos_scale = 0.17 # rad + target_feet_height = 0.06 # m + cycle_time = 0.4 # sec + # if true negative total rewards are clipped at zero (avoids early termination problems) + only_positive_rewards = False + # tracking reward = exp(error*sigma) + tracking_sigma = 5 + max_contact_force = 200 # forces above this value are penalized + + class scales: + # base pos + default_joint_pos = 1.0 + orientation = 1 + base_height = 0.2 + base_acc = 0.2 + # energy + action_smoothness = -0.002 + torques = -1e-5 + dof_vel = -1e-3 + dof_acc = -2.5e-7 + collision = -1.0 + + +class GprHeadlessCfgPPO(LeggedRobotCfgPPO): seed = 5 runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner @@ -231,8 +281,8 @@ class runner: max_iterations = 3001 # number of policy updates # logging - save_interval = 300 # check for potential saves every this many iterations - experiment_name = "zeroth" + save_interval = 100 # check for potential saves every this many iterations + experiment_name = "gpr_headless" run_name = "" # load and resume resume = False diff --git a/sim/envs/humanoids/gpr_headless_env.py b/sim/envs/humanoids/gpr_headless_env.py new file mode 100644 index 00000000..98cbbac8 --- /dev/null +++ b/sim/envs/humanoids/gpr_headless_env.py @@ -0,0 +1,626 @@ +# mypy: disable-error-code="valid-newtype" +"""Defines the environment for training the humanoid.""" + +from isaacgym.torch_utils import * # isort:skip + +from sim.envs.base.legged_robot import LeggedRobot +from sim.resources.gpr_headless.joints import Robot +from sim.utils.math import wrap_to_pi +from sim.utils.terrain import HumanoidTerrain + +from isaacgym import gymtorch # isort:skip + +import torch # isort:skip +import random # isort:skip + + +class GprHeadlessEnv(LeggedRobot): + """GprHeadlessEnv is a class that represents a custom environment for a legged robot. + + Args: + cfg (LeggedRobotCfg): Configuration object for the legged robot. + sim_params: Parameters for the simulation. + physics_engine: Physics engine used in the simulation. + sim_device: Device used for the simulation. + headless: Flag indicating whether the simulation should be run in headless mode. + + Attributes: + last_feet_z (float): The z-coordinate of the last feet position. + feet_height (torch.Tensor): Tensor representing the height of the feet. + sim (gymtorch.GymSim): The simulation object. + terrain (HumanoidTerrain): The terrain object. 
+ up_axis_idx (int): The index representing the up axis. + command_input (torch.Tensor): Tensor representing the command input. + privileged_obs_buf (torch.Tensor): Tensor representing the privileged observations buffer. + obs_buf (torch.Tensor): Tensor representing the observations buffer. + obs_history (collections.deque): Deque containing the history of observations. + critic_history (collections.deque): Deque containing the history of critic observations. + + Methods: + _push_robots(): Randomly pushes the robots by setting a randomized base velocity. + _get_phase(): Calculates the phase of the gait cycle. + _get_gait_phase(): Calculates the gait phase. + compute_ref_state(): Computes the reference state. + create_sim(): Creates the simulation, terrain, and environments. + _get_noise_scale_vec(cfg): Sets a vector used to scale the noise added to the observations. + step(actions): Performs a simulation step with the given actions. + compute_observations(): Computes the observations. + reset_idx(env_ids): Resets the environment for the specified environment IDs. + """ + + def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): + super().__init__(cfg, sim_params, physics_engine, sim_device, headless) + self.last_feet_z = self.cfg.asset.default_feet_height + self.feet_height = torch.zeros((self.num_envs, 2), device=self.device) + self.reset_idx(torch.tensor(range(self.num_envs), device=self.device)) + + env_handle = self.envs[0] + actor_handle = self.actor_handles[0] + + self.legs_joints = {} + for name, joint in Robot.legs.left.joints_motors(): + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) + self.legs_joints["left_" + name] = joint_handle + + for name, joint in Robot.legs.right.joints_motors(): + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) + self.legs_joints["right_" + name] = joint_handle + + self.compute_observations() + self._initialize_push_intervals() + + def step(self, actions): + """Apply actions, simulate, call self.post_physics_step() + + Args: + actions (torch.Tensor): Tensor of shape (num_envs, num_actions_per_env) + """ + if self.cfg.env.use_ref_actions: + actions += self.ref_action + actions = torch.clip(actions, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions) + + # dynamic randomization + delay = torch.rand((self.num_envs, 1), device=self.device) * self.cfg.domain_rand.action_delay + actions = (1 - delay) * actions + delay * self.actions + actions += self.cfg.domain_rand.action_noise * torch.randn_like(actions) * actions + + clip_actions = self.cfg.normalization.clip_actions + self.actions = torch.clip(actions, -clip_actions, clip_actions).to(self.device) + # step physics and render each frame + target_positions = self.actions[:, : self.cfg.env.num_actions // 2] + target_velocities = self.actions[:, self.cfg.env.num_actions // 2 :] + self.render() + for _ in range(self.cfg.control.decimation): + self.torques = self._compute_torques(target_positions, target_velocities).view(self.torques.shape) + self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(self.torques)) + + self.gym.simulate(self.sim) + if self.device == "cpu": + self.gym.fetch_results(self.sim, True) + self.gym.refresh_dof_state_tensor(self.sim) + self.post_physics_step() + + # return clipped obs, clipped states (None), rewards, dones and infos + clip_obs = self.cfg.normalization.clip_observations + self.obs_buf = torch.clip(self.obs_buf, -clip_obs, clip_obs) + if 
self.privileged_obs_buf is not None: + self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs) + return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras + + def _compute_torques(self, positions, velocities): + """Compute torques from position and velocity targets. + The scaled targets are tracked by a PD controller around the default joint positions. + [NOTE]: torques must have the same dimension as the number of DOFs, even if some DOFs are not actuated. + + Args: + positions (torch.Tensor): Position targets, applied as scaled offsets from the default joint positions + velocities (torch.Tensor): Velocity targets, scaled by the action scale + + Returns: + [torch.Tensor]: Torques sent to the simulation + """ + pos_scaled = positions * self.cfg.control.action_scale + vel_scaled = velocities * self.cfg.control.action_scale + p_gains = self.p_gains + d_gains = self.d_gains + torques = p_gains * (pos_scaled + self.default_dof_pos - self.dof_pos) + d_gains * (vel_scaled - self.dof_vel) + res = torch.clip(torques, -self.torque_limits, self.torque_limits) + return res + + def _push_robots(self, env_ids=None): + """Randomly pushes the robots for specific environments. + + If env_ids is None, push all environments; otherwise, push only for the provided indices. + """ + max_vel = self.cfg.domain_rand.max_push_vel_xy + max_push_angular = self.cfg.domain_rand.max_push_ang_vel + + if env_ids is None: + env_ids = torch.arange(self.num_envs, device=self.device) + + # For the selected environments, sample random push forces and torques + self.rand_push_force[env_ids, :2] = torch_rand_float(-max_vel, max_vel, (len(env_ids), 2), device=self.device) + self.root_states[env_ids, 7:9] = self.rand_push_force[env_ids, :2] + + self.rand_push_torque[env_ids] = torch_rand_float( + -max_push_angular, max_push_angular, (len(env_ids), 3), device=self.device + ) + self.root_states[env_ids, 10:13] = self.rand_push_torque[env_ids] + + self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) + + def _get_phase(self): + cycle_time = self.cfg.rewards.cycle_time + phase = self.episode_length_buf * self.dt / cycle_time + return phase + + def _get_gait_phase(self): + # return float mask 1 is stance, 0 is swing + phase = self._get_phase() + sin_pos = torch.sin(2 * torch.pi * phase) + # Add double support phase + stance_mask = torch.zeros((self.num_envs, 2), device=self.device) + # left foot stance + stance_mask[:, 0] = sin_pos >= 0 + # right foot stance + stance_mask[:, 1] = sin_pos < 0 + # Double support phase + stance_mask[torch.abs(sin_pos) < 0.1] = 1 + + return stance_mask + + def check_termination(self): + """Check if environments need to be reset""" + self.reset_buf = torch.any( + torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1.0, + dim=1, + ) + self.reset_buf |= self.root_states[:, 2] < self.cfg.asset.termination_height + self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs + self.reset_buf |= self.time_out_buf + + def compute_ref_state(self): + phase = self._get_phase() + sin_pos = torch.sin(2 * torch.pi * phase) + sin_pos_l = sin_pos.clone() + sin_pos_r = sin_pos.clone() + default_clone = self.default_dof_pos.clone() + self.ref_dof_pos = self.default_dof_pos.repeat(self.num_envs, 1) + + scale_1 = self.cfg.rewards.target_joint_pos_scale + scale_2 = 2 * scale_1 + # left foot stance phase set to default joint pos + sin_pos_l[sin_pos_l > 0] = 0 + + self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += -sin_pos_l * scale_1 + self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] 
+= -sin_pos_l * scale_2 + self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1 + + # right foot stance phase set to default joint pos + sin_pos_r[sin_pos_r < 0] = 0 + # pfb30 last one should be -1 + self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += -sin_pos_r * scale_1 + self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += -sin_pos_r * scale_2 + self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1 + + # Double support phase + self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0 + + self.ref_action = 2 * self.ref_dof_pos + + def create_sim(self): + """Creates simulation, terrain and evironments""" + self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly + self.sim = self.gym.create_sim( + self.sim_device_id, + self.graphics_device_id, + self.physics_engine, + self.sim_params, + ) + mesh_type = self.cfg.terrain.mesh_type + if mesh_type in ["heightfield", "trimesh"]: + self.terrain = HumanoidTerrain(self.cfg.terrain, self.num_envs) + if mesh_type == "plane": + self._create_ground_plane() + elif mesh_type == "heightfield": + self._create_heightfield() + elif mesh_type == "trimesh": + self._create_trimesh() + elif mesh_type is not None: + raise ValueError("Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]") + self._create_envs() + + def _get_noise_scale_vec(self, cfg): + """Sets a vector used to scale the noise added to the observations. + [NOTE]: Must be adapted when changing the observations structure + + Args: + cfg (Dict): Environment config file + + Returns: + [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1] + """ + num_joints = self.num_actions // 2 # Since num_actions is now NUM_JOINTS * 2 + noise_vec = torch.zeros(self.cfg.env.num_single_obs, device=self.device) + self.add_noise = self.cfg.noise.add_noise + noise_scales = self.cfg.noise.noise_scales + noise_vec[0:5] = 0.0 # commands + noise_vec[5 : (num_joints + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos + noise_vec[(num_joints + 5) : (2 * num_joints + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel + noise_vec[(2 * num_joints + 5) : (4 * num_joints + 5)] = 0.0 # previous actions (now includes both pos and vel) + noise_vec[(4 * num_joints + 5) : (4 * num_joints + 8)] = ( + noise_scales.quat * self.obs_scales.quat + ) # projected_gravity + return noise_vec + + def compute_observations(self): + phase = self._get_phase() + self.compute_ref_state() + + sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1) + cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1) + + stance_mask = self._get_gait_phase() + contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5.0 + + self.command_input = torch.cat((sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1) + q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos + dq = self.dof_vel * self.obs_scales.dof_vel + + diff = self.dof_pos - self.ref_dof_pos + self.privileged_obs_buf = torch.cat( + ( + self.command_input, # 2 + 3 + (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 12 + self.dof_vel * self.obs_scales.dof_vel, # 12 + self.actions, # 12 + diff, # 12 + self.base_lin_vel * self.obs_scales.lin_vel, # 3 + self.base_ang_vel * self.obs_scales.ang_vel, # 3 + self.base_euler_xyz * self.obs_scales.quat, # 3 + self.rand_push_force[:, :2], # 3 + self.rand_push_torque, # 3 + self.env_frictions, # 1 + self.body_mass / 30.0, # 1 + stance_mask, # 2 + contact_mask, # 2 + ), + 
dim=-1, + ) + + obs_buf = torch.cat( + ( + self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, ang_vel_yaw) + q, # 12D + dq, # 12D + self.actions, # 12D + self.projected_gravity, # 3 + ), + dim=-1, + ) + + if self.cfg.terrain.measure_heights: + heights = ( + torch.clip( + self.root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights, + -1, + 1.0, + ) + * self.obs_scales.height_measurements + ) + self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1) + if self.add_noise: + obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level + else: + obs_now = obs_buf.clone() + self.obs_history.append(obs_now) + self.critic_history.append(self.privileged_obs_buf) + + obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1) # N,T,K + + self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K + self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1) + + def reset_idx(self, env_ids): + super().reset_idx(env_ids) + for i in range(self.obs_history.maxlen): + self.obs_history[i][env_ids] *= 0 + for i in range(self.critic_history.maxlen): + self.critic_history[i][env_ids] *= 0 + + # ================================================ Rewards ================================================== # + def _reward_joint_pos(self): + """Calculates the reward based on the difference between the current joint positions and the target joint positions.""" + joint_pos = self.dof_pos.clone() + pos_target = self.ref_dof_pos.clone() + diff = joint_pos - pos_target + r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5) + + return r + + def _reward_feet_distance(self): + """Calculates the reward based on the distance between the feet. Penalizes feet that get too close to each other or too far apart.""" + foot_pos = self.rigid_state[:, self.feet_indices, :2] + foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1) + fd = self.cfg.rewards.min_dist + max_df = self.cfg.rewards.max_dist + d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) + d_max = torch.clamp(foot_dist - max_df, 0, 0.5) + return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 + + def _reward_knee_distance(self): + """Calculates the reward based on the distance between the knees of the humanoid.""" + foot_pos = self.rigid_state[:, self.knee_indices, :2] + foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1) + fd = self.cfg.rewards.min_dist + max_df = self.cfg.rewards.max_dist / 2 + d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) + d_max = torch.clamp(foot_dist - max_df, 0, 0.5) + return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 + + def _reward_foot_slip(self): + """Calculates the reward for minimizing foot slip. The reward is based on the contact forces + and the speed of the feet. A contact threshold is used to determine if the foot is in contact + with the ground. The speed of the foot is calculated and scaled by the contact condition. + """ + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2) + rew = torch.sqrt(foot_speed_norm) + rew *= contact + return torch.sum(rew, dim=1) + + def _reward_feet_air_time(self): + """Calculates the reward for feet air time, promoting longer steps. This is achieved by + checking the first contact with the ground after being in the air. 
The air time is + limited to a maximum value for reward calculation. + """ + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + stance_mask = self._get_gait_phase() + self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts) + self.last_contacts = contact + first_contact = (self.feet_air_time > 0.0) * self.contact_filt + self.feet_air_time += self.dt + air_time = self.feet_air_time.clamp(0, 0.5) * first_contact + self.feet_air_time *= ~self.contact_filt + return air_time.sum(dim=1) + + def _reward_feet_contact_number(self): + """Calculates a reward based on the number of feet contacts aligning with the gait phase. + Rewards or penalizes depending on whether the foot contact matches the expected gait phase. + """ + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + stance_mask = self._get_gait_phase() + reward = torch.where(contact == stance_mask, 1.0, -0.3) + return torch.mean(reward, dim=1) + + def _reward_orientation(self): + """Calculates the reward for maintaining a flat base orientation. It penalizes deviation + from the desired base orientation using the base euler angles and the projected gravity vector. + """ + quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) + orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) + + return (quat_mismatch + orientation) / 2.0 + + def _reward_feet_contact_forces(self): + """Calculates the reward for keeping contact forces within a specified range. Penalizes + high contact forces on the feet. + """ + return torch.sum( + ( + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force + ).clip(0, 400), + dim=1, + ) + + def _reward_default_joint_pos(self): + """Calculates the reward for keeping joint positions close to default positions, with a focus + on penalizing deviation in yaw and roll directions. Excludes yaw and roll from the main penalty. + """ + joint_diff = self.dof_pos - self.default_joint_pd_target + left_yaw_roll = joint_diff[:, [self.legs_joints["left_hip_roll"], self.legs_joints["left_hip_yaw"]]] + right_yaw_roll = joint_diff[:, [self.legs_joints["right_hip_roll"], self.legs_joints["right_hip_yaw"]]] + yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1) + yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50) + + return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1) + + def _reward_base_height(self): + """Calculates the reward based on the robot's base height. Penalizes deviation from a target base height. + The reward is computed based on the height difference between the robot's base and the average height + of its feet when they are in contact with the ground. + """ + stance_mask = self._get_gait_phase() + measured_heights = torch.sum(self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum( + stance_mask, dim=1 + ) + base_height = self.root_states[:, 2] - (measured_heights - self.cfg.asset.default_feet_height) + reward = torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100) + return reward + + def _reward_base_acc(self): + """Computes the reward based on the base's acceleration. Penalizes high accelerations of the robot's base, + encouraging smoother motion. 
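+ The acceleration is approximated by the change in the base linear and angular velocity between consecutive control steps.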
+ """ + root_acc = self.last_root_vel - self.root_states[:, 7:13] + rew = torch.exp(-torch.norm(root_acc, dim=1) * 3) + return rew + + def _reward_vel_mismatch_exp(self): + """Computes a reward based on the mismatch in the robot's linear and angular velocities. + Encourages the robot to maintain a stable velocity by penalizing large deviations. + """ + lin_mismatch = torch.exp(-torch.square(self.base_lin_vel[:, 2]) * 10) + ang_mismatch = torch.exp(-torch.norm(self.base_ang_vel[:, :2], dim=1) * 5.0) + + c_update = (lin_mismatch + ang_mismatch) / 2.0 + + return c_update + + def _reward_track_vel_hard(self): + """Calculates a reward for accurately tracking both linear and angular velocity commands. + Penalizes deviations from specified linear and angular velocity targets. + """ + # Tracking of linear velocity commands (xy axes) + lin_vel_error = torch.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) + lin_vel_error_exp = torch.exp(-lin_vel_error * 10) + + # Tracking of angular velocity commands (yaw) + ang_vel_error = torch.abs(self.commands[:, 2] - self.base_ang_vel[:, 2]) + ang_vel_error_exp = torch.exp(-ang_vel_error * 10) + + linear_error = 0.2 * (lin_vel_error + ang_vel_error) + + return (lin_vel_error_exp + ang_vel_error_exp) / 2.0 - linear_error + + def _reward_tracking_lin_vel(self): + """Tracks linear velocity commands along the xy axes. + Calculates a reward based on how closely the robot's linear velocity matches the commanded values. + """ + lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1) + return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma) + + def _reward_tracking_ang_vel(self): + """Tracks angular velocity commands for yaw rotation. + Computes a reward based on how closely the robot's angular velocity matches the commanded yaw values. + """ + ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2]) + return torch.exp(-ang_vel_error * self.cfg.rewards.tracking_sigma) + + def _reward_feet_clearance(self): + """Calculates reward based on the clearance of the swing leg from the ground during movement. + Encourages appropriate lift of the feet during the swing phase of the gait. + """ + # Compute feet contact mask + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + + # Get the z-position of the feet and compute the change in z-position + feet_z = self.rigid_state[:, self.feet_indices, 2] - self.cfg.asset.default_feet_height + delta_z = feet_z - self.last_feet_z + self.feet_height += delta_z + self.last_feet_z = feet_z + + # Compute swing mask + swing_mask = 1 - self._get_gait_phase() + + # feet height should be closed to target feet height at the peak + rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.02 + rew_pos = torch.sum(rew_pos * swing_mask, dim=1) + self.feet_height *= ~contact + + return rew_pos + + def _reward_low_speed(self): + """Rewards or penalizes the robot based on its speed relative to the commanded speed. + This function checks if the robot is moving too slow, too fast, or at the desired speed, + and if the movement direction matches the command. 
+ """ + # Calculate the absolute value of speed and command for comparison + absolute_speed = torch.abs(self.base_lin_vel[:, 0]) + absolute_command = torch.abs(self.commands[:, 0]) + + # Define speed criteria for desired range + speed_too_low = absolute_speed < 0.5 * absolute_command + speed_too_high = absolute_speed > 1.2 * absolute_command + speed_desired = ~(speed_too_low | speed_too_high) + + # Check if the speed and command directions are mismatched + sign_mismatch = torch.sign(self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0]) + + # Initialize reward tensor + reward = torch.zeros_like(self.base_lin_vel[:, 0]) + + # Assign rewards based on conditions + # Speed too low + reward[speed_too_low] = -1.0 + # Speed too high + reward[speed_too_high] = 0.0 + # Speed within desired range + reward[speed_desired] = 1.2 + # Sign mismatch has the highest priority + reward[sign_mismatch] = -2.0 + return reward * (self.commands[:, 0].abs() > 0.1) + + def _reward_torques(self): + """Penalizes the use of high torques in the robot's joints. Encourages efficient movement by minimizing + the necessary force exerted by the motors. + """ + return torch.sum(torch.square(self.torques), dim=1) + + def _reward_dof_vel(self): + """Penalizes high velocities at the degrees of freedom (DOF) of the robot. This encourages smoother and + more controlled movements. + """ + return torch.sum(torch.square(self.dof_vel), dim=1) + + def _reward_dof_acc(self): + """Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring + smooth and stable motion, reducing wear on the robot's mechanical parts. + """ + return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1) + + def _reward_collision(self): + """Penalizes collisions of the robot with the environment, specifically focusing on selected body parts. + This encourages the robot to avoid undesired contact with objects or surfaces. + """ + return torch.sum( + 1.0 * (torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), + dim=1, + ) + + def _reward_action_smoothness(self): + """Encourages smoothness in the robot's actions by penalizing large differences between consecutive actions. + This is important for achieving fluid motion and reducing mechanical stress. + """ + term_1 = torch.sum(torch.square(self.last_actions - self.actions), dim=1) + term_2 = torch.sum( + torch.square(self.actions + self.last_last_actions - 2 * self.last_actions), + dim=1, + ) + term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1) + return term_1 + term_2 + term_3 + + def _reward_termination(self): + """Returns a penalty for each environment that is terminated.""" + penalty = 1.0 + # Apply the penalty element-wise: only environments that need reset get the penalty + return penalty * self.reset_buf.float() + + def _initialize_push_intervals(self): + # Create a tensor of size (num_envs,) for the next push step. + # We'll assume self.num_envs is available and self.dt is the simulation time step. 
+ self.next_push_steps = torch.empty(self.num_envs, dtype=torch.int64, device=self.device) + for i in range(self.num_envs): + random_interval_sec = random.uniform( + self.cfg.domain_rand.push_random_interval_min, self.cfg.domain_rand.push_random_interval_max + ) + self.next_push_steps[i] = self.common_step_counter + int(random_interval_sec / self.dt) + + def _post_physics_step_callback(self): + """Callback called before computing terminations, rewards, and observations.""" + env_ids = ( + (self.episode_length_buf % int(self.cfg.commands.resampling_time / self.dt) == 0) + .nonzero(as_tuple=False) + .flatten() + ) + self._resample_commands(env_ids) + if self.cfg.commands.heading_command: + forward = quat_apply(self.base_quat, self.forward_vec) + heading = torch.atan2(forward[:, 1], forward[:, 0]) + self.commands[:, 2] = torch.clip(0.5 * wrap_to_pi(self.commands[:, 3] - heading), -1.0, 1.0) + if self.cfg.terrain.measure_heights: + self.measured_heights = self._get_heights() + if self.cfg.domain_rand.push_robots: + # Initialize per-env push intervals if not done yet. + if not hasattr(self, "next_push_steps"): + self._initialize_push_intervals() + # Determine which environments are due for a push + envs_to_push = (self.common_step_counter >= self.next_push_steps).nonzero(as_tuple=False).flatten() + if len(envs_to_push) > 0: + # Apply push only to the environments that are due + self._push_robots(envs_to_push) + # For each pushed environment, set a new random push time + for env_id in envs_to_push: + random_interval_sec = random.uniform( + self.cfg.domain_rand.push_random_interval_min, self.cfg.domain_rand.push_random_interval_max + ) + self.next_push_steps[env_id] = self.common_step_counter + int(random_interval_sec / self.dt) diff --git a/sim/envs/humanoids/gpr_headless_latency_config.py b/sim/envs/humanoids/gpr_headless_latency_config.py new file mode 100644 index 00000000..4a7b8eb2 --- /dev/null +++ b/sim/envs/humanoids/gpr_headless_latency_config.py @@ -0,0 +1,356 @@ +"""Defines the environment configuration for the Getting up task""" + +# from kinfer import proto as P + +from sim.env import robot_urdf_path +from sim.envs.base.legged_robot_config import ( # type: ignore + LeggedRobotCfg, + LeggedRobotCfgPPO, +) +from sim.resources.gpr_headless_latency.joints import Robot + +NUM_JOINTS = len(Robot.all_joints()) + + +class GprHeadlessLatencyCfg(LeggedRobotCfg): + """Configuration class for the Legs humanoid robot.""" + + class env(LeggedRobotCfg.env): + # change the observation dim + frame_stack = 15 # 15 # actor + c_frame_stack = 3 # critic + # num_single_obs = 8 + NUM_JOINTS * 4 + 3 # ang vel + num_single_obs = 8 + NUM_JOINTS * 4 # no ang vel + num_observations = int(frame_stack * num_single_obs) + single_num_privileged_obs = 25 + NUM_JOINTS * 5 + num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) + num_joints = NUM_JOINTS + num_actions = NUM_JOINTS * 2 + num_envs = 4096 + episode_length_s = 24 # episode length in seconds + use_ref_actions = False + + class safety(LeggedRobotCfg.safety): + # safety factors + pos_limit = 1.0 + vel_limit = 1.0 + torque_limit = 1.0 + + class asset(LeggedRobotCfg.asset): + name = "gpr_headless_latency" + + file = str(robot_urdf_path(name)) + + # foot_name = ["foot1", "foot3"] + # knee_name = ["leg3_shell1", "leg3_shell11"] + # imu_name = "imu" + + foot_name = ["foot1", "foot3"] + knee_name = ["leg3_shell1", "leg3_shell11"] + imu_name = "imu" + + termination_height = 0.83 # 0.43 # 0.33 + default_feet_height = 0.0 + + # 
terminate_after_contacts_on = ["arm1_top", "shoulder", "arm1_top_2", "shoulder_2"] + # terminate_after_contacts_on = ["shoulder", "shoulder_2", "arm1_top", "arm1_top_2"] + terminate_after_contacts_on = [ + "shoulder", + "shoulder_2", + "arm1_top", + "arm1_top_2", + "arm2_shell", + "arm2_shell_2", + "arm3_shell", + "arm3_shell2" "hand_shell", + "hand_shell_2", + "leg0_shell", + "leg0_shell_2", + "leg1_shell", + "leg1_shell3", + "leg2_shell", + "leg2_shell_2", + "body1-part", + "imu", + ] + + penalize_contacts_on = [] + self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter + flip_visual_attachments = False + replace_cylinder_with_capsule = False + fix_base_link = False + + class terrain(LeggedRobotCfg.terrain): + mesh_type = "plane" + # mesh_type = "trimesh" + curriculum = False + # rough terrain only: + measure_heights = False + static_friction = 0.6 + dynamic_friction = 0.6 + terrain_length = 8.0 + terrain_width = 8.0 + num_rows = 10 # number of terrain rows (levels) + num_cols = 10 # number of terrain cols (types) + max_init_terrain_level = 10 # starting curriculum state + # plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down + terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0] + restitution = 0.0 + + class noise: + add_noise = True + noise_level = 0.3 # 1.5 # 0.6 # scales other values + + class noise_scales: + dof_pos = 0.05 + dof_vel = 2.5 + ang_vel = 0.4 # 0.1 + lin_vel = 0.05 + quat = 0.08 # 0.03 + height_measurements = 0.1 + + class init_state(LeggedRobotCfg.init_state): + pos = [0.0, 0.0, Robot.height + 0.025] + rot = Robot.rotation + default_joint_angles = {k: 0.0 for k in Robot.all_joints()} + + default_positions = Robot.default_standing() + for joint in default_positions: + default_joint_angles[joint] = default_positions[joint] + + class control(LeggedRobotCfg.control): + # PD Drive parameters: + stiffness = Robot.stiffness() + damping = Robot.damping() + # action scale: target angle = actionScale * action + defaultAngle + action_scale = 0.25 + # decimation: Number of control action updates @ sim DT per policy DT + decimation = 20 # Policy at 50hz + pd_decimation = 1 # PD at 1000hz + + class sim(LeggedRobotCfg.sim): + dt = 0.001 # 1000 Hz + substeps = 1 # 2 + up_axis = 1 # 0 is y, 1 is z + + class physx(LeggedRobotCfg.sim.physx): + num_threads = 10 + solver_type = 1 # 0: pgs, 1: tgs + num_position_iterations = 4 + num_velocity_iterations = 1 + contact_offset = 0.01 # [m] + rest_offset = 0.0 # [m] + bounce_threshold_velocity = 0.1 # [m/s] + max_depenetration_velocity = 1.0 + max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more + default_buffer_size_multiplier = 5 + # 0: never, 1: last sub-step, 2: all sub-steps (default=2) + contact_collection = 2 + + class domain_rand(LeggedRobotCfg.domain_rand): + start_pos_noise = 0.1 + randomize_friction = True + friction_range = [0.1, 2.0] + + randomize_base_mass = True + added_mass_range = [-1.0, 1.0] # [-2.0, 2.0] + + randomize_link_mass = True + link_mass_multiplier_range = [0.9, 1.1] # [0.8, 1.2] + + push_robots = True + push_random_interval_min = 1.0 + push_random_interval_max = 4.0 + max_push_vel_xy = 0.2 # 0.2 + max_push_ang_vel = 0.4 # 0.4 + + # dynamic randomization + action_noise = 0.02 + action_delay = 0.5 + randomize_pd_gains = False + + randomize_motor_zero_offset = True + motor_zero_offset_range = [-0.015, 0.015] # [-0.035, 0.035] # Offset to add to the motor angles + + randomize_joint_friction = True + joint_friction_range = [0.01, 1.15] + + randomize_joint_damping = True + 
joint_damping_range = [0.3, 1.5] + + randomize_joint_armature = True + joint_armature_range = [0.008, 0.06] + + randomize_pd_gains = True + stiffness_multiplier_range = [0.9, 1.1] + damping_multiplier_range = [0.9, 1.1] + + randomize_calculated_torque = True + torque_multiplier_range = [0.8, 1.2] + + # Latency randomization + add_cmd_action_latency = True # Enable action latency + randomize_cmd_action_latency = True # Randomize the latency amount + range_cmd_action_latency = [0, 2] # Range of latency in simulation steps + + add_obs_latency = True # Enable observation latency + randomize_obs_motor_latency = True # Randomize motor sensor latency + range_obs_motor_latency = [0, 5] # [0, 10] # Range of motor latency in simulation steps + randomize_obs_imu_latency = True # Randomize IMU sensor latency + range_obs_imu_latency = [0, 5] # [0, 10] # Range of IMU latency in simulation steps + + class commands(LeggedRobotCfg.commands): + # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) + num_commands = 4 + resampling_time = 8.0 # time before command are changed[s] + heading_command = True # if true: compute ang vel command from heading error + + curriculum = True + max_curriculum = 1.7 + + class ranges: + lin_vel_x = [-0.0, 1.0] # min max [m/s] + lin_vel_y = [-0.5, 0.5] # min max [m/s] + ang_vel_yaw = [-1.5, 1.5] # min max [rad/s] + heading = [-3.14, 3.14] + + class rewards: + # quite important to keep it right + base_height_target = Robot.height + min_dist = 0.2 + max_dist = 0.5 + # put some settings here for LLM parameter tuning + target_joint_pos_scale = 0.24 # rad + target_feet_height = 0.06 # m + + cycle_time = 0.4 # sec + # if true negative total rewards are clipped at zero (avoids early termination problems) + only_positive_rewards = True + # tracking reward = exp(error*sigma) + tracking_sigma = 5.0 + max_contact_force = 400 # forces above this value are penalized + + class scales: + termination = -10.0 + # reference motion tracking + joint_pos = 1.6 + feet_clearance = 1.2 + feet_contact_number = 1.2 # 1.2 + # gait + feet_air_time = 1.2 + foot_slip = -0.05 + feet_distance = 0.2 + knee_distance = 0.2 + # contact + feet_contact_forces = -0.01 + # vel tracking + tracking_lin_vel = 1.2 + tracking_ang_vel = 1.1 + vel_mismatch_exp = 0.5 # lin_z; ang x,y + low_speed = 0.2 + track_vel_hard = 0.5 + + # base pos + default_joint_pos = 0.5 + orientation = 1.0 + base_height = 0.2 + base_acc = 0.2 + # energy + action_smoothness = -0.003 # -0.002 + torques = -1.5e-5 # -1e-5 + dof_vel = -5e-4 # -1e-3 + dof_acc = -1e-7 # -2.5e-7 + collision = -1.0 + + # action_smoothness = -0.004 + # torques = -1e-10 + # dof_vel = -1e-5 + # dof_acc = -5e-9 + # collision = -1. 
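+ # (the commented-out scale values above are alternative penalties kept for reference)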
+ + class normalization: + class obs_scales: + lin_vel = 2.0 + ang_vel = 1.0 + dof_pos = 1.0 + dof_vel = 0.05 + quat = 1.0 + height_measurements = 5.0 + + clip_observations = 18.0 + clip_actions = 18.0 + + class viewer: + ref_env = 0 + pos = [4, -4, 2] + lookat = [0, -2, 0] + + +class GprHeadlessLatencyStandingCfg(GprHeadlessLatencyCfg): + """Configuration class for the GPR humanoid robot.""" + + class init_state(LeggedRobotCfg.init_state): + pos = [0.0, 0.0, Robot.standing_height + 0.025] + rot = Robot.rotation + default_joint_angles = {k: 0.0 for k in Robot.all_joints()} + + class rewards: + # quite important to keep it right + base_height_target = Robot.standing_height + min_dist = 0.2 + max_dist = 0.5 + # put some settings here for LLM parameter tuning + target_joint_pos_scale = 0.17 # rad + target_feet_height = 0.06 # m + cycle_time = 0.4 # sec + # if true negative total rewards are clipped at zero (avoids early termination problems) + only_positive_rewards = False + # tracking reward = exp(error*sigma) + tracking_sigma = 5 + max_contact_force = 200 # forces above this value are penalized + + class scales: + # base pos + default_joint_pos = 1.0 + orientation = 1 + base_height = 0.2 + base_acc = 0.2 + # energy + action_smoothness = -0.002 + torques = -1e-5 + dof_vel = -1e-3 + dof_acc = -2.5e-7 + collision = -1.0 + + +class GprHeadlessLatencyCfgPPO(LeggedRobotCfgPPO): + seed = 5 + + class policy: + init_noise_std = 1.0 + actor_hidden_dims = [512, 256, 128] + critic_hidden_dims = [768, 256, 128] + + class algorithm(LeggedRobotCfgPPO.algorithm): + entropy_coef = 0.001 + learning_rate = 2e-5 + num_learning_epochs = 2 + gamma = 0.994 + lam = 0.9 + num_mini_batches = 4 + + class runner: + policy_class_name = "ActorCritic" + algorithm_class_name = "PPO" + num_steps_per_env = 60 # per iteration + max_iterations = 10001 # number of policy updates + + # logging + save_interval = 100 # check for potential saves every this many iterations + experiment_name = "gpr_headless_latency" + run_name = "" + # load and resume + resume = False + load_run = -1 # -1 = last run + checkpoint = -1 # -1 = last saved model + resume_path = None diff --git a/sim/envs/humanoids/gpr_headless_latency_env.py b/sim/envs/humanoids/gpr_headless_latency_env.py new file mode 100644 index 00000000..3de44006 --- /dev/null +++ b/sim/envs/humanoids/gpr_headless_latency_env.py @@ -0,0 +1,694 @@ +# mypy: disable-error-code="valid-newtype" +"""Defines the environment for training the humanoid.""" + +import random + +from sim.envs.base.legged_robot_latency import LeggedRobotLatency +from sim.resources.gpr_headless_latency.joints import Robot +from sim.utils.math import wrap_to_pi +from sim.utils.terrain import HumanoidTerrain + +from isaacgym.torch_utils import * # isort:skip + + +from isaacgym import gymtorch # isort:skip + +import torch # isort:skip + + +class GprHeadlessLatencyEnv(LeggedRobotLatency): + """GprHeadlessLatencyEnv is a class that represents a custom environment for a legged robot. + + Args: + cfg (LeggedRobotCfg): Configuration object for the legged robot. + sim_params: Parameters for the simulation. + physics_engine: Physics engine used in the simulation. + sim_device: Device used for the simulation. + headless: Flag indicating whether the simulation should be run in headless mode. + + Attributes: + last_feet_z (float): The z-coordinate of the last feet position. + feet_height (torch.Tensor): Tensor representing the height of the feet. + sim (gymtorch.GymSim): The simulation object. 
+ terrain (HumanoidTerrain): The terrain object. + up_axis_idx (int): The index representing the up axis. + command_input (torch.Tensor): Tensor representing the command input. + privileged_obs_buf (torch.Tensor): Tensor representing the privileged observations buffer. + obs_buf (torch.Tensor): Tensor representing the observations buffer. + obs_history (collections.deque): Deque containing the history of observations. + critic_history (collections.deque): Deque containing the history of critic observations. + + Methods: + _push_robots(): Randomly pushes the robots by setting a randomized base velocity. + _get_phase(): Calculates the phase of the gait cycle. + _get_gait_phase(): Calculates the gait phase. + compute_ref_state(): Computes the reference state. + create_sim(): Creates the simulation, terrain, and environments. + _get_noise_scale_vec(cfg): Sets a vector used to scale the noise added to the observations. + step(actions): Performs a simulation step with the given actions. + compute_observations(): Computes the observations. + reset_idx(env_ids): Resets the environment for the specified environment IDs. + """ + + def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): + super().__init__(cfg, sim_params, physics_engine, sim_device, headless) + self.last_feet_z = self.cfg.asset.default_feet_height + self.feet_height = torch.zeros((self.num_envs, 2), device=self.device) + self.reset_idx(torch.tensor(range(self.num_envs), device=self.device)) + + env_handle = self.envs[0] + actor_handle = self.actor_handles[0] + + self.legs_joints = {} + for name, joint in Robot.legs.left.joints_motors(): + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) + self.legs_joints["left_" + name] = joint_handle + + for name, joint in Robot.legs.right.joints_motors(): + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) + self.legs_joints["right_" + name] = joint_handle + + self.compute_observations() + self._initialize_push_intervals() + + def step(self, actions): + """Apply actions, simulate, call self.post_physics_step() + + Args: + actions (torch.Tensor): Tensor of shape (num_envs, num_actions_per_env) + """ + if self.cfg.env.use_ref_actions: + actions += self.ref_action + actions = torch.clip(actions, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions) + + # dynamic randomization + delay = torch.rand((self.num_envs, 1), device=self.device) * self.cfg.domain_rand.action_delay + actions = (1 - delay) * actions + delay * self.actions + actions += self.cfg.domain_rand.action_noise * torch.randn_like(actions) * actions + + clip_actions = self.cfg.normalization.clip_actions + self.actions = torch.clip(actions, -clip_actions, clip_actions).to(self.device) + # step physics and render each frame + target_positions = self.actions[:, : self.cfg.env.num_actions // 2] + target_velocities = self.actions[:, self.cfg.env.num_actions // 2 :] + self.render() + for _ in range(self.cfg.control.decimation): + self.torques = self._compute_torques(target_positions, target_velocities).view(self.torques.shape) + self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(self.torques)) + + self.gym.simulate(self.sim) + if self.device == "cpu": + self.gym.fetch_results(self.sim, True) + self.gym.refresh_dof_state_tensor(self.sim) + self.post_physics_step() + + # return clipped obs, clipped states (None), rewards, dones and infos + clip_obs = self.cfg.normalization.clip_observations + self.obs_buf = 
torch.clip(self.obs_buf, -clip_obs, clip_obs) + if self.privileged_obs_buf is not None: + self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs) + return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras + + def _compute_torques(self, positions, velocities): + """Compute torques from position and velocity targets. + The scaled targets are tracked by a PD controller around the default joint positions. + [NOTE]: torques must have the same dimension as the number of DOFs, even if some DOFs are not actuated. + + Args: + positions (torch.Tensor): Position targets, applied as scaled offsets from the default joint positions + velocities (torch.Tensor): Velocity targets, scaled by the action scale + + Returns: + [torch.Tensor]: Torques sent to the simulation + """ + pos_scaled = positions * self.cfg.control.action_scale + vel_scaled = velocities * self.cfg.control.action_scale + p_gains = self.p_gains + d_gains = self.d_gains + torques = p_gains * (pos_scaled + self.default_dof_pos - self.dof_pos) + d_gains * (vel_scaled - self.dof_vel) + res = torch.clip(torques, -self.torque_limits, self.torque_limits) + return res + + # def _push_robots(self): + # """Randomly pushes the robots. Emulates an impulse by setting a randomized base velocity.""" + # max_vel = self.cfg.domain_rand.max_push_vel_xy + # max_push_angular = self.cfg.domain_rand.max_push_ang_vel + # self.rand_push_force[:, :2] = torch_rand_float( + # -max_vel, max_vel, (self.num_envs, 2), device=self.device + # ) # lin vel x/y + # self.root_states[:, 7:9] = self.rand_push_force[:, :2] + + # self.rand_push_torque = torch_rand_float( + # -max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device + # ) + # self.root_states[:, 10:13] = self.rand_push_torque + + # self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) + + def _push_robots(self, env_ids=None): + """Randomly pushes the robots for specific environments. + + If env_ids is None, push all environments; otherwise, push only for the provided indices. 
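+ The push is applied by writing a random base linear (xy) and angular velocity directly into the root state tensor.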
+ """ + max_vel = self.cfg.domain_rand.max_push_vel_xy + max_push_angular = self.cfg.domain_rand.max_push_ang_vel + + if env_ids is None: + env_ids = torch.arange(self.num_envs, device=self.device) + + # For the selected environments, sample random push forces and torques + self.rand_push_force[env_ids, :2] = torch_rand_float(-max_vel, max_vel, (len(env_ids), 2), device=self.device) + self.root_states[env_ids, 7:9] = self.rand_push_force[env_ids, :2] + + self.rand_push_torque[env_ids] = torch_rand_float( + -max_push_angular, max_push_angular, (len(env_ids), 3), device=self.device + ) + self.root_states[env_ids, 10:13] = self.rand_push_torque[env_ids] + + self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) + + def _get_phase(self): + cycle_time = self.cfg.rewards.cycle_time + phase = self.episode_length_buf * self.dt / cycle_time + return phase + + def _get_gait_phase(self): + # return float mask 1 is stance, 0 is swing + phase = self._get_phase() + sin_pos = torch.sin(2 * torch.pi * phase) + # Add double support phase + stance_mask = torch.zeros((self.num_envs, 2), device=self.device) + # left foot stance + stance_mask[:, 0] = sin_pos >= 0 + # right foot stance + stance_mask[:, 1] = sin_pos < 0 + # Double support phase + stance_mask[torch.abs(sin_pos) < 0.1] = 1 + + return stance_mask + + def check_termination(self): + """Check if environments need to be reset""" + self.reset_buf = torch.any( + torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1.0, + dim=1, + ) + self.reset_buf |= self.root_states[:, 2] < self.cfg.asset.termination_height + self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs + self.reset_buf |= self.time_out_buf + + def compute_ref_state(self): + phase = self._get_phase() + sin_pos = torch.sin(2 * torch.pi * phase) + sin_pos_l = sin_pos.clone() + sin_pos_r = sin_pos.clone() + default_clone = self.default_dof_pos.clone() + self.ref_dof_pos = self.default_dof_pos.repeat(self.num_envs, 1) + + scale_1 = self.cfg.rewards.target_joint_pos_scale + scale_2 = 2 * scale_1 + # left foot stance phase set to default joint pos + sin_pos_l[sin_pos_l > 0] = 0 + + self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += -sin_pos_l * scale_1 + self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += -sin_pos_l * scale_2 + self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1 + + # right foot stance phase set to default joint pos + sin_pos_r[sin_pos_r < 0] = 0 + # pfb30 last one should be -1 + self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += -sin_pos_r * scale_1 + self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += -sin_pos_r * scale_2 + self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1 + + # Double support phase + self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0 + + self.ref_action = 2 * self.ref_dof_pos + + # def compute_ref_state(self): + # phase = self._get_phase() + # sin_pos = torch.sin(2 * torch.pi * phase) + # sin_pos_l = sin_pos.clone() + # sin_pos_r = sin_pos.clone() + # default_clone = self.default_dof_pos.clone() + # self.ref_dof_pos = self.default_dof_pos.repeat(self.num_envs, 1) + + # scale_1 = self.cfg.rewards.target_joint_pos_scale + # scale_2 = 2 * scale_1 + # # left foot stance phase set to default joint pos + # sin_pos_l[sin_pos_l > 0] = 0 + + # self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += -sin_pos_l * scale_1 + # self.ref_dof_pos[:, 
self.legs_joints["left_knee_pitch"]] += -sin_pos_l * scale_2 + # self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1 + + # # right foot stance phase set to default joint pos + # sin_pos_r[sin_pos_r < 0] = 0 + # # pfb30 last one should be -1 + # self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += -sin_pos_r * scale_1 + # self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += -sin_pos_r * scale_2 + # self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1 + + # # Double support phase + # self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0 + + # self.ref_action = 2 * self.ref_dof_pos + + def create_sim(self): + """Creates simulation, terrain and evironments""" + self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly + self.sim = self.gym.create_sim( + self.sim_device_id, + self.graphics_device_id, + self.physics_engine, + self.sim_params, + ) + mesh_type = self.cfg.terrain.mesh_type + if mesh_type in ["heightfield", "trimesh"]: + self.terrain = HumanoidTerrain(self.cfg.terrain, self.num_envs) + if mesh_type == "plane": + self._create_ground_plane() + elif mesh_type == "heightfield": + self._create_heightfield() + elif mesh_type == "trimesh": + self._create_trimesh() + elif mesh_type is not None: + raise ValueError("Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]") + self._create_envs() + + def _get_noise_scale_vec(self, cfg): + """Sets a vector used to scale the noise added to the observations. + [NOTE]: Must be adapted when changing the observations structure + + Args: + cfg (Dict): Environment config file + + Returns: + [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1] + """ + num_joints = self.num_joints + noise_vec = torch.zeros(self.cfg.env.num_single_obs, device=self.device) + self.add_noise = self.cfg.noise.add_noise + noise_scales = self.cfg.noise.noise_scales + noise_vec[0:5] = 0.0 # commands + noise_vec[5 : (num_joints + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos + noise_vec[(num_joints + 5) : (2 * num_joints + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel + noise_vec[(2 * num_joints + 5) : (4 * num_joints + 5)] = 0.0 # previous actions + noise_vec[(4 * num_joints + 5) : (4 * num_joints + 5) + 3] = ( + noise_scales.quat * self.obs_scales.quat + ) # projected_gravity + # noise_vec[(4 * num_joints + 5) + 3 : (4 * num_joints + 5) + 6] = noise_scales.ang_vel * self.obs_scales.ang_vel # ang vel + return noise_vec + + def compute_observations(self): + phase = self._get_phase() + self.compute_ref_state() + + sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1) + cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1) + + stance_mask = self._get_gait_phase() + contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5.0 + + self.command_input = torch.cat((sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1) + q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos + dq = self.dof_vel * self.obs_scales.dof_vel + + diff = self.dof_pos - self.ref_dof_pos + self.privileged_obs_buf = torch.cat( + ( + self.command_input, # 2 + 3 + (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 12 + self.dof_vel * self.obs_scales.dof_vel, # 12 + self.actions, # 12 + diff, # 12 + self.base_lin_vel * self.obs_scales.lin_vel, # 3 + self.base_ang_vel * self.obs_scales.ang_vel, # 3 + self.base_euler_xyz * self.obs_scales.quat, # 3 + self.rand_push_force[:, :2], # 3 + self.rand_push_torque, # 
3 + self.env_frictions, # 1 + self.body_mass / 30.0, # 1 + stance_mask, # 2 + contact_mask, # 2 + ), + dim=-1, + ) + + # Handle latency + if self.cfg.domain_rand.randomize_obs_motor_latency: + self.obs_motor = self.obs_motor_latency_buffer[ + torch.arange(self.num_envs), :, self.obs_motor_latency_simstep.long() + ] + else: + self.obs_motor = torch.cat((q, dq), 1) + + if self.cfg.domain_rand.randomize_obs_imu_latency: + # self.obs_imu = self.obs_imu_latency_buffer[ + # torch.arange(self.num_envs), :, self.obs_imu_latency_simstep.long() + # ] + self.obs_imu = self.obs_imu_latency_buffer[ + torch.arange(self.num_envs), :3, self.obs_imu_latency_simstep.long() + ] # only projected_gravity + + else: + # self.obs_imu = torch.cat((self.projected_gravity, self.base_ang_vel * self.obs_scales.ang_vel), 1) + self.obs_imu = self.projected_gravity + + obs_buf = torch.cat( + ( + self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, aug_vel_yaw) + self.obs_motor, # Delayed motor observations + self.actions, # 12D + self.obs_imu, # Delayed IMU observations + ), + dim=-1, + ) + + if self.cfg.terrain.measure_heights: + heights = ( + torch.clip( + self.root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights, + -1, + 1.0, + ) + * self.obs_scales.height_measurements + ) + self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1) + if self.add_noise: + obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level + else: + obs_now = obs_buf.clone() + self.obs_history.append(obs_now) + self.critic_history.append(self.privileged_obs_buf) + + obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1) # N,T,K + + self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K + self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1) + + def reset_idx(self, env_ids): + super().reset_idx(env_ids) + for i in range(self.obs_history.maxlen): + self.obs_history[i][env_ids] *= 0 + for i in range(self.critic_history.maxlen): + self.critic_history[i][env_ids] *= 0 + + # ================================================ Rewards ================================================== # + def _reward_joint_pos(self): + """Calculates the reward based on the difference between the current joint positions and the target joint positions.""" + joint_pos = self.dof_pos.clone() + pos_target = self.ref_dof_pos.clone() + diff = joint_pos - pos_target + r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5) + + return r + + def _reward_feet_distance(self): + """Calculates the reward based on the distance between the feet. 
Penalizes feet that get too close to each other or too far apart.""" + foot_pos = self.rigid_state[:, self.feet_indices, :2] + foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1) + fd = self.cfg.rewards.min_dist + max_df = self.cfg.rewards.max_dist + d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) + d_max = torch.clamp(foot_dist - max_df, 0, 0.5) + return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 + + def _reward_knee_distance(self): + """Calculates the reward based on the distance between the knees of the humanoid.""" + foot_pos = self.rigid_state[:, self.knee_indices, :2] + foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1) + fd = self.cfg.rewards.min_dist + max_df = self.cfg.rewards.max_dist / 2 + d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) + d_max = torch.clamp(foot_dist - max_df, 0, 0.5) + + return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 + + def _reward_foot_slip(self): + """Calculates the reward for minimizing foot slip. The reward is based on the contact forces + and the speed of the feet. A contact threshold is used to determine if the foot is in contact + with the ground. The speed of the foot is calculated and scaled by the contact condition. + """ + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2) + rew = torch.sqrt(foot_speed_norm) + rew *= contact + return torch.sum(rew, dim=1) + + def _reward_feet_air_time(self): + """Calculates the reward for feet air time, promoting longer steps. This is achieved by + checking the first contact with the ground after being in the air. The air time is + limited to a maximum value for reward calculation. + """ + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + stance_mask = self._get_gait_phase() + self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts) + self.last_contacts = contact + first_contact = (self.feet_air_time > 0.0) * self.contact_filt + self.feet_air_time += self.dt + air_time = self.feet_air_time.clamp(0, 0.5) * first_contact + self.feet_air_time *= ~self.contact_filt + return air_time.sum(dim=1) + + def _reward_feet_contact_number(self): + """Calculates a reward based on the number of feet contacts aligning with the gait phase. + Rewards or penalizes depending on whether the foot contact matches the expected gait phase. + """ + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + stance_mask = self._get_gait_phase() + reward = torch.where(contact == stance_mask, 1.0, -0.3) + return torch.mean(reward, dim=1) + + def _reward_orientation(self): + """Calculates the reward for maintaining a flat base orientation. It penalizes deviation + from the desired base orientation using the base euler angles and the projected gravity vector. + """ + quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) + orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) + + return (quat_mismatch + orientation) / 2.0 + + def _reward_feet_contact_forces(self): + """Calculates the reward for keeping contact forces within a specified range. Penalizes + high contact forces on the feet.
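Both _reward_feet_distance and _reward_knee_distance above share the same window-shaped term: the reward sits near 1.0 while the measured distance stays inside [min_dist, max_dist] and collapses exponentially outside it. A small numeric sketch with made-up distances (same formula, illustrative values only):

import torch

def distance_window_reward(dist, d_min_lim, d_max_lim):
    d_min = torch.clamp(dist - d_min_lim, -0.5, 0.0)   # negative only when too close
    d_max = torch.clamp(dist - d_max_lim, 0.0, 0.5)    # positive only when too far
    return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2

dist = torch.tensor([0.10, 0.30, 0.70])                # too close, in range, too far
print(distance_window_reward(dist, 0.2, 0.5))          # ~[0.50, 1.00, 0.50]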
+ """ + return torch.sum( + ( + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force + ).clip(0, 400), + dim=1, + ) + + def _reward_default_joint_pos(self): + """Calculates the reward for keeping joint positions close to default positions, with a focus + on penalizing deviation in yaw and roll directions. Excludes yaw and roll from the main penalty. + """ + joint_diff = self.dof_pos - self.default_joint_pd_target + left_yaw_roll = joint_diff[:, [self.legs_joints["left_hip_roll"], self.legs_joints["left_hip_yaw"]]] + right_yaw_roll = joint_diff[:, [self.legs_joints["right_hip_roll"], self.legs_joints["right_hip_yaw"]]] + yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1) + yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50) + + return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1) + + def _reward_base_height(self): + """Calculates the reward based on the robot's base height. Penalizes deviation from a target base height. + The reward is computed based on the height difference between the robot's base and the average height + of its feet when they are in contact with the ground. + """ + stance_mask = self._get_gait_phase() + measured_heights = torch.sum(self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum( + stance_mask, dim=1 + ) + base_height = self.root_states[:, 2] - (measured_heights - self.cfg.asset.default_feet_height) + reward = torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100) + return reward + + def _reward_base_acc(self): + """Computes the reward based on the base's acceleration. Penalizes high accelerations of the robot's base, + encouraging smoother motion. + """ + root_acc = self.last_root_vel - self.root_states[:, 7:13] + rew = torch.exp(-torch.norm(root_acc, dim=1) * 3) + return rew + + def _reward_vel_mismatch_exp(self): + """Computes a reward based on the mismatch in the robot's linear and angular velocities. + Encourages the robot to maintain a stable velocity by penalizing large deviations. + """ + lin_mismatch = torch.exp(-torch.square(self.base_lin_vel[:, 2]) * 10) + ang_mismatch = torch.exp(-torch.norm(self.base_ang_vel[:, :2], dim=1) * 5.0) + + c_update = (lin_mismatch + ang_mismatch) / 2.0 + + return c_update + + def _reward_track_vel_hard(self): + """Calculates a reward for accurately tracking both linear and angular velocity commands. + Penalizes deviations from specified linear and angular velocity targets. + """ + # Tracking of linear velocity commands (xy axes) + lin_vel_error = torch.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) + lin_vel_error_exp = torch.exp(-lin_vel_error * 10) + + # Tracking of angular velocity commands (yaw) + ang_vel_error = torch.abs(self.commands[:, 2] - self.base_ang_vel[:, 2]) + ang_vel_error_exp = torch.exp(-ang_vel_error * 10) + + linear_error = 0.2 * (lin_vel_error + ang_vel_error) + + return (lin_vel_error_exp + ang_vel_error_exp) / 2.0 - linear_error + + def _reward_tracking_lin_vel(self): + """Tracks linear velocity commands along the xy axes. + Calculates a reward based on how closely the robot's linear velocity matches the commanded values. + """ + lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1) + return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma) + + def _reward_tracking_ang_vel(self): + """Tracks angular velocity commands for yaw rotation. 
+ Computes a reward based on how closely the robot's angular velocity matches the commanded yaw values. + """ + ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2]) + return torch.exp(-ang_vel_error * self.cfg.rewards.tracking_sigma) + + def _reward_feet_clearance(self): + """Calculates reward based on the clearance of the swing leg from the ground during movement. + Encourages appropriate lift of the feet during the swing phase of the gait. + """ + # Compute feet contact mask + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + + # Get the z-position of the feet and compute the change in z-position + feet_z = self.rigid_state[:, self.feet_indices, 2] - self.cfg.asset.default_feet_height + delta_z = feet_z - self.last_feet_z + self.feet_height += delta_z + self.last_feet_z = feet_z + + # Compute swing mask + swing_mask = 1 - self._get_gait_phase() + + # feet height should be closed to target feet height at the peak + rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.02 + rew_pos = torch.sum(rew_pos * swing_mask, dim=1) + self.feet_height *= ~contact + + return rew_pos + + def _reward_low_speed(self): + """Rewards or penalizes the robot based on its speed relative to the commanded speed. + This function checks if the robot is moving too slow, too fast, or at the desired speed, + and if the movement direction matches the command. + """ + # Calculate the absolute value of speed and command for comparison + absolute_speed = torch.abs(self.base_lin_vel[:, 0]) + absolute_command = torch.abs(self.commands[:, 0]) + + # Define speed criteria for desired range + speed_too_low = absolute_speed < 0.5 * absolute_command + speed_too_high = absolute_speed > 1.2 * absolute_command + speed_desired = ~(speed_too_low | speed_too_high) + + # Check if the speed and command directions are mismatched + sign_mismatch = torch.sign(self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0]) + + # Initialize reward tensor + reward = torch.zeros_like(self.base_lin_vel[:, 0]) + + # Assign rewards based on conditions + # Speed too low + reward[speed_too_low] = -1.0 + # Speed too high + reward[speed_too_high] = 0.0 + # Speed within desired range + reward[speed_desired] = 1.2 + # Sign mismatch has the highest priority + reward[sign_mismatch] = -2.0 + return reward * (self.commands[:, 0].abs() > 0.1) + + def _reward_torques(self): + """Penalizes the use of high torques in the robot's joints. Encourages efficient movement by minimizing + the necessary force exerted by the motors. + """ + return torch.sum(torch.square(self.torques), dim=1) + + def _reward_dof_vel(self): + """Penalizes high velocities at the degrees of freedom (DOF) of the robot. This encourages smoother and + more controlled movements. + """ + return torch.sum(torch.square(self.dof_vel), dim=1) + + def _reward_dof_acc(self): + """Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring + smooth and stable motion, reducing wear on the robot's mechanical parts. + """ + return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1) + + def _reward_collision(self): + """Penalizes collisions of the robot with the environment, specifically focusing on selected body parts. + This encourages the robot to avoid undesired contact with objects or surfaces. 
+ """ + return torch.sum( + 1.0 * (torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), + dim=1, + ) + + def _reward_action_smoothness(self): + """Encourages smoothness in the robot's actions by penalizing large differences between consecutive actions. + This is important for achieving fluid motion and reducing mechanical stress. + """ + term_1 = torch.sum(torch.square(self.last_actions - self.actions), dim=1) + term_2 = torch.sum( + torch.square(self.actions + self.last_last_actions - 2 * self.last_actions), + dim=1, + ) + term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1) + return term_1 + term_2 + term_3 + + def _reward_termination(self): + """Returns a penalty for each environment that is terminated.""" + penalty = 1.0 + # Apply the penalty element-wise: only environments that need reset get the penalty + return penalty * self.reset_buf.float() + + def _initialize_push_intervals(self): + # Create a tensor of size (num_envs,) for the next push step. + # We'll assume self.num_envs is available and self.dt is the simulation time step. + self.next_push_steps = torch.empty(self.num_envs, dtype=torch.int64, device=self.device) + for i in range(self.num_envs): + random_interval_sec = random.uniform( + self.cfg.domain_rand.push_random_interval_min, self.cfg.domain_rand.push_random_interval_max + ) + self.next_push_steps[i] = self.common_step_counter + int(random_interval_sec / self.dt) + + def _post_physics_step_callback(self): + """Callback called before computing terminations, rewards, and observations.""" + env_ids = ( + (self.episode_length_buf % int(self.cfg.commands.resampling_time / self.dt) == 0) + .nonzero(as_tuple=False) + .flatten() + ) + self._resample_commands(env_ids) + if self.cfg.commands.heading_command: + forward = quat_apply(self.base_quat, self.forward_vec) + heading = torch.atan2(forward[:, 1], forward[:, 0]) + self.commands[:, 2] = torch.clip(0.5 * wrap_to_pi(self.commands[:, 3] - heading), -1.0, 1.0) + if self.cfg.terrain.measure_heights: + self.measured_heights = self._get_heights() + if self.cfg.domain_rand.push_robots: + # Initialize per-env push intervals if not done yet. 
+ if not hasattr(self, "next_push_steps"): + self._initialize_push_intervals() + # Determine which environments are due for a push + envs_to_push = (self.common_step_counter >= self.next_push_steps).nonzero(as_tuple=False).flatten() + if len(envs_to_push) > 0: + # Apply push only to the environments that are due + self._push_robots(envs_to_push) + # For each pushed environment, set a new random push time + for env_id in envs_to_push: + random_interval_sec = random.uniform( + self.cfg.domain_rand.push_random_interval_min, self.cfg.domain_rand.push_random_interval_max + ) + self.next_push_steps[env_id] = self.common_step_counter + int(random_interval_sec / self.dt) diff --git a/sim/envs/humanoids/gpr_latency_config.py b/sim/envs/humanoids/gpr_latency_config.py new file mode 100644 index 00000000..32f4ac12 --- /dev/null +++ b/sim/envs/humanoids/gpr_latency_config.py @@ -0,0 +1,352 @@ +"""Defines the environment configuration for the Getting up task""" + +# from kinfer import proto as P + +from sim.env import robot_urdf_path +from sim.envs.base.legged_robot_config import ( # type: ignore + LeggedRobotCfg, + LeggedRobotCfgPPO, +) +from sim.resources.gpr_latency.joints import Robot + +NUM_JOINTS = len(Robot.all_joints()) + + +class GprLatencyCfg(LeggedRobotCfg): + """Configuration class for the Legs humanoid robot.""" + + class env(LeggedRobotCfg.env): + # change the observation dim + frame_stack = 15 # 15 # actor + c_frame_stack = 3 # critic + # num_single_obs = 8 + NUM_JOINTS * 4 + 3 # ang vel + num_single_obs = 8 + NUM_JOINTS * 4 # no ang vel + num_observations = int(frame_stack * num_single_obs) + single_num_privileged_obs = 25 + NUM_JOINTS * 5 + num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) + num_joints = NUM_JOINTS + num_actions = NUM_JOINTS * 2 + num_envs = 4096 + episode_length_s = 24 # episode length in seconds + use_ref_actions = False + + class safety(LeggedRobotCfg.safety): + # safety factors + pos_limit = 1.0 + vel_limit = 1.0 + torque_limit = 1.0 + + class asset(LeggedRobotCfg.asset): + name = "gpr_latency" + + file = str(robot_urdf_path(name)) + + # foot_name = ["foot1", "foot3"] + # knee_name = ["leg3_shell1", "leg3_shell11"] + # imu_name = "imu" + + foot_name = ["foot1", "foot3"] + knee_name = ["leg3_shell2", "leg3_shell22"] + imu_name = "imu_link" + + termination_height = 0.8 # 0.4 # 0.3 + default_feet_height = 0.0 + + terminate_after_contacts_on = [ + "shoulder", + "shoulder_2", + "arm1_top", + "arm1_top_2", + "arm2_shell", + "arm2_shell_2", + "arm3_shell", + "arm3_shell2", + "hand_shell", + "hand_shell_2", + "leg0_shell", + "leg0_shell_2", + ] + + # terminate_after_contacts_on = ["arm1_top", "shoulder", "arm1_top_2", "shoulder_2"] + + penalize_contacts_on = [] + self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter + flip_visual_attachments = False + replace_cylinder_with_capsule = False + fix_base_link = False + + class terrain(LeggedRobotCfg.terrain): + mesh_type = "plane" + # mesh_type = "trimesh" + curriculum = False + # rough terrain only: + measure_heights = False + static_friction = 0.6 + dynamic_friction = 0.6 + terrain_length = 8.0 + terrain_width = 8.0 + num_rows = 10 # number of terrain rows (levels) + num_cols = 10 # number of terrain cols (types) + max_init_terrain_level = 10 # starting curriculum state + # plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down + terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0] + restitution = 0.0 + + class noise: + add_noise = True + noise_level = 0.6 # 1.5 # 0.6 # scales 
other values + + class noise_scales: + dof_pos = 0.05 + dof_vel = 2.5 + ang_vel = 0.4 # 0.1 + lin_vel = 0.05 + quat = 0.08 # 0.03 + height_measurements = 0.1 + + class init_state(LeggedRobotCfg.init_state): + pos = [0.0, 0.0, Robot.height + 0.025] + rot = Robot.rotation + default_joint_angles = {k: 0.0 for k in Robot.all_joints()} + + default_positions = Robot.default_standing() + for joint in default_positions: + default_joint_angles[joint] = default_positions[joint] + + class control(LeggedRobotCfg.control): + # PD Drive parameters: + stiffness = Robot.stiffness() + damping = Robot.damping() + # action scale: target angle = actionScale * action + defaultAngle + action_scale = 0.25 + # decimation: Number of control action updates @ sim DT per policy DT + decimation = 20 # Policy at 50hz + pd_decimation = 1 # PD at 1000hz + + class sim(LeggedRobotCfg.sim): + dt = 0.001 # 1000 Hz + substeps = 1 # 2 + up_axis = 1 # 0 is y, 1 is z + + class physx(LeggedRobotCfg.sim.physx): + num_threads = 10 + solver_type = 1 # 0: pgs, 1: tgs + num_position_iterations = 4 + num_velocity_iterations = 1 + contact_offset = 0.01 # [m] + rest_offset = 0.0 # [m] + bounce_threshold_velocity = 0.1 # [m/s] + max_depenetration_velocity = 1.0 + max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more + default_buffer_size_multiplier = 5 + # 0: never, 1: last sub-step, 2: all sub-steps (default=2) + contact_collection = 2 + + class domain_rand(LeggedRobotCfg.domain_rand): + start_pos_noise = 0.1 + randomize_friction = True + friction_range = [0.1, 2.0] + + randomize_base_mass = True + added_mass_range = [-2.0, 2.0] + + randomize_link_mass = True + link_mass_multiplier_range = [0.8, 1.2] + + push_robots = True + push_random_interval_min = 1.0 + push_random_interval_max = 4.0 + max_push_vel_xy = 1.2 # 0.2 + max_push_ang_vel = 1.2 # 0.4 + + # dynamic randomization + action_noise = 0.02 + action_delay = 0.5 + randomize_pd_gains = False + + randomize_motor_zero_offset = True + motor_zero_offset_range = [-0.035, 0.035] # Offset to add to the motor angles + + randomize_joint_friction = True + joint_friction_range = [0.01, 1.15] + + randomize_joint_damping = True + joint_damping_range = [0.3, 1.5] + + randomize_joint_armature = True + joint_armature_range = [0.008, 0.06] + + randomize_pd_gains = True + stiffness_multiplier_range = [0.8, 1.2] + damping_multiplier_range = [0.8, 1.2] + + randomize_calculated_torque = True + torque_multiplier_range = [0.8, 1.2] + + # Latency randomization + add_cmd_action_latency = True # Enable action latency + randomize_cmd_action_latency = True # Randomize the latency amount + range_cmd_action_latency = [0, 2] # Range of latency in simulation steps + + add_obs_latency = True # Enable observation latency + randomize_obs_motor_latency = True # Randomize motor sensor latency + range_obs_motor_latency = [0, 10] # Range of motor latency in simulation steps + randomize_obs_imu_latency = True # Randomize IMU sensor latency + range_obs_imu_latency = [0, 10] # Range of IMU latency in simulation steps + + class commands(LeggedRobotCfg.commands): + # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) + num_commands = 4 + resampling_time = 8.0 # time before command are changed[s] + heading_command = True # if true: compute ang vel command from heading error + + curriculum = True + max_curriculum = 1.7 + + class ranges: + lin_vel_x = [-0.0, 1.0] # min max [m/s] + lin_vel_y = [-0.5, 0.5] # min max [m/s] + ang_vel_yaw = [-1.5, 1.5] # min 
max [rad/s] + heading = [-3.14, 3.14] + + class rewards: + # quite important to keep it right + base_height_target = Robot.height + min_dist = 0.2 + max_dist = 0.5 + # put some settings here for LLM parameter tuning + target_joint_pos_scale = 0.24 # rad + target_feet_height = 0.06 # m + + cycle_time = 0.4 # sec + # if true negative total rewards are clipped at zero (avoids early termination problems) + only_positive_rewards = True + # tracking reward = exp(error*sigma) + tracking_sigma = 5.0 + max_contact_force = 400 # forces above this value are penalized + + class scales: + termination = -10.0 + # reference motion tracking + joint_pos = 1.2 # 1.6 + feet_clearance = 1.2 + feet_contact_number = 0.8 # 1.0 # 1.2 + # gait + feet_air_time = 1.2 + foot_slip = -0.05 + feet_distance = 0.2 + knee_distance = 0.2 + # contact + feet_contact_forces = -0.01 + # vel tracking + tracking_lin_vel = 1.2 + tracking_ang_vel = 1.1 + vel_mismatch_exp = 0.5 # lin_z; ang x,y + low_speed = 0.2 + track_vel_hard = 0.5 + + # base pos + default_joint_pos = 0.5 + orientation = 1.0 + base_height = 0.2 + base_acc = 0.2 + # energy + action_smoothness = -0.003 # -0.002 + torques = -1.5e-5 # -1e-5 + dof_vel = -5e-4 # -1e-3 + dof_acc = -1e-7 # -2.5e-7 + collision = -1.0 + + # action_smoothness = -0.004 + # torques = -1e-10 + # dof_vel = -1e-5 + # dof_acc = -5e-9 + # collision = -1. + + class normalization: + class obs_scales: + lin_vel = 2.0 + ang_vel = 1.0 + dof_pos = 1.0 + dof_vel = 0.05 + quat = 1.0 + height_measurements = 5.0 + + clip_observations = 18.0 + clip_actions = 18.0 + + class viewer: + ref_env = 0 + pos = [4, -4, 2] + lookat = [0, -2, 0] + + +class GprLatencyStandingCfg(GprLatencyCfg): + """Configuration class for the GPR humanoid robot.""" + + class init_state(LeggedRobotCfg.init_state): + pos = [0.0, 0.0, Robot.standing_height + 0.025] + rot = Robot.rotation + default_joint_angles = {k: 0.0 for k in Robot.all_joints()} + + class rewards: + # quite important to keep it right + base_height_target = Robot.standing_height + min_dist = 0.2 + max_dist = 0.5 + # put some settings here for LLM parameter tuning + target_joint_pos_scale = 0.17 # rad + target_feet_height = 0.06 # m + cycle_time = 0.4 # sec + # if true negative total rewards are clipped at zero (avoids early termination problems) + only_positive_rewards = False + # tracking reward = exp(error*sigma) + tracking_sigma = 5 + max_contact_force = 200 # forces above this value are penalized + + class scales: + # base pos + default_joint_pos = 1.0 + orientation = 1 + base_height = 0.2 + base_acc = 0.2 + # energy + action_smoothness = -0.002 + torques = -1e-5 + dof_vel = -1e-3 + dof_acc = -2.5e-7 + collision = -1.0 + + +class GprLatencyCfgPPO(LeggedRobotCfgPPO): + seed = 5 + runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner + + class policy: + init_noise_std = 1.0 + actor_hidden_dims = [512, 256, 128] + critic_hidden_dims = [768, 256, 128] + + class algorithm(LeggedRobotCfgPPO.algorithm): + entropy_coef = 0.001 + learning_rate = 2e-5 + num_learning_epochs = 2 + gamma = 0.994 + lam = 0.9 + num_mini_batches = 4 + + class runner: + policy_class_name = "ActorCritic" + algorithm_class_name = "PPO" + num_steps_per_env = 60 # per iteration + max_iterations = 10001 # number of policy updates + + # logging + save_interval = 100 # check for potential saves every this many iterations + experiment_name = "gpr_latency" + run_name = "" + # load and resume + resume = False + load_run = -1 # -1 = last run + checkpoint = -1 # -1 = last saved model + resume_path = None # 
updated from load_run and chkpt diff --git a/sim/envs/humanoids/gpr_latency_env.py b/sim/envs/humanoids/gpr_latency_env.py new file mode 100644 index 00000000..96051c95 --- /dev/null +++ b/sim/envs/humanoids/gpr_latency_env.py @@ -0,0 +1,694 @@ +# mypy: disable-error-code="valid-newtype" +"""Defines the environment for training the humanoid.""" + +import random + +from sim.envs.base.legged_robot_latency import LeggedRobotLatency +from sim.resources.gpr_latency.joints import Robot +from sim.utils.math import wrap_to_pi +from sim.utils.terrain import HumanoidTerrain + +from isaacgym.torch_utils import * # isort:skip + + +from isaacgym import gymtorch # isort:skip + +import torch # isort:skip + + +class GprLatencyEnv(LeggedRobotLatency): + """GprLatencyEnv is a class that represents a custom environment for a legged robot. + + Args: + cfg (LeggedRobotCfg): Configuration object for the legged robot. + sim_params: Parameters for the simulation. + physics_engine: Physics engine used in the simulation. + sim_device: Device used for the simulation. + headless: Flag indicating whether the simulation should be run in headless mode. + + Attributes: + last_feet_z (float): The z-coordinate of the last feet position. + feet_height (torch.Tensor): Tensor representing the height of the feet. + sim (gymtorch.GymSim): The simulation object. + terrain (HumanoidTerrain): The terrain object. + up_axis_idx (int): The index representing the up axis. + command_input (torch.Tensor): Tensor representing the command input. + privileged_obs_buf (torch.Tensor): Tensor representing the privileged observations buffer. + obs_buf (torch.Tensor): Tensor representing the observations buffer. + obs_history (collections.deque): Deque containing the history of observations. + critic_history (collections.deque): Deque containing the history of critic observations. + + Methods: + _push_robots(): Randomly pushes the robots by setting a randomized base velocity. + _get_phase(): Calculates the phase of the gait cycle. + _get_gait_phase(): Calculates the gait phase. + compute_ref_state(): Computes the reference state. + create_sim(): Creates the simulation, terrain, and environments. + _get_noise_scale_vec(cfg): Sets a vector used to scale the noise added to the observations. + step(actions): Performs a simulation step with the given actions. + compute_observations(): Computes the observations. + reset_idx(env_ids): Resets the environment for the specified environment IDs. 
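The step() implementation that follows interprets the doubled action vector as position targets in the first half and velocity targets in the second half, then converts them to torques with a PD law in _compute_torques. Below is a self-contained sketch of that conversion under the assumption that gains and limits are plain tensors; the function and variable names are illustrative, not the class's exact attributes.

import torch

def pd_torques(actions, default_pos, dof_pos, dof_vel, p_gains, d_gains,
               torque_limits, action_scale=0.25):
    num_joints = actions.shape[1] // 2
    pos_t = actions[:, :num_joints] * action_scale     # scaled position offsets
    vel_t = actions[:, num_joints:] * action_scale     # scaled velocity targets
    torques = p_gains * (pos_t + default_pos - dof_pos) + d_gains * (vel_t - dof_vel)
    return torch.clip(torques, -torque_limits, torque_limits)

# toy usage with 2 envs and 3 joints
nj = 3
tau = pd_torques(torch.randn(2, 2 * nj), torch.zeros(nj), torch.zeros(2, nj),
                 torch.zeros(2, nj), torch.full((nj,), 40.0), torch.full((nj,), 2.0),
                 torch.full((nj,), 80.0))
print(tau.shape)  # torch.Size([2, 3])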
+ """ + + def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): + super().__init__(cfg, sim_params, physics_engine, sim_device, headless) + self.last_feet_z = self.cfg.asset.default_feet_height + self.feet_height = torch.zeros((self.num_envs, 2), device=self.device) + self.reset_idx(torch.tensor(range(self.num_envs), device=self.device)) + + env_handle = self.envs[0] + actor_handle = self.actor_handles[0] + + self.legs_joints = {} + for name, joint in Robot.legs.left.joints_motors(): + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) + self.legs_joints["left_" + name] = joint_handle + + for name, joint in Robot.legs.right.joints_motors(): + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) + self.legs_joints["right_" + name] = joint_handle + + self.compute_observations() + self._initialize_push_intervals() + + def step(self, actions): + """Apply actions, simulate, call self.post_physics_step() + + Args: + actions (torch.Tensor): Tensor of shape (num_envs, num_actions_per_env) + """ + if self.cfg.env.use_ref_actions: + actions += self.ref_action + actions = torch.clip(actions, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions) + + # dynamic randomization + delay = torch.rand((self.num_envs, 1), device=self.device) * self.cfg.domain_rand.action_delay + actions = (1 - delay) * actions + delay * self.actions + actions += self.cfg.domain_rand.action_noise * torch.randn_like(actions) * actions + + clip_actions = self.cfg.normalization.clip_actions + self.actions = torch.clip(actions, -clip_actions, clip_actions).to(self.device) + # step physics and render each frame + target_positions = self.actions[:, : self.cfg.env.num_actions // 2] + target_velocities = self.actions[:, self.cfg.env.num_actions // 2 :] + self.render() + for _ in range(self.cfg.control.decimation): + self.torques = self._compute_torques(target_positions, target_velocities).view(self.torques.shape) + self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(self.torques)) + + self.gym.simulate(self.sim) + if self.device == "cpu": + self.gym.fetch_results(self.sim, True) + self.gym.refresh_dof_state_tensor(self.sim) + self.post_physics_step() + + # return clipped obs, clipped states (None), rewards, dones and infos + clip_obs = self.cfg.normalization.clip_observations + self.obs_buf = torch.clip(self.obs_buf, -clip_obs, clip_obs) + if self.privileged_obs_buf is not None: + self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs) + return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras + + def _compute_torques(self, positions, velocities): + """Compute torques from actions. + Actions can be interpreted as position or velocity targets given to a PD controller, or directly as scaled torques. + [NOTE]: torques must have the same dimension as the number of DOFs, even if some DOFs are not actuated. + + Args: + actions (torch.Tensor): Actions + + Returns: + [torch.Tensor]: Torques sent to the simulation + """ + pos_scaled = positions * self.cfg.control.action_scale + vel_scaled = velocities * self.cfg.control.action_scale + p_gains = self.p_gains + d_gains = self.d_gains + torques = p_gains * (pos_scaled + self.default_dof_pos - self.dof_pos) + d_gains * (vel_scaled - self.dof_vel) + res = torch.clip(torques, -self.torque_limits, self.torque_limits) + return res + + # def _push_robots(self): + # """Random pushes the robots. 
Emulates an impulse by setting a randomized base velocity.""" + # max_vel = self.cfg.domain_rand.max_push_vel_xy + # max_push_angular = self.cfg.domain_rand.max_push_ang_vel + # self.rand_push_force[:, :2] = torch_rand_float( + # -max_vel, max_vel, (self.num_envs, 2), device=self.device + # ) # lin vel x/y + # self.root_states[:, 7:9] = self.rand_push_force[:, :2] + + # self.rand_push_torque = torch_rand_float( + # -max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device + # ) + # self.root_states[:, 10:13] = self.rand_push_torque + + # self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) + + def _push_robots(self, env_ids=None): + """Randomly pushes the robots for specific environments. + + If env_ids is None, push all environments; otherwise, push only for the provided indices. + """ + max_vel = self.cfg.domain_rand.max_push_vel_xy + max_push_angular = self.cfg.domain_rand.max_push_ang_vel + + if env_ids is None: + env_ids = torch.arange(self.num_envs, device=self.device) + + # For the selected environments, sample random push forces and torques + self.rand_push_force[env_ids, :2] = torch_rand_float(-max_vel, max_vel, (len(env_ids), 2), device=self.device) + self.root_states[env_ids, 7:9] = self.rand_push_force[env_ids, :2] + + self.rand_push_torque[env_ids] = torch_rand_float( + -max_push_angular, max_push_angular, (len(env_ids), 3), device=self.device + ) + self.root_states[env_ids, 10:13] = self.rand_push_torque[env_ids] + + self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) + + def _get_phase(self): + cycle_time = self.cfg.rewards.cycle_time + phase = self.episode_length_buf * self.dt / cycle_time + return phase + + def _get_gait_phase(self): + # return float mask 1 is stance, 0 is swing + phase = self._get_phase() + sin_pos = torch.sin(2 * torch.pi * phase) + # Add double support phase + stance_mask = torch.zeros((self.num_envs, 2), device=self.device) + # left foot stance + stance_mask[:, 0] = sin_pos >= 0 + # right foot stance + stance_mask[:, 1] = sin_pos < 0 + # Double support phase + stance_mask[torch.abs(sin_pos) < 0.1] = 1 + + return stance_mask + + def check_termination(self): + """Check if environments need to be reset""" + self.reset_buf = torch.any( + torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1.0, + dim=1, + ) + self.reset_buf |= self.root_states[:, 2] < self.cfg.asset.termination_height + self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs + self.reset_buf |= self.time_out_buf + + def compute_ref_state(self): + phase = self._get_phase() + sin_pos = torch.sin(2 * torch.pi * phase) + sin_pos_l = sin_pos.clone() + sin_pos_r = sin_pos.clone() + default_clone = self.default_dof_pos.clone() + self.ref_dof_pos = self.default_dof_pos.repeat(self.num_envs, 1) + + scale_1 = self.cfg.rewards.target_joint_pos_scale + scale_2 = 2 * scale_1 + # left foot stance phase set to default joint pos + sin_pos_l[sin_pos_l > 0] = 0 + + self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += -sin_pos_l * scale_1 + self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2 + self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1 + + # right foot stance phase set to default joint pos + sin_pos_r[sin_pos_r < 0] = 0 + # pfb30 last one should be -1 + self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += -sin_pos_r * scale_1 + self.ref_dof_pos[:, 
self.legs_joints["right_knee_pitch"]] += -sin_pos_r * scale_2 + self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += -sin_pos_r * scale_1 + + # Double support phase + self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0 + + self.ref_action = 2 * self.ref_dof_pos + + # def compute_ref_state(self): + # phase = self._get_phase() + # sin_pos = torch.sin(2 * torch.pi * phase) + # sin_pos_l = sin_pos.clone() + # sin_pos_r = sin_pos.clone() + # default_clone = self.default_dof_pos.clone() + # self.ref_dof_pos = self.default_dof_pos.repeat(self.num_envs, 1) + + # scale_1 = self.cfg.rewards.target_joint_pos_scale + # scale_2 = 2 * scale_1 + # # left foot stance phase set to default joint pos + # sin_pos_l[sin_pos_l > 0] = 0 + + # self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += -sin_pos_l * scale_1 + # self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += -sin_pos_l * scale_2 + # self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1 + + # # right foot stance phase set to default joint pos + # sin_pos_r[sin_pos_r < 0] = 0 + # # pfb30 last one should be -1 + # self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += -sin_pos_r * scale_1 + # self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += -sin_pos_r * scale_2 + # self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1 + + # # Double support phase + # self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0 + + # self.ref_action = 2 * self.ref_dof_pos + + def create_sim(self): + """Creates simulation, terrain and evironments""" + self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly + self.sim = self.gym.create_sim( + self.sim_device_id, + self.graphics_device_id, + self.physics_engine, + self.sim_params, + ) + mesh_type = self.cfg.terrain.mesh_type + if mesh_type in ["heightfield", "trimesh"]: + self.terrain = HumanoidTerrain(self.cfg.terrain, self.num_envs) + if mesh_type == "plane": + self._create_ground_plane() + elif mesh_type == "heightfield": + self._create_heightfield() + elif mesh_type == "trimesh": + self._create_trimesh() + elif mesh_type is not None: + raise ValueError("Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]") + self._create_envs() + + def _get_noise_scale_vec(self, cfg): + """Sets a vector used to scale the noise added to the observations. 
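The noise vector filled in just below mirrors the single-frame observation layout: 5 command entries, num_joints positions, num_joints velocities, 2*num_joints previous actions, and 3 projected-gravity entries, which is exactly the num_single_obs = 8 + 4*num_joints used in the configs. A compact sketch of that bookkeeping, with illustrative scale values:

import torch

def build_noise_vec(num_joints, dof_pos_scale=0.05, dof_vel_scale=2.5, quat_scale=0.08):
    n = 8 + num_joints * 4                             # matches num_single_obs in the config
    vec = torch.zeros(n)
    vec[0:5] = 0.0                                     # sin/cos phase + 3 commands: no noise
    vec[5:5 + num_joints] = dof_pos_scale              # joint positions
    vec[5 + num_joints:5 + 2 * num_joints] = dof_vel_scale   # joint velocities
    # the 2*num_joints previous-action slots stay at 0; the last 3 slots are projected gravity
    vec[5 + 4 * num_joints:] = quat_scale
    return vec

print(build_noise_vec(num_joints=12).shape)            # torch.Size([56])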
+ [NOTE]: Must be adapted when changing the observations structure + + Args: + cfg (Dict): Environment config file + + Returns: + [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1] + """ + num_joints = self.num_joints + noise_vec = torch.zeros(self.cfg.env.num_single_obs, device=self.device) + self.add_noise = self.cfg.noise.add_noise + noise_scales = self.cfg.noise.noise_scales + noise_vec[0:5] = 0.0 # commands + noise_vec[5 : (num_joints + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos + noise_vec[(num_joints + 5) : (2 * num_joints + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel + noise_vec[(2 * num_joints + 5) : (4 * num_joints + 5)] = 0.0 # previous actions + noise_vec[(4 * num_joints + 5) : (4 * num_joints + 5) + 3] = ( + noise_scales.quat * self.obs_scales.quat + ) # projected_gravity + # noise_vec[(4 * num_joints + 5) + 3 : (4 * num_joints + 5) + 6] = noise_scales.ang_vel * self.obs_scales.ang_vel # ang vel + return noise_vec + + def compute_observations(self): + phase = self._get_phase() + self.compute_ref_state() + + sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1) + cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1) + + stance_mask = self._get_gait_phase() + contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5.0 + + self.command_input = torch.cat((sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1) + q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos + dq = self.dof_vel * self.obs_scales.dof_vel + + diff = self.dof_pos - self.ref_dof_pos + self.privileged_obs_buf = torch.cat( + ( + self.command_input, # 2 + 3 + (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 12 + self.dof_vel * self.obs_scales.dof_vel, # 12 + self.actions, # 12 + diff, # 12 + self.base_lin_vel * self.obs_scales.lin_vel, # 3 + self.base_ang_vel * self.obs_scales.ang_vel, # 3 + self.base_euler_xyz * self.obs_scales.quat, # 3 + self.rand_push_force[:, :2], # 3 + self.rand_push_torque, # 3 + self.env_frictions, # 1 + self.body_mass / 30.0, # 1 + stance_mask, # 2 + contact_mask, # 2 + ), + dim=-1, + ) + + # Handle latency + if self.cfg.domain_rand.randomize_obs_motor_latency: + self.obs_motor = self.obs_motor_latency_buffer[ + torch.arange(self.num_envs), :, self.obs_motor_latency_simstep.long() + ] + else: + self.obs_motor = torch.cat((q, dq), 1) + + if self.cfg.domain_rand.randomize_obs_imu_latency: + # self.obs_imu = self.obs_imu_latency_buffer[ + # torch.arange(self.num_envs), :, self.obs_imu_latency_simstep.long() + # ] + self.obs_imu = self.obs_imu_latency_buffer[ + torch.arange(self.num_envs), :3, self.obs_imu_latency_simstep.long() + ] # only projected_gravity + + else: + # self.obs_imu = torch.cat((self.projected_gravity, self.base_ang_vel * self.obs_scales.ang_vel), 1) + self.obs_imu = self.projected_gravity + + obs_buf = torch.cat( + ( + self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, aug_vel_yaw) + self.obs_motor, # Delayed motor observations + self.actions, # 12D + self.obs_imu, # Delayed IMU observations + ), + dim=-1, + ) + + if self.cfg.terrain.measure_heights: + heights = ( + torch.clip( + self.root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights, + -1, + 1.0, + ) + * self.obs_scales.height_measurements + ) + self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1) + if self.add_noise: + obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level + else: + obs_now = obs_buf.clone() + 
self.obs_history.append(obs_now) + self.critic_history.append(self.privileged_obs_buf) + + obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1) # N,T,K + + self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K + self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1) + + def reset_idx(self, env_ids): + super().reset_idx(env_ids) + for i in range(self.obs_history.maxlen): + self.obs_history[i][env_ids] *= 0 + for i in range(self.critic_history.maxlen): + self.critic_history[i][env_ids] *= 0 + + # ================================================ Rewards ================================================== # + def _reward_joint_pos(self): + """Calculates the reward based on the difference between the current joint positions and the target joint positions.""" + joint_pos = self.dof_pos.clone() + pos_target = self.ref_dof_pos.clone() + diff = joint_pos - pos_target + r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5) + + return r + + def _reward_feet_distance(self): + """Calculates the reward based on the distance between the feet. Penilize feet get close to each other or too far away.""" + foot_pos = self.rigid_state[:, self.feet_indices, :2] + foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1) + fd = self.cfg.rewards.min_dist + max_df = self.cfg.rewards.max_dist + d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) + d_max = torch.clamp(foot_dist - max_df, 0, 0.5) + return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 + + def _reward_knee_distance(self): + """Calculates the reward based on the distance between the knee of the humanoid.""" + foot_pos = self.rigid_state[:, self.knee_indices, :2] + foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1) + fd = self.cfg.rewards.min_dist + max_df = self.cfg.rewards.max_dist / 2 + d_min = torch.clamp(foot_dist - fd, -0.5, 0.0) + d_max = torch.clamp(foot_dist - max_df, 0, 0.5) + + return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 + + def _reward_foot_slip(self): + """Calculates the reward for minimizing foot slip. The reward is based on the contact forces + and the speed of the feet. A contact threshold is used to determine if the foot is in contact + with the ground. The speed of the foot is calculated and scaled by the contact condition. + """ + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2) + rew = torch.sqrt(foot_speed_norm) + rew *= contact + return torch.sum(rew, dim=1) + + def _reward_feet_air_time(self): + """Calculates the reward for feet air time, promoting longer steps. This is achieved by + checking the first contact with the ground after being in the air. The air time is + limited to a maximum value for reward calculation. 
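compute_observations earlier in this class appends each new single-frame observation to a fixed-length deque and flattens the whole history into the policy input; that is how frame_stack = 15 turns num_single_obs into num_observations. A minimal sketch of the stacking, assuming the deque is pre-filled with zeros:

from collections import deque

import torch

num_envs, num_single_obs, frame_stack = 4, 56, 15
obs_history = deque(maxlen=frame_stack)
for _ in range(frame_stack):                           # pre-fill so maxlen frames always exist
    obs_history.append(torch.zeros(num_envs, num_single_obs))

new_frame = torch.randn(num_envs, num_single_obs)
obs_history.append(new_frame)                          # oldest frame is dropped automatically

stacked = torch.stack(list(obs_history), dim=1)        # (num_envs, frame_stack, num_single_obs)
policy_input = stacked.reshape(num_envs, -1)           # (num_envs, frame_stack * num_single_obs)
print(policy_input.shape)                              # torch.Size([4, 840])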
+ """ + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + stance_mask = self._get_gait_phase() + self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts) + self.last_contacts = contact + first_contact = (self.feet_air_time > 0.0) * self.contact_filt + self.feet_air_time += self.dt + air_time = self.feet_air_time.clamp(0, 0.5) * first_contact + self.feet_air_time *= ~self.contact_filt + return air_time.sum(dim=1) + + def _reward_feet_contact_number(self): + """Calculates a reward based on the number of feet contacts aligning with the gait phase. + Rewards or penalizes depending on whether the foot contact matches the expected gait phase. + """ + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + stance_mask = self._get_gait_phase() + reward = torch.where(contact == stance_mask, 1.0, -0.3) + return torch.mean(reward, dim=1) + + def _reward_orientation(self): + """Calculates the reward for maintaining a flat base orientation. It penalizes deviation + from the desired base orientation using the base euler angles and the projected gravity vector. + """ + quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) + orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) + + return (quat_mismatch + orientation) / 2.0 + + def _reward_feet_contact_forces(self): + """Calculates the reward for keeping contact forces within a specified range. Penalizes + high contact forces on the feet. + """ + return torch.sum( + ( + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force + ).clip(0, 400), + dim=1, + ) + + def _reward_default_joint_pos(self): + """Calculates the reward for keeping joint positions close to default positions, with a focus + on penalizing deviation in yaw and roll directions. Excludes yaw and roll from the main penalty. + """ + joint_diff = self.dof_pos - self.default_joint_pd_target + left_yaw_roll = joint_diff[:, [self.legs_joints["left_hip_roll"], self.legs_joints["left_hip_yaw"]]] + right_yaw_roll = joint_diff[:, [self.legs_joints["right_hip_roll"], self.legs_joints["right_hip_yaw"]]] + yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1) + yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50) + + return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1) + + def _reward_base_height(self): + """Calculates the reward based on the robot's base height. Penalizes deviation from a target base height. + The reward is computed based on the height difference between the robot's base and the average height + of its feet when they are in contact with the ground. + """ + stance_mask = self._get_gait_phase() + measured_heights = torch.sum(self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum( + stance_mask, dim=1 + ) + base_height = self.root_states[:, 2] - (measured_heights - self.cfg.asset.default_feet_height) + reward = torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100) + return reward + + def _reward_base_acc(self): + """Computes the reward based on the base's acceleration. Penalizes high accelerations of the robot's base, + encouraging smoother motion. + """ + root_acc = self.last_root_vel - self.root_states[:, 7:13] + rew = torch.exp(-torch.norm(root_acc, dim=1) * 3) + return rew + + def _reward_vel_mismatch_exp(self): + """Computes a reward based on the mismatch in the robot's linear and angular velocities. 
+ Encourages the robot to maintain a stable velocity by penalizing large deviations. + """ + lin_mismatch = torch.exp(-torch.square(self.base_lin_vel[:, 2]) * 10) + ang_mismatch = torch.exp(-torch.norm(self.base_ang_vel[:, :2], dim=1) * 5.0) + + c_update = (lin_mismatch + ang_mismatch) / 2.0 + + return c_update + + def _reward_track_vel_hard(self): + """Calculates a reward for accurately tracking both linear and angular velocity commands. + Penalizes deviations from specified linear and angular velocity targets. + """ + # Tracking of linear velocity commands (xy axes) + lin_vel_error = torch.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) + lin_vel_error_exp = torch.exp(-lin_vel_error * 10) + + # Tracking of angular velocity commands (yaw) + ang_vel_error = torch.abs(self.commands[:, 2] - self.base_ang_vel[:, 2]) + ang_vel_error_exp = torch.exp(-ang_vel_error * 10) + + linear_error = 0.2 * (lin_vel_error + ang_vel_error) + + return (lin_vel_error_exp + ang_vel_error_exp) / 2.0 - linear_error + + def _reward_tracking_lin_vel(self): + """Tracks linear velocity commands along the xy axes. + Calculates a reward based on how closely the robot's linear velocity matches the commanded values. + """ + lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1) + return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma) + + def _reward_tracking_ang_vel(self): + """Tracks angular velocity commands for yaw rotation. + Computes a reward based on how closely the robot's angular velocity matches the commanded yaw values. + """ + ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2]) + return torch.exp(-ang_vel_error * self.cfg.rewards.tracking_sigma) + + def _reward_feet_clearance(self): + """Calculates reward based on the clearance of the swing leg from the ground during movement. + Encourages appropriate lift of the feet during the swing phase of the gait. + """ + # Compute feet contact mask + contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 + + # Get the z-position of the feet and compute the change in z-position + feet_z = self.rigid_state[:, self.feet_indices, 2] - self.cfg.asset.default_feet_height + delta_z = feet_z - self.last_feet_z + self.feet_height += delta_z + self.last_feet_z = feet_z + + # Compute swing mask + swing_mask = 1 - self._get_gait_phase() + + # feet height should be closed to target feet height at the peak + rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.02 + rew_pos = torch.sum(rew_pos * swing_mask, dim=1) + self.feet_height *= ~contact + + return rew_pos + + def _reward_low_speed(self): + """Rewards or penalizes the robot based on its speed relative to the commanded speed. + This function checks if the robot is moving too slow, too fast, or at the desired speed, + and if the movement direction matches the command. 
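The delayed obs_motor / obs_imu reads in compute_observations index a per-environment history buffer with a randomized latency step, so each robot observes sensor data that is several simulation steps old. A simplified sketch of that indexing, assuming the newest sample sits at index 0 of the buffer; the buffer and variable names are illustrative, not the class's exact attributes.

import torch

num_envs, obs_dim, max_delay = 4, 24, 10
# history buffer: index 0 holds the newest sample, index d holds the sample from d steps ago
history = torch.zeros(num_envs, obs_dim, max_delay + 1)

def push_sample(history, sample):
    history = torch.roll(history, shifts=1, dims=2)    # age every stored sample by one step
    history[:, :, 0] = sample
    return history

# per-env latency in simulation steps, e.g. resampled from range_obs_motor_latency = [0, 10]
delay_steps = torch.randint(0, max_delay + 1, (num_envs,))

for _ in range(20):                                    # simulate a few physics steps
    history = push_sample(history, torch.randn(num_envs, obs_dim))

delayed_obs = history[torch.arange(num_envs), :, delay_steps]   # (num_envs, obs_dim)
print(delayed_obs.shape)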
+ """ + # Calculate the absolute value of speed and command for comparison + absolute_speed = torch.abs(self.base_lin_vel[:, 0]) + absolute_command = torch.abs(self.commands[:, 0]) + + # Define speed criteria for desired range + speed_too_low = absolute_speed < 0.5 * absolute_command + speed_too_high = absolute_speed > 1.2 * absolute_command + speed_desired = ~(speed_too_low | speed_too_high) + + # Check if the speed and command directions are mismatched + sign_mismatch = torch.sign(self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0]) + + # Initialize reward tensor + reward = torch.zeros_like(self.base_lin_vel[:, 0]) + + # Assign rewards based on conditions + # Speed too low + reward[speed_too_low] = -1.0 + # Speed too high + reward[speed_too_high] = 0.0 + # Speed within desired range + reward[speed_desired] = 1.2 + # Sign mismatch has the highest priority + reward[sign_mismatch] = -2.0 + return reward * (self.commands[:, 0].abs() > 0.1) + + def _reward_torques(self): + """Penalizes the use of high torques in the robot's joints. Encourages efficient movement by minimizing + the necessary force exerted by the motors. + """ + return torch.sum(torch.square(self.torques), dim=1) + + def _reward_dof_vel(self): + """Penalizes high velocities at the degrees of freedom (DOF) of the robot. This encourages smoother and + more controlled movements. + """ + return torch.sum(torch.square(self.dof_vel), dim=1) + + def _reward_dof_acc(self): + """Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring + smooth and stable motion, reducing wear on the robot's mechanical parts. + """ + return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1) + + def _reward_collision(self): + """Penalizes collisions of the robot with the environment, specifically focusing on selected body parts. + This encourages the robot to avoid undesired contact with objects or surfaces. + """ + return torch.sum( + 1.0 * (torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), + dim=1, + ) + + def _reward_action_smoothness(self): + """Encourages smoothness in the robot's actions by penalizing large differences between consecutive actions. + This is important for achieving fluid motion and reducing mechanical stress. + """ + term_1 = torch.sum(torch.square(self.last_actions - self.actions), dim=1) + term_2 = torch.sum( + torch.square(self.actions + self.last_last_actions - 2 * self.last_actions), + dim=1, + ) + term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1) + return term_1 + term_2 + term_3 + + def _reward_termination(self): + """Returns a penalty for each environment that is terminated.""" + penalty = 1.0 + # Apply the penalty element-wise: only environments that need reset get the penalty + return penalty * self.reset_buf.float() + + def _initialize_push_intervals(self): + # Create a tensor of size (num_envs,) for the next push step. + # We'll assume self.num_envs is available and self.dt is the simulation time step. 
+ self.next_push_steps = torch.empty(self.num_envs, dtype=torch.int64, device=self.device) + for i in range(self.num_envs): + random_interval_sec = random.uniform( + self.cfg.domain_rand.push_random_interval_min, self.cfg.domain_rand.push_random_interval_max + ) + self.next_push_steps[i] = self.common_step_counter + int(random_interval_sec / self.dt) + + def _post_physics_step_callback(self): + """Callback called before computing terminations, rewards, and observations.""" + env_ids = ( + (self.episode_length_buf % int(self.cfg.commands.resampling_time / self.dt) == 0) + .nonzero(as_tuple=False) + .flatten() + ) + self._resample_commands(env_ids) + if self.cfg.commands.heading_command: + forward = quat_apply(self.base_quat, self.forward_vec) + heading = torch.atan2(forward[:, 1], forward[:, 0]) + self.commands[:, 2] = torch.clip(0.5 * wrap_to_pi(self.commands[:, 3] - heading), -1.0, 1.0) + if self.cfg.terrain.measure_heights: + self.measured_heights = self._get_heights() + if self.cfg.domain_rand.push_robots: + # Initialize per-env push intervals if not done yet. + if not hasattr(self, "next_push_steps"): + self._initialize_push_intervals() + # Determine which environments are due for a push + envs_to_push = (self.common_step_counter >= self.next_push_steps).nonzero(as_tuple=False).flatten() + if len(envs_to_push) > 0: + # Apply push only to the environments that are due + self._push_robots(envs_to_push) + # For each pushed environment, set a new random push time + for env_id in envs_to_push: + random_interval_sec = random.uniform( + self.cfg.domain_rand.push_random_interval_min, self.cfg.domain_rand.push_random_interval_max + ) + self.next_push_steps[env_id] = self.common_step_counter + int(random_interval_sec / self.dt) diff --git a/sim/envs/humanoids/gpr_vel_config.py b/sim/envs/humanoids/gpr_vel_config.py new file mode 100644 index 00000000..db24527d --- /dev/null +++ b/sim/envs/humanoids/gpr_vel_config.py @@ -0,0 +1,291 @@ +"""Defines the environment configuration for the Getting up task""" + +from sim.env import robot_urdf_path +from sim.envs.base.legged_robot_config import ( # type: ignore + LeggedRobotCfg, + LeggedRobotCfgPPO, +) +from sim.resources.gpr_vel.joints import Robot + +NUM_JOINTS = len(Robot.all_joints()) + + +class GprVelCfg(LeggedRobotCfg): + """Configuration class for the Legs humanoid robot.""" + + class env(LeggedRobotCfg.env): + # change the observation dim + frame_stack = 15 + c_frame_stack = 3 + num_single_obs = 8 + NUM_JOINTS * 4 # 2 for past actions, 1 for dof pos, 1 for dof vel + num_observations = int(frame_stack * num_single_obs) + single_num_privileged_obs = 25 + NUM_JOINTS * 5 # 2 for past actions, 1 for dof pos, 1 for dof vel, 1 for diff + num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) + num_joints = NUM_JOINTS + num_actions = NUM_JOINTS * 2 + num_envs = 4096 + episode_length_s = 24 # episode length in seconds + use_ref_actions = False + + class safety(LeggedRobotCfg.safety): + # safety factors + pos_limit = 1.0 + vel_limit = 1.0 + torque_limit = 1.0 + + class asset(LeggedRobotCfg.asset): + name = "gpr_vel" + + file = str(robot_urdf_path(name)) + + foot_name = ["foot1", "foot3"] + knee_name = ["leg3_shell2", "leg3_shell22"] + imu_name = "imu_link" + + # foot_name = ["foot1", "foot3"] + # knee_name = ["leg3_shell1", "leg3_shell11"] + # imu_name = "imu" + + # terminate_after_contacts_on = ["arm1_top_2", "arm1_top", "shoulder", "shoulder_2", + # "arm2_shell_2", "arm2_shell", "arm3_shell2", "arm3_shell", + # "hand_shell", "hand_shell_2"] 
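# Sketch of the observation bookkeeping defined in GprVelCfg.env above. Because the action
# space now carries both position and velocity targets, a single frame holds 8 scalars plus
# four NUM_JOINTS-sized blocks (dof_pos, dof_vel, and the two halves of the previous action).
# NUM_JOINTS below is an assumed example value; the real value is len(Robot.all_joints()).
NUM_JOINTS = 12  # placeholder for len(Robot.all_joints())
frame_stack, c_frame_stack = 15, 3

num_single_obs = 8 + NUM_JOINTS * 4
num_observations = frame_stack * num_single_obs
single_num_privileged_obs = 25 + NUM_JOINTS * 5
num_privileged_obs = c_frame_stack * single_num_privileged_obs
num_actions = NUM_JOINTS * 2  # position targets + velocity targets

print(num_single_obs, num_observations, num_privileged_obs, num_actions)
# with NUM_JOINTS = 12: 56, 840, 255, 24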
+ + termination_height = 0.5 # 0.2 + default_feet_height = 0.0 + + penalize_contacts_on = [] + self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter + flip_visual_attachments = False + replace_cylinder_with_capsule = False + fix_base_link = False + + class terrain(LeggedRobotCfg.terrain): + # mesh_type = "plane" + mesh_type = "trimesh" + curriculum = False + # rough terrain only: + measure_heights = False + static_friction = 0.6 + dynamic_friction = 0.6 + terrain_length = 8.0 + terrain_width = 8.0 + num_rows = 10 # number of terrain rows (levels) + num_cols = 10 # number of terrain cols (types) + max_init_terrain_level = 10 # starting curriculum state + # plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down + terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0] + restitution = 0.0 + + class noise: + add_noise = True + noise_level = 1.25 # 1.5 # 1.2 # 0.6 # scales other values + + class noise_scales: + dof_pos = 0.05 + dof_vel = 1.8 # 2.0 # 2.5 #0.5 + ang_vel = 0.1 + lin_vel = 0.05 + quat = 0.05 # 0.06 # 0.03 + height_measurements = 0.1 + + class init_state(LeggedRobotCfg.init_state): + pos = [0.0, 0.0, Robot.height + 0.025] + rot = Robot.rotation + default_joint_angles = {k: 0.0 for k in Robot.all_joints()} + + default_positions = Robot.default_standing() + for joint in default_positions: + default_joint_angles[joint] = default_positions[joint] + + class control(LeggedRobotCfg.control): + # PD Drive parameters: + stiffness = Robot.stiffness() + damping = Robot.damping() + # action scale: target angle = actionScale * action + defaultAngle + action_scale = 0.25 + # decimation: Number of control action updates @ sim DT per policy DT + decimation = 20 # 50hz + pd_decimation = 1 # 1000hz + + class sim(LeggedRobotCfg.sim): + dt = 0.001 # 1000 Hz + substeps = 1 # 2 + up_axis = 1 # 0 is y, 1 is z + + class physx(LeggedRobotCfg.sim.physx): + num_threads = 10 + solver_type = 1 # 0: pgs, 1: tgs + num_position_iterations = 4 + num_velocity_iterations = 1 + contact_offset = 0.01 # [m] + rest_offset = 0.0 # [m] + bounce_threshold_velocity = 0.1 # [m/s] + max_depenetration_velocity = 1.0 + max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more + default_buffer_size_multiplier = 5 + # 0: never, 1: last sub-step, 2: all sub-steps (default=2) + contact_collection = 2 + + class domain_rand(LeggedRobotCfg.domain_rand): + start_pos_noise = 0.3 # 0.1 + randomize_friction = True + friction_range = [0.1, 2.0] + + randomize_base_mass = True + added_mass_range = [-3.0, 3.0] # [-2.0, 2.0] + push_robots = True + push_random_interval_min = 2.0 + push_random_interval_max = 4.0 + max_push_vel_xy = 1.5 # 1.8 # 1.5 # 0.4 + max_push_ang_vel = 1.5 # 1.8 # 1.5 # 0.4 + # dynamic randomization + action_noise = 0.05 # 0.02 + action_delay = 0.5 + randomize_pd_gains = False + + class commands(LeggedRobotCfg.commands): + # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) + num_commands = 4 + resampling_time = 8.0 # time before command are changed[s] + heading_command = True # if true: compute ang vel command from heading error + + class ranges: + lin_vel_x = [-0.0, 1.0] # min max [m/s] + lin_vel_y = [-0.5, 0.5] # min max [m/s] + ang_vel_yaw = [-1.5, 1.5] # min max [rad/s] + heading = [-3.14, 3.14] + + class rewards: + # quite important to keep it right + base_height_target = Robot.height + min_dist = 0.2 + max_dist = 0.5 + # put some settings here for LLM parameter tuning + target_joint_pos_scale = 0.24 # rad + 
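# A minimal sketch of the exponential tracking kernel used by _reward_tracking_lin_vel and
# _reward_tracking_ang_vel: reward = exp(-error * tracking_sigma), where error is the squared
# command-tracking error and tracking_sigma (5.0 in this class) sets how sharply the reward
# falls off. The velocity numbers below are illustrative only.
import torch

tracking_sigma = 5.0
cmd_vel = torch.tensor([[0.5, 0.0]])    # commanded x/y base velocity for one env
base_vel = torch.tensor([[0.4, 0.05]])  # measured x/y base velocity

lin_vel_error = torch.sum(torch.square(cmd_vel - base_vel), dim=1)  # squared tracking error
reward = torch.exp(-lin_vel_error * tracking_sigma)
print(reward)  # ~exp(-0.0125 * 5) ~= 0.94: near 1 for tight tracking, decays toward 0 as error grows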
target_feet_height = 0.06 # m + + cycle_time = 0.4 # sec + # if true negative total rewards are clipped at zero (avoids early termination problems) + only_positive_rewards = True + # tracking reward = exp(error*sigma) + tracking_sigma = 5.0 + max_contact_force = 400 # forces above this value are penalized + + class scales: + termination = -10.0 + # reference motion tracking + joint_pos = 0.8 # 1.6 + feet_clearance = 0.8 # 1.2 + feet_contact_number = 0.8 # 0.8 # 1.4 + # gait + feet_air_time = 1.2 + foot_slip = -0.05 + knee_distance = 0.2 + # contact + feet_contact_forces = -0.01 + # vel tracking + tracking_lin_vel = 1.2 + tracking_ang_vel = 1.1 + vel_mismatch_exp = 0.5 # lin_z; ang x,y + low_speed = 0.2 + track_vel_hard = 0.5 + + # base pos + default_joint_pos = 0.5 + orientation = 1.0 + base_height = 0.2 + base_acc = 0.2 + # energy + action_smoothness = -0.004 # -0.002 + torques = -4e-5 + dof_vel = -5e-4 # -1e-3 + dof_acc = -2e-6 # -2.5e-7 # -1e-7 # -2.5e-7 + collision = -1.0 + + class normalization: + class obs_scales: + lin_vel = 2.0 + ang_vel = 1.0 + dof_pos = 1.0 + dof_vel = 0.05 + quat = 1.0 + height_measurements = 5.0 + + clip_observations = 18.0 + clip_actions = 18.0 + + class viewer: + ref_env = 0 + pos = [4, -4, 2] + lookat = [0, -2, 0] + + +class GprVelStandingCfg(GprVelCfg): + """Configuration class for the GPR humanoid robot.""" + + class init_state(LeggedRobotCfg.init_state): + pos = [0.0, 0.0, Robot.standing_height + 0.025] + rot = Robot.rotation + default_joint_angles = {k: 0.0 for k in Robot.all_joints()} + + class rewards: + # quite important to keep it right + base_height_target = Robot.standing_height + min_dist = 0.2 + max_dist = 0.5 + # put some settings here for LLM parameter tuning + target_joint_pos_scale = 0.17 # rad + target_feet_height = 0.06 # m + cycle_time = 0.4 # sec + # if true negative total rewards are clipped at zero (avoids early termination problems) + only_positive_rewards = False + # tracking reward = exp(error*sigma) + tracking_sigma = 5 + max_contact_force = 200 # forces above this value are penalized + + class scales: + # base pos + default_joint_pos = 1.0 + orientation = 1 + base_height = 0.2 + base_acc = 0.2 + # energy + action_smoothness = -0.002 + torques = -1e-5 + dof_vel = -1e-3 + dof_acc = -2.5e-7 + collision = -1.0 + + +class GprVelCfgPPO(LeggedRobotCfgPPO): + seed = 5 + runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner + + class policy: + init_noise_std = 1.0 + actor_hidden_dims = [512, 256, 128] + critic_hidden_dims = [768, 256, 128] + + class algorithm(LeggedRobotCfgPPO.algorithm): + entropy_coef = 0.001 + learning_rate = 1e-5 + num_learning_epochs = 2 + gamma = 0.994 + lam = 0.9 + num_mini_batches = 4 + + class runner: + policy_class_name = "ActorCritic" + algorithm_class_name = "PPO" + num_steps_per_env = 60 # per iteration + max_iterations = 3001 # number of policy updates + + # logging + save_interval = 100 # check for potential saves every this many iterations + experiment_name = "gpr_vel" + run_name = "" + # load and resume + resume = False + load_run = -1 # -1 = last run + checkpoint = -1 # -1 = last saved model + resume_path = None # updated from load_run and chkpt diff --git a/sim/envs/humanoids/zeroth_env.py b/sim/envs/humanoids/gpr_vel_env.py similarity index 74% rename from sim/envs/humanoids/zeroth_env.py rename to sim/envs/humanoids/gpr_vel_env.py index c65c9e20..1a4f0287 100644 --- a/sim/envs/humanoids/zeroth_env.py +++ b/sim/envs/humanoids/gpr_vel_env.py @@ -1,24 +1,26 @@ # mypy: disable-error-code="valid-newtype" 
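# Standalone sketch of the control scheme implemented by GprVelEnv below: the policy outputs
# 2 * num_joints actions, the first half is treated as position targets and the second half
# as velocity targets, and both feed a PD law (see step() and _compute_torques()). Gains,
# torque limits, and the joint count here are placeholder assumptions for illustration.
import torch

num_joints, action_scale = 12, 0.25
p_gains = torch.full((num_joints,), 40.0)        # placeholder stiffness
d_gains = torch.full((num_joints,), 2.0)         # placeholder damping
torque_limits = torch.full((num_joints,), 60.0)  # placeholder effort limits
default_dof_pos = torch.zeros(num_joints)
dof_pos = torch.zeros(num_joints)
dof_vel = torch.zeros(num_joints)

actions = torch.randn(2 * num_joints)
target_positions, target_velocities = actions[:num_joints], actions[num_joints:]

pos_scaled = target_positions * action_scale
vel_scaled = target_velocities * action_scale
torques = p_gains * (pos_scaled + default_dof_pos - dof_pos) + d_gains * (vel_scaled - dof_vel)
torques = torch.clip(torques, -torque_limits, torque_limits)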
"""Defines the environment for training the humanoid.""" +from isaacgym.torch_utils import * # isort:skip + from sim.envs.base.legged_robot import LeggedRobot -from sim.resources.zeroth.joints import Robot +from sim.resources.gpr_vel.joints import Robot +from sim.utils.math import wrap_to_pi from sim.utils.terrain import HumanoidTerrain from isaacgym import gymtorch # isort:skip -from isaacgym.torch_utils import * # isort: skip - import torch # isort:skip +import random # isort:skip -class ZerothEnv(LeggedRobot): - """ZerothFreeEnv is a class that represents a custom environment for a legged robot. +class GprVelEnv(LeggedRobot): + """GprVelEnv is a class that represents a custom environment for a legged robot. Args: - cfg: Configuration object for the legged robot. + cfg (LeggedRobotCfg): Configuration object for the legged robot. sim_params: Parameters for the simulation. - physics_engine: Physics engin e used in the simulation. + physics_engine: Physics engine used in the simulation. sim_device: Device used for the simulation. headless: Flag indicating whether the simulation should be run in headless mode. @@ -57,7 +59,6 @@ def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): self.legs_joints = {} for name, joint in Robot.legs.left.joints_motors(): - print(name) joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) self.legs_joints["left_" + name] = joint_handle @@ -66,20 +67,84 @@ def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): self.legs_joints["right_" + name] = joint_handle self.compute_observations() + self._initialize_push_intervals() + + def step(self, actions): + """Apply actions, simulate, call self.post_physics_step() + + Args: + actions (torch.Tensor): Tensor of shape (num_envs, num_actions_per_env) + """ + if self.cfg.env.use_ref_actions: + actions += self.ref_action + actions = torch.clip(actions, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions) + + # dynamic randomization + delay = torch.rand((self.num_envs, 1), device=self.device) * self.cfg.domain_rand.action_delay + actions = (1 - delay) * actions + delay * self.actions + actions += self.cfg.domain_rand.action_noise * torch.randn_like(actions) * actions + + clip_actions = self.cfg.normalization.clip_actions + self.actions = torch.clip(actions, -clip_actions, clip_actions).to(self.device) + # step physics and render each frame + target_positions = self.actions[:, : self.cfg.env.num_actions // 2] + target_velocities = self.actions[:, self.cfg.env.num_actions // 2 :] + self.render() + for _ in range(self.cfg.control.decimation): + self.torques = self._compute_torques(target_positions, target_velocities).view(self.torques.shape) + self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(self.torques)) + + self.gym.simulate(self.sim) + if self.device == "cpu": + self.gym.fetch_results(self.sim, True) + self.gym.refresh_dof_state_tensor(self.sim) + self.post_physics_step() + + # return clipped obs, clipped states (None), rewards, dones and infos + clip_obs = self.cfg.normalization.clip_observations + self.obs_buf = torch.clip(self.obs_buf, -clip_obs, clip_obs) + if self.privileged_obs_buf is not None: + self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs) + return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras + + def _compute_torques(self, positions, velocities): + """Compute torques from actions. 
+ Actions can be interpreted as position or velocity targets given to a PD controller, or directly as scaled torques. + [NOTE]: torques must have the same dimension as the number of DOFs, even if some DOFs are not actuated. - def _push_robots(self): - """Random pushes the robots. Emulates an impulse by setting a randomized base velocity.""" + Args: + actions (torch.Tensor): Actions + + Returns: + [torch.Tensor]: Torques sent to the simulation + """ + pos_scaled = positions * self.cfg.control.action_scale + vel_scaled = velocities * self.cfg.control.action_scale + p_gains = self.p_gains + d_gains = self.d_gains + torques = p_gains * (pos_scaled + self.default_dof_pos - self.dof_pos) + d_gains * (vel_scaled - self.dof_vel) + res = torch.clip(torques, -self.torque_limits, self.torque_limits) + return res + + def _push_robots(self, env_ids=None): + """Randomly pushes the robots for specific environments. + + If env_ids is None, push all environments; otherwise, push only for the provided indices. + """ max_vel = self.cfg.domain_rand.max_push_vel_xy max_push_angular = self.cfg.domain_rand.max_push_ang_vel - self.rand_push_force[:, :2] = torch_rand_float( - -max_vel, max_vel, (self.num_envs, 2), device=self.device - ) # lin vel x/y - self.root_states[:, 7:9] = self.rand_push_force[:, :2] - self.rand_push_torque = torch_rand_float( - -max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device + if env_ids is None: + env_ids = torch.arange(self.num_envs, device=self.device) + + # For the selected environments, sample random push forces and torques + self.rand_push_force[env_ids, :2] = torch_rand_float(-max_vel, max_vel, (len(env_ids), 2), device=self.device) + self.root_states[env_ids, 7:9] = self.rand_push_force[env_ids, :2] + + self.rand_push_torque[env_ids] = torch_rand_float( + -max_push_angular, max_push_angular, (len(env_ids), 3), device=self.device ) - self.root_states[:, 10:13] = self.rand_push_torque + self.root_states[env_ids, 10:13] = self.rand_push_torque[env_ids] self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) @@ -125,15 +190,17 @@ def compute_ref_state(self): scale_2 = 2 * scale_1 # left foot stance phase set to default joint pos sin_pos_l[sin_pos_l > 0] = 0 - self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1 + + self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += -sin_pos_l * scale_1 self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2 self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1 # right foot stance phase set to default joint pos sin_pos_r[sin_pos_r < 0] = 0 - self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1 - self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2 - self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1 + # pfb30 last one should be -1 + self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += -sin_pos_r * scale_1 + self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += -sin_pos_r * scale_2 + self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += -sin_pos_r * scale_1 # Double support phase self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0 @@ -172,20 +239,17 @@ def _get_noise_scale_vec(self, cfg): Returns: [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1] """ - num_actions = self.num_actions + num_joints = self.num_actions // 2 # Since num_actions is now NUM_JOINTS * 2 noise_vec = 
torch.zeros(self.cfg.env.num_single_obs, device=self.device) self.add_noise = self.cfg.noise.add_noise noise_scales = self.cfg.noise.noise_scales noise_vec[0:5] = 0.0 # commands - noise_vec[5 : (num_actions + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos - noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel - noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0 # previous actions - noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = ( - noise_scales.ang_vel * self.obs_scales.ang_vel - ) # ang vel - noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5)] = ( + noise_vec[5 : (num_joints + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos + noise_vec[(num_joints + 5) : (2 * num_joints + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel + noise_vec[(2 * num_joints + 5) : (4 * num_joints + 5)] = 0.0 # previous actions (now includes both pos and vel) + noise_vec[(4 * num_joints + 5) : (4 * num_joints + 8)] = ( noise_scales.quat * self.obs_scales.quat - ) # euler x,y + ) # projected_gravity return noise_vec def compute_observations(self): @@ -226,11 +290,10 @@ def compute_observations(self): obs_buf = torch.cat( ( self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, aug_vel_yaw) - q, # 20D - dq, # 20D - self.actions, # 20D - self.base_ang_vel * self.obs_scales.ang_vel, # 3 - self.base_euler_xyz * self.obs_scales.quat, # 3 + q, # 12D + dq, # 12D + self.actions, # 12D + self.projected_gravity, # 3 ), dim=-1, ) @@ -245,7 +308,6 @@ def compute_observations(self): * self.obs_scales.height_measurements ) self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1) - if self.add_noise: obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level else: @@ -327,7 +389,7 @@ def _reward_feet_contact_number(self): """ contact = self.contact_forces[:, self.feet_indices, 2] > 5.0 stance_mask = self._get_gait_phase() - reward = torch.where(contact == stance_mask, 1, -0.3) + reward = torch.where(contact == stance_mask, 1.0, -0.3) return torch.mean(reward, dim=1) def _reward_orientation(self): @@ -336,7 +398,8 @@ def _reward_orientation(self): """ quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) - return (quat_mismatch + orientation) / 2 + + return (quat_mismatch + orientation) / 2.0 def _reward_feet_contact_forces(self): """Calculates the reward for keeping contact forces within a specified range. Penalizes @@ -358,6 +421,7 @@ def _reward_default_joint_pos(self): right_yaw_roll = joint_diff[:, [self.legs_joints["right_hip_roll"], self.legs_joints["right_hip_yaw"]]] yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1) yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50) + return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1) def _reward_base_height(self): @@ -514,3 +578,49 @@ def _reward_action_smoothness(self): ) term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1) return term_1 + term_2 + term_3 + + def _reward_termination(self): + """Returns a penalty for each environment that is terminated.""" + penalty = 1.0 + # Apply the penalty element-wise: only environments that need reset get the penalty + return penalty * self.reset_buf.float() + + def _initialize_push_intervals(self): + # Create a tensor of size (num_envs,) for the next push step. 
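# For reference, a sketch of the single-frame layout assembled in compute_observations() and
# mirrored by _get_noise_scale_vec() above: 5 command entries, dof_pos, dof_vel, the previous
# (position + velocity) action, then the 3D projected gravity. num_joints is a placeholder,
# and the noise values are just the cfg noise_scales entries, ignoring the obs_scales factors.
import torch

num_joints = 12
num_single_obs = 8 + num_joints * 4

noise_vec = torch.zeros(num_single_obs)
noise_vec[0:5] = 0.0                                        # command input (noise-free)
noise_vec[5 : 5 + num_joints] = 0.05                        # dof_pos noise
noise_vec[5 + num_joints : 5 + 2 * num_joints] = 1.8        # dof_vel noise
noise_vec[5 + 2 * num_joints : 5 + 4 * num_joints] = 0.0    # previous actions (pos + vel targets)
noise_vec[5 + 4 * num_joints : 8 + 4 * num_joints] = 0.05   # projected gravity ("quat" scale)
assert noise_vec.numel() == num_single_obs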
+ # We'll assume self.num_envs is available and self.dt is the simulation time step. + self.next_push_steps = torch.empty(self.num_envs, dtype=torch.int64, device=self.device) + for i in range(self.num_envs): + random_interval_sec = random.uniform( + self.cfg.domain_rand.push_random_interval_min, self.cfg.domain_rand.push_random_interval_max + ) + self.next_push_steps[i] = self.common_step_counter + int(random_interval_sec / self.dt) + + def _post_physics_step_callback(self): + """Callback called before computing terminations, rewards, and observations.""" + env_ids = ( + (self.episode_length_buf % int(self.cfg.commands.resampling_time / self.dt) == 0) + .nonzero(as_tuple=False) + .flatten() + ) + self._resample_commands(env_ids) + if self.cfg.commands.heading_command: + forward = quat_apply(self.base_quat, self.forward_vec) + heading = torch.atan2(forward[:, 1], forward[:, 0]) + self.commands[:, 2] = torch.clip(0.5 * wrap_to_pi(self.commands[:, 3] - heading), -1.0, 1.0) + if self.cfg.terrain.measure_heights: + self.measured_heights = self._get_heights() + if self.cfg.domain_rand.push_robots: + # Initialize per-env push intervals if not done yet. + if not hasattr(self, "next_push_steps"): + self._initialize_push_intervals() + # Determine which environments are due for a push + envs_to_push = (self.common_step_counter >= self.next_push_steps).nonzero(as_tuple=False).flatten() + if len(envs_to_push) > 0: + # Apply push only to the environments that are due + self._push_robots(envs_to_push) + # For each pushed environment, set a new random push time + for env_id in envs_to_push: + random_interval_sec = random.uniform( + self.cfg.domain_rand.push_random_interval_min, self.cfg.domain_rand.push_random_interval_max + ) + self.next_push_steps[env_id] = self.common_step_counter + int(random_interval_sec / self.dt) diff --git a/sim/envs/humanoids/zbot2_config.py b/sim/envs/humanoids/zbot2_config.py index e0caa10a..bff23e4e 100644 --- a/sim/envs/humanoids/zbot2_config.py +++ b/sim/envs/humanoids/zbot2_config.py @@ -1,7 +1,5 @@ """Defines the environment configuration for the Getting up task""" - - from sim.env import robot_urdf_path from sim.envs.base.legged_robot_config import ( # type: ignore LeggedRobotCfg, @@ -19,8 +17,7 @@ class env(LeggedRobotCfg.env): # change the observation dim frame_stack = 15 c_frame_stack = 3 - # num_single_obs = 11 + NUM_JOINTS * 3 - num_single_obs = 8 + NUM_JOINTS * 3 # pfb30 + num_single_obs = 8 + NUM_JOINTS * 3 num_observations = int(frame_stack * num_single_obs) single_num_privileged_obs = 25 + NUM_JOINTS * 4 num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) @@ -264,7 +261,7 @@ class scales: foot_slip = -0.1 feet_distance = 0.2 knee_distance = 0.2 - # contact + # contact feet_contact_forces = -0.02 # vel tracking tracking_lin_vel = 1.4 @@ -275,7 +272,7 @@ class scales: # stand_still = 5 # base pos default_joint_pos = 0.8 - orientation = 1. + orientation = 1.0 base_height = 0.2 base_acc = 0.2 # energy @@ -283,7 +280,7 @@ class scales: torques = -1e-10 dof_vel = -1e-5 dof_acc = -5e-9 - collision = -1. 
+ collision = -1.0 class normalization: class obs_scales: diff --git a/sim/envs/humanoids/zbot2_env.py b/sim/envs/humanoids/zbot2_env.py index ad30246f..17a6ec64 100644 --- a/sim/envs/humanoids/zbot2_env.py +++ b/sim/envs/humanoids/zbot2_env.py @@ -181,11 +181,8 @@ def _get_noise_scale_vec(self, cfg): noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0 # previous actions noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = ( - noise_scales.ang_vel * self.obs_scales.ang_vel - ) # ang vel - noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5)] = ( noise_scales.quat * self.obs_scales.quat - ) # euler x,y + ) # projected_gravity return noise_vec def compute_observations(self): @@ -204,12 +201,6 @@ def compute_observations(self): diff = self.dof_pos - self.ref_dof_pos - # pfb30 - # if self.cfg.sim.use_projected_gravity: - # observation_imu = self.projected_gravity.clone() - # else: - # observation_imu = self.base_euler_xyz.clone() * self.obs_scales.quat - self.privileged_obs_buf = torch.cat( ( self.command_input, # 2 + 3 @@ -230,18 +221,13 @@ def compute_observations(self): dim=-1, ) - obs_buf = torch.cat( ( self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, aug_vel_yaw) q, # 20D dq, # 20D self.actions, # 20D - # pfb30 self.projected_gravity, - # self.base_quat, # 4 - # self.base_ang_vel * self.obs_scales.ang_vel, # 3 - # observation_imu, # 3 ), dim=-1, ) diff --git a/sim/model_export.py b/sim/model_export.py index 2aba1250..289d04a1 100644 --- a/sim/model_export.py +++ b/sim/model_export.py @@ -1,19 +1,19 @@ """Script to convert weights to Rust-compatible format.""" +import importlib import re from dataclasses import dataclass, fields from io import BytesIO -from typing import List, Optional, Tuple +from typing import Any, List, Optional, Tuple, Union -import onnx +import onnx import onnxruntime as ort import torch from torch import Tensor, nn from torch.distributions import Normal -import importlib -def load_embodiment(embodiment: str): # noqa: ANN401 +def load_embodiment(embodiment: str) -> Any: # noqa: ANN401 # Dynamically import embodiment module_name = f"sim.resources.{embodiment}.joints" module = importlib.import_module(module_name) @@ -32,10 +32,14 @@ class ActorCfg: dof_pos_scale: float # Scale for joint positions dof_vel_scale: float # Scale for joint velocities frame_stack: int # Number of frames to stack for the policy input + num_single_obs: int # Number of single observation features clip_observations: float # Clip observations to this value clip_actions: float # Clip actions to this value + num_actions: int # Number of actions + num_joints: int # Number of joints sim_dt: float # Simulation time step sim_decimation: int # Simulation decimation + pd_decimation: int # PD decimation tau_factor: float # Torque limit factor use_projected_gravity: bool # Use projected gravity as IMU observation @@ -57,7 +61,7 @@ def __init__( mlp_input_dim_c = num_critic_obs # Policy function. - actor_layers = [] + actor_layers: List[Union[nn.Linear, nn.Module]] = [] actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0])) actor_layers.append(activation) for dim_i in range(len(actor_hidden_dims)): @@ -69,7 +73,7 @@ def __init__( self.actor = nn.Sequential(*actor_layers) # Value function. 
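# Minimal sketch of the MLP construction pattern used by ActorCritic above: Linear + activation
# blocks over the hidden dims from the PPO config (actor_hidden_dims = [512, 256, 128] in
# GprVelCfgPPO), ending in a linear layer onto the action dimension. The ELU activation and
# the input/output sizes below are assumptions for illustration.
from typing import List

import torch
from torch import nn

def build_mlp(input_dim: int, hidden_dims: List[int], output_dim: int) -> nn.Sequential:
    layers: List[nn.Module] = []
    prev = input_dim
    for h in hidden_dims:
        layers += [nn.Linear(prev, h), nn.ELU()]
        prev = h
    layers.append(nn.Linear(prev, output_dim))
    return nn.Sequential(*layers)

actor = build_mlp(input_dim=840, hidden_dims=[512, 256, 128], output_dim=24)
print(actor(torch.zeros(1, 840)).shape)  # torch.Size([1, 24])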
- critic_layers = [] + critic_layers: List[Union[nn.Linear, nn.Module]] = [] critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0])) critic_layers.append(activation) for dim_i in range(len(critic_hidden_dims)): @@ -84,8 +88,7 @@ def __init__( self.std = nn.Parameter(init_noise_std * torch.ones(num_actions)) self.distribution = None - # Disable args validation for speedup. - Normal.set_default_validate_args = False + Normal.set_default_validate_args = False # type: ignore[unused-ignore, assignment, method-assign] class Actor(nn.Module): @@ -103,13 +106,15 @@ def __init__(self, policy: nn.Module, cfg: ActorCfg) -> None: # Policy config default_dof_pos_dict = self.robot.default_standing() - self.num_actions = len(self.robot.all_joints()) self.frame_stack = cfg.frame_stack + self.num_actions = cfg.num_actions + self.num_joints = cfg.num_joints + # 11 is the number of single observation features - 6 from IMU, 5 from command input # 9 is the number of single observation features - 3 from IMU(quat), 5 from command input # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) - self.num_single_obs = 8 + self.num_actions * 3 # pfb30 + self.num_single_obs = cfg.num_single_obs # 11 + self.num_actions * 3 # pfb30 self.num_observations = int(self.frame_stack * self.num_single_obs) self.policy = policy @@ -145,6 +150,7 @@ def forward( prev_actions: Tensor, # previous actions taken by the model projected_gravity: Tensor, # quaternion of the IMU # imu_euler_xyz: Tensor, # euler angles of the IMU + # imu_ang_vel: Tensor, # angular velocity of the IMU buffer: Tensor, # buffer of previous observations ) -> Tuple[Tensor, Tensor, Tensor]: """Runs the actor model forward pass. @@ -157,6 +163,7 @@ def forward( dof_pos: The current angular position of the DoFs relative to default, with shape (num_actions). dof_vel: The current angular velocity of the DoFs, with shape (num_actions). prev_actions: The previous actions taken by the model, with shape (num_actions). + projected_gravity: The projected gravity vector, with shape (3), in meters per second squared. imu_ang_vel: The angular velocity of the IMU, with shape (3), in radians per second. If IMU is not used, can be all zeros. 
imu_euler_xyz: The euler angles of the IMU, with shape (3), @@ -173,7 +180,6 @@ def forward( """ sin_pos = torch.sin(2 * torch.pi * t / self.cycle_time) cos_pos = torch.cos(2 * torch.pi * t / self.cycle_time) - # Construct command input command_input = torch.cat( ( @@ -190,10 +196,6 @@ def forward( q = dof_pos * self.dof_pos_scale dq = dof_vel * self.dof_vel_scale - # if self.use_projected_gravity: - # imu_observation = imu_euler_xyz - # else: - # imu_observation = imu_euler_xyz * self.quat_scale # Construct new observation new_x = torch.cat( ( @@ -202,8 +204,7 @@ def forward( dq, prev_actions, projected_gravity, - # imu_ang_vel * self.ang_vel_scale, - # imu_observation, + # imu_ang_vel, ), dim=0, ) @@ -217,7 +218,6 @@ def forward( x = x[self.num_single_obs :] policy_input = x.unsqueeze(0) - # Get actions from the policy actions = self.policy(policy_input).squeeze(0) actions_scaled = actions * self.action_scale @@ -226,7 +226,7 @@ def forward( def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, Tuple[Tensor, ...]]: - all_weights = torch.load(model_path, map_location="cpu")#, weights_only=True) + all_weights = torch.load(model_path, map_location="cpu") weights = all_weights["model_state_dict"] num_actor_obs = weights["actor.0.weight"].shape[1] num_critic_obs = weights["critic.0.weight"].shape[1] @@ -246,15 +246,26 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T y_vel = torch.randn(1) rot = torch.randn(1) t = torch.randn(1) - dof_pos = torch.randn(a_model.num_actions) - dof_vel = torch.randn(a_model.num_actions) + dof_pos = torch.randn(a_model.num_joints) + dof_vel = torch.randn(a_model.num_joints) prev_actions = torch.randn(a_model.num_actions) - projected_gravity = torch.randn(3) # pfb30 - # imu_euler_xyz = torch.randn(3) + projected_gravity = torch.randn(3) + # ang_vel = torch.randn(3) buffer = a_model.get_init_buffer() - input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, projected_gravity, buffer) - - jit_model = torch.jit.script(a_model) + # input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, projected_gravity, ang_vel, buffer) + input_tensors = ( + x_vel, + y_vel, + rot, + t, + dof_pos, + dof_vel, + prev_actions, + projected_gravity, + buffer, + ) + + # jit_model = torch.jit.script(a_model) # Add sim2sim metadata robot_effort = list(a_model.robot.effort().values()) @@ -264,14 +275,19 @@ def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[nn.Module, dict, T num_actions = a_model.num_actions num_observations = a_model.num_observations - return a_model, { - "robot_effort": robot_effort, - "robot_stiffness": robot_stiffness, - "robot_damping": robot_damping, - "default_standing": default_standing, - "num_actions": num_actions, - "num_observations": num_observations, - }, input_tensors + return ( + a_model, + { + "robot_effort": robot_effort, + "robot_stiffness": robot_stiffness, + "robot_damping": robot_damping, + "default_standing": default_standing, + "num_actions": num_actions, + "num_observations": num_observations, + "num_joints": a_model.num_joints, + }, + input_tensors, + ) def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[str] = None) -> ort.InferenceSession: @@ -285,7 +301,7 @@ def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[st Returns: An ONNX inference session. 
""" - all_weights = torch.load(model_path, map_location="cpu")#, weights_only=True) + all_weights = torch.load(model_path, map_location="cpu") # , weights_only=True) weights = all_weights["model_state_dict"] num_actor_obs = weights["actor.0.weight"].shape[1] num_critic_obs = weights["critic.0.weight"].shape[1] @@ -360,4 +376,4 @@ def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[st if __name__ == "__main__": - convert_model_to_onnx("model_3000.pt", ActorCfg(), "policy.onnx") \ No newline at end of file + print("hi there :)") diff --git a/sim/mujoco_eval.py b/sim/mujoco_eval.py new file mode 100755 index 00000000..f0d81de1 --- /dev/null +++ b/sim/mujoco_eval.py @@ -0,0 +1,890 @@ +"""Sim2sim deployment evaluation. + +Completely self-contained. + +Run: + python sim/mujoco_eval.py --load_model examples/gpr_walking.kinfer --embodiment gpr + python sim/mujoco_eval.py --load_model kinfer_policy.onnx --embodiment zbot2 +""" + +import argparse +import dataclasses +import os +import time +from copy import deepcopy +from dataclasses import asdict, dataclass +from typing import Callable, List, Optional, Tuple, TypedDict + +import gpytorch +import mujoco +import mujoco_viewer +import numpy as np +import onnxruntime as ort +import pygame +import torch +from kinfer.inference.python import ONNXModel +from scipy.spatial.transform import Rotation as R +from tqdm import tqdm + + +@dataclass +class ModelInfo(TypedDict): + num_actions: int + num_observations: int + robot_effort: List[float] + robot_stiffness: List[float] + robot_damping: List[float] + sim_dt: float + sim_decimation: int + pd_decimation: int + tau_factor: float + + +def handle_keyboard_input() -> None: + global x_vel_cmd, y_vel_cmd, yaw_vel_cmd + + keys = pygame.key.get_pressed() + + # Update movement commands based on arrow keys + if keys[pygame.K_UP]: + x_vel_cmd += 0.0005 + if keys[pygame.K_DOWN]: + x_vel_cmd -= 0.0005 + if keys[pygame.K_LEFT]: + y_vel_cmd += 0.0005 + if keys[pygame.K_RIGHT]: + y_vel_cmd -= 0.0005 + + # Yaw control + if keys[pygame.K_a]: + yaw_vel_cmd += 0.001 + if keys[pygame.K_z]: + yaw_vel_cmd -= 0.001 + + +def quaternion_to_euler_array(quat: np.ndarray) -> np.ndarray: + # Ensure quaternion is in the correct format [x, y, z, w] + x, y, z, w = quat + + # Roll (x-axis rotation) + t0 = +2.0 * (w * x + y * z) + t1 = +1.0 - 2.0 * (x * x + y * y) + roll_x = np.arctan2(t0, t1) + + # Pitch (y-axis rotation) + t2 = +2.0 * (w * y - z * x) + t2 = np.clip(t2, -1.0, 1.0) + pitch_y = np.arcsin(t2) + + # Yaw (z-axis rotation) + t3 = +2.0 * (w * z + x * y) + t4 = +1.0 - 2.0 * (y * y + z * z) + yaw_z = np.arctan2(t3, t4) + + # Returns roll, pitch, yaw in a NumPy array in radians + return np.array([roll_x, pitch_y, yaw_z]) + + +def get_gravity_orientation(quaternion: np.ndarray) -> np.ndarray: + """Get the gravity orientation from the quaternion. 
+ + Args: + quaternion: np.ndarray[float, float, float, float] + + Returns: + gravity_orientation: np.ndarray[float, float, float] + """ + qw = quaternion[0] + qx = quaternion[1] + qy = quaternion[2] + qz = quaternion[3] + + gravity_orientation = np.zeros(3) + + gravity_orientation[0] = 2 * (-qz * qx + qw * qy) + gravity_orientation[1] = -2 * (qz * qy + qw * qx) + gravity_orientation[2] = 1 - 2 * (qw * qw + qz * qz) + + return gravity_orientation + + +@dataclass +class RealWorldParams: + """Parameters to simulate real-world conditions.""" + + sensor_latency: int = 2 # frames of delay for sensor readings + motor_latency: int = 1 # frames of delay for motor commands + sensor_noise_std: float = 0.01 # standard deviation of sensor noise + motor_noise_std: float = 0.02 # standard deviation of motor noise + init_pos_noise_std: float = 0.03 # standard deviation of initial position noise + + +class SensorBuffer: + """Ring buffer to simulate sensor latency.""" + + def __init__(self, size: int, shape: tuple) -> None: + self.buffer = np.zeros((size,) + shape) + self.size = size + self.idx = 0 + + def push(self, data: np.ndarray) -> None: + self.buffer[self.idx] = data + self.idx = (self.idx + 1) % self.size + + def get(self) -> np.ndarray: + return self.buffer[self.idx] + + +def get_obs(data: mujoco.MjData, sensor_buffer: SensorBuffer, params: RealWorldParams) -> Tuple[np.ndarray, ...]: + """Extracts an observation from the mujoco data structure with added noise and latency.""" + q = data.qpos.astype(np.double) + dq = data.qvel.astype(np.double) + quat = data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double) + omega = data.sensor("angular-velocity").data.astype(np.double) + base_vel = data.qvel[:3].astype(np.double) # Store base velocity before adding noise + + # Add sensor noise + q += np.random.normal(0, params.sensor_noise_std, q.shape) + dq += np.random.normal(0, params.sensor_noise_std, dq.shape) + quat += np.random.normal(0, params.sensor_noise_std, quat.shape) + omega += np.random.normal(0, params.sensor_noise_std, omega.shape) + base_vel += np.random.normal(0, params.sensor_noise_std, base_vel.shape) + + # Normalize quaternion after adding noise + quat_norm = np.linalg.norm(quat) + if quat_norm < 1e-6: # If quaternion is too close to zero + quat = np.array([0.0, 0.0, 0.0, 1.0]) # Default to identity rotation + else: + quat = quat / quat_norm + + # Store in buffer to simulate latency + sensor_data = np.concatenate([q, dq, quat, omega, base_vel]) + sensor_buffer.push(sensor_data) + delayed_data = sensor_buffer.get() + + # Unpack delayed data + q = delayed_data[: len(q)] + dq = delayed_data[len(q) : len(q) + len(dq)] + quat = delayed_data[len(q) + len(dq) : len(q) + len(dq) + 4] + omega = delayed_data[len(q) + len(dq) + 4 : len(q) + len(dq) + 4 + 3] + base_vel = delayed_data[len(q) + len(dq) + 4 + 3 :] + + # Ensure quaternion is normalized after delay and handle zero case + quat_norm = np.linalg.norm(quat) + if quat_norm < 1e-6: # If quaternion is too close to zero + quat = np.array([0.0, 0.0, 0.0, 1.0]) # Default to identity rotation + else: + quat = quat / quat_norm + + try: + r = R.from_quat(quat) + except ValueError: + print(f"Warning: Invalid quaternion detected: {quat}, norm: {np.linalg.norm(quat)}") + # Fall back to identity rotation + quat = np.array([0.0, 0.0, 0.0, 1.0]) + r = R.from_quat(quat) + + gvec = r.apply(np.array([0.0, 0.0, -1.0]), inverse=True).astype(np.double) + v = r.apply(base_vel, inverse=True).astype(np.double) + + return (q, dq, quat, v, omega, gvec) + + +def 
pd_control( + target_q: np.ndarray, + q: np.ndarray, + kp: np.ndarray, + dq: np.ndarray, + kd: np.ndarray, + default: np.ndarray, +) -> np.ndarray: + """Calculates torques from position commands.""" + return kp * (target_q + default - q) - kd * dq + + +def run_mujoco( + embodiment: str, + policy: ort.InferenceSession, + model_info: ModelInfo, + real_world_params: RealWorldParams, + keyboard_use: bool = False, + render: bool = True, + sim_duration: float = 60.0, + terrain: bool = False, + random_pushes: bool = False, + push_magnitude: float = 100.0, # Maximum force magnitude in Newtons + push_duration: float = 0.2, # Duration of push in seconds + min_push_interval: float = 1.0, # Minimum time between pushes + max_push_interval: float = 3.0, # Maximum time between pushes +) -> bool: + """Run the Mujoco simulation using the provided policy and configuration.""" + model_dir = os.environ.get("MODEL_DIR", "sim/resources") + if terrain: + mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed_terrain.xml" + else: + mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed.xml" + + model = mujoco.MjModel.from_xml_path(mujoco_model_path) + model.opt.timestep = model_info["sim_dt"] + data = mujoco.MjData(model) + + assert isinstance(model_info["num_actions"], int) + assert isinstance(model_info["num_observations"], int) + assert isinstance(model_info["robot_effort"], list) + assert isinstance(model_info["robot_stiffness"], list) + assert isinstance(model_info["robot_damping"], list) + + # tau_limit = np.array(list(model_info["robot_effort"]) + + # list(model_info["robot_effort"])) * model_info["tau_factor"] + # kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"])) + # kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"])) + # HACKY FOR HEADLESS + efforts = model_info["robot_effort"] + stiffnesses = model_info["robot_stiffness"] + dampings = model_info["robot_damping"] + leg_lims = [efforts[0], efforts[1], efforts[1], efforts[0], efforts[2]] + tau_limit = np.array(leg_lims + leg_lims) * model_info["tau_factor"] + + leg_kps = [stiffnesses[0], stiffnesses[1], stiffnesses[1], stiffnesses[0], stiffnesses[2]] + kps = np.array(leg_kps + leg_kps) + + leg_kds = [dampings[0], dampings[1], dampings[1], dampings[0], dampings[2]] + kds = np.array(leg_kds + leg_kds) + try: + data.qpos = model.keyframe("default").qpos + default = deepcopy(model.keyframe("default").qpos)[-model_info["num_actions"] :] + # Add noise to initial position + default += np.random.normal(0, real_world_params.init_pos_noise_std, default.shape) + print("Default position (with noise):", default) + except Exception as e: + print(f"No default position found, using noisy zero initialization: {e}") + default = np.random.normal(0, real_world_params.init_pos_noise_std, model_info["num_actions"]) + + # Initialize sensor buffer for latency simulation + sensor_shape = (len(data.qpos) + len(data.qvel) + 4 + 3 + 3,) # qpos + qvel + quat + omega + base_vel + print(f"Sensor buffer shape: {sensor_shape}, Total size: {sum(sensor_shape)}") + sensor_buffer = SensorBuffer(real_world_params.sensor_latency, sensor_shape) + + # Get initial valid sensor data + q = data.qpos.astype(np.double) + dq = data.qvel.astype(np.double) + quat = data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double) + quat_norm = np.linalg.norm(quat) + if quat_norm < 1e-6: # If quaternion is too close to zero + quat = np.array([0.0, 0.0, 0.0, 1.0]) # Default to identity rotation + else: + quat = quat / quat_norm 
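# Usage sketch of the SensorBuffer ring buffer defined above: with size == sensor_latency,
# get() returns the oldest of the last `size` pushed samples, i.e. a reading size-1 control
# steps old, which is how sensor delay is emulated. The class is repeated here only so the
# snippet runs on its own; the values are illustrative.
import numpy as np

class SensorBuffer:
    def __init__(self, size, shape):
        self.buffer = np.zeros((size,) + shape)
        self.size = size
        self.idx = 0

    def push(self, data):
        self.buffer[self.idx] = data
        self.idx = (self.idx + 1) % self.size

    def get(self):
        return self.buffer[self.idx]

buf = SensorBuffer(size=2, shape=(1,))
for t in range(5):
    buf.push(np.array([float(t)]))
    print(t, buf.get())  # from t >= 2 this prints the sample pushed one step earlier (size-1 delay)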
+ omega = data.sensor("angular-velocity").data.astype(np.double) + base_vel = data.qvel[:3].astype(np.double) + + # Create initial sensor data with valid quaternion + initial_sensor_data = np.concatenate([q, dq, quat, omega, base_vel]) + + # Initialize buffer with valid data + for _ in range(real_world_params.sensor_latency): + sensor_buffer.push(initial_sensor_data) + + # Buffer for motor command latency + motor_buffer = SensorBuffer(real_world_params.motor_latency, (model_info["num_actions"],)) + # Initialize motor buffer with zeros + for _ in range(real_world_params.motor_latency): + motor_buffer.push(np.zeros(model_info["num_actions"])) + + mujoco.mj_step(model, data) + for ii in range(len(data.ctrl) + 1): + print(data.joint(ii).id, data.joint(ii).name) + + data.qvel = np.zeros_like(data.qvel) + data.qacc = np.zeros_like(data.qacc) + + if render: + viewer = mujoco_viewer.MujocoViewer(model, data) + + target_q = np.zeros((model_info["num_actions"]), dtype=np.double) + prev_actions = np.zeros((model_info["num_actions"]), dtype=np.double) + hist_obs = np.zeros((model_info["num_observations"]), dtype=np.double) + + count_lowlevel = 0 + + input_data = { + "x_vel.1": np.zeros(1).astype(np.float32), + "y_vel.1": np.zeros(1).astype(np.float32), + "rot.1": np.zeros(1).astype(np.float32), + "t.1": np.zeros(1).astype(np.float32), + "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32), + "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32), + "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32), + # "imu_euler_xyz.1": np.zeros(3).astype(np.float32), + "projected_gravity.1": np.zeros(3).astype(np.float32), + "imu_ang_vel.1": np.zeros(3).astype(np.float32), + "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32), + } + + # Initialize variables for tracking upright steps and average speed + upright_steps = 0 + total_speed = 0.0 + step_count = 0 + + # Initialize push-related variables if random pushes are enabled + if random_pushes: + next_push_time = np.random.uniform(min_push_interval, max_push_interval) # Random time for first push + current_push_end = 0.0 + current_push_force = np.zeros(3) + + for _ in tqdm(range(int(sim_duration / model_info["sim_dt"])), desc="Simulating..."): + current_time = _ * model_info["sim_dt"] + + if random_pushes: + # Check if it's time for a new push + if current_time >= next_push_time and current_time >= current_push_end: + # Generate random force direction (in horizontal plane) + angle = np.random.uniform(0, 2 * np.pi) + force_magnitude = np.random.uniform(0.3 * push_magnitude, push_magnitude) + current_push_force = np.array( + [force_magnitude * np.cos(angle), force_magnitude * np.sin(angle), 0.0] # No vertical force + ) + + # Set push duration and next push time + current_push_end = current_time + push_duration + next_push_interval = np.random.uniform(min_push_interval, max_push_interval) + next_push_time = current_time + push_duration + next_push_interval + + # print( + # f"\nApplying push at t={current_time:.2f}s: magnitude={force_magnitude:.1f}N, " + # f"angle={angle:.1f}rad, next push in {next_push_interval:.1f}s" + # ) + + # Apply force if within push duration + if current_time < current_push_end: + data.xfrc_applied[1] = np.concatenate([current_push_force, np.zeros(3)]) + else: + data.xfrc_applied[1] = np.zeros(6) + + if keyboard_use: + handle_keyboard_input() + + # Obtain an observation + q, dq, quat, v, omega, gvec = get_obs(data, sensor_buffer, real_world_params) + q = 
q[-model_info["num_actions"] :] + dq = dq[-model_info["num_actions"] :] + + # eu_ang = quaternion_to_euler_array(quat) + # eu_ang[eu_ang > math.pi] -= 2 * math.pi + + # eu_ang = np.array([0.0, 0.0, 0.0]) + # omega = np.array([0.0, 0.0, 0.0]) + # gvec = np.array([0.0, 0.0, -1.0]) + + is_upright = gvec[2] < -0.8 + if is_upright: + upright_steps += 1 + + # Calculate speed and accumulate for average speed calculation + speed = np.linalg.norm(v[:2]) # Speed in the x-y plane + total_speed += speed.item() + step_count += 1 + + # If robot falls, print statistics and break + if not is_upright and upright_steps > 0: + break + + # 1000hz -> 50hz + if count_lowlevel % model_info["sim_decimation"] == 0: + # Convert sim coordinates to policy coordinates + cur_pos_obs = q - default + cur_vel_obs = dq + + input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32) + input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32) + input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32) + + input_data["t.1"] = np.array([count_lowlevel * model_info["sim_dt"]], dtype=np.float32) + + input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32) + input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32) + + input_data["prev_actions.1"] = prev_actions.astype(np.float32) + + input_data["projected_gravity.1"] = gvec.astype(np.float32) + input_data["imu_ang_vel.1"] = omega.astype(np.float32) + # input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32) + + input_data["buffer.1"] = hist_obs.astype(np.float32) + + policy_output = policy(input_data) + positions = policy_output["actions_scaled"] + curr_actions = policy_output["actions"] + hist_obs = policy_output["x.3"] + + target_q = positions + + prev_actions = curr_actions + + if count_lowlevel % model_info["pd_decimation"] == 0: + # Generate PD control with motor noise + tau = pd_control(target_q, q, kps, dq, kds, default) + tau = np.clip(tau, -tau_limit, tau_limit) + tau += np.random.normal(0, real_world_params.motor_noise_std, tau.shape) + + # Simulate motor command latency + motor_buffer.push(tau) + tau = motor_buffer.get() + + data.ctrl = tau + mujoco.mj_step(model, data) + + if render: + viewer.render() + count_lowlevel += 1 + + if render: + viewer.close() + + # Calculate average speed + if step_count > 0: + average_speed = total_speed / step_count + else: + average_speed = 0.0 + + # Save or print the statistics at the end of the episode + print("=" * 100) + print("PERFORMANCE STATISTICS") + print("-" * 100) + print(f"Number of upright steps: {upright_steps} ({upright_steps * model_info['sim_dt']:.2f} seconds)") + print(f"Average speed: {average_speed:.4f} m/s") + print("=" * 100) + + return upright_steps == int(sim_duration / model_info["sim_dt"]) + + +def test_robustness( + embodiment: str, + policy: ort.InferenceSession, + model_info: ModelInfo, + params: RealWorldParams, + test_duration: float = 10.0, + push: bool = False, +) -> Tuple[RealWorldParams, bool]: + """Test if the model can handle given parameters.""" + try: + success = run_mujoco( + embodiment=embodiment, + policy=policy, + model_info=model_info, + real_world_params=params, + render=False, + sim_duration=test_duration, + random_pushes=push, + push_magnitude=50.0, + push_duration=0.2, + min_push_interval=1.0, + max_push_interval=3.0, + ) + except Exception as e: + print(f"Failed with error: {e}") + success = False + return params, success + + +class ExactGPModel(gpytorch.models.ExactGP): + def __init__( + self, train_x: torch.Tensor, train_y: torch.Tensor, likelihood: 
gpytorch.likelihoods.GaussianLikelihood + ) -> None: + super(ExactGPModel, self).__init__(train_x, train_y, likelihood) + self.mean_module = gpytorch.means.ConstantMean() + self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=train_x.size(1))) + + def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultivariateNormal: + mean_x = self.mean_module(x) + covar_x = self.covar_module(x) + return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) + + +class BayesianOptimizer: + def __init__(self, param_bounds: dict, objective_function: Callable) -> None: + self.param_bounds = param_bounds + self.objective_function = objective_function + self.device = "cpu" + + self.train_x = torch.empty((0, len(param_bounds)), device=self.device) + self.train_y = torch.empty(0, device=self.device) + + self.likelihood = gpytorch.likelihoods.GaussianLikelihood().to(self.device) + self.model: Optional[ExactGPModel] = None + + def _normalize_params(self, params: torch.Tensor) -> torch.Tensor: + """Normalize parameters to [0, 1] range.""" + normalized = torch.zeros_like(params) + for i, (name, bounds) in enumerate(self.param_bounds.items()): + normalized[:, i] = (params[:, i] - bounds[0]) / (bounds[1] - bounds[0]) + return normalized + + def _denormalize_params(self, normalized_params: torch.Tensor) -> torch.Tensor: + """Convert normalized parameters back to original range.""" + denormalized = torch.zeros_like(normalized_params) + for i, (name, bounds) in enumerate(self.param_bounds.items()): + denormalized[:, i] = normalized_params[:, i] * (bounds[1] - bounds[0]) + bounds[0] + return denormalized + + def _expected_improvement(self, mean: torch.Tensor, std: torch.Tensor, best_f: torch.Tensor) -> torch.Tensor: + """Calculate expected improvement.""" + z = (mean - best_f) / std + ei = std * (z * torch.distributions.Normal(0, 1).cdf(z) + torch.distributions.Normal(0, 1).log_prob(z).exp()) + return ei + + def _get_next_points(self, n_points: int = 1) -> torch.Tensor: + """Get next points to evaluate using random sampling and EI.""" + if len(self.train_y) == 0: + # If no observations, return random points + return torch.rand(n_points, len(self.param_bounds), device=self.device) + + # Generate random candidates + n_candidates = 1000 + candidates = torch.rand(n_candidates, len(self.param_bounds), device=self.device) + + assert self.model is not None + + # Get model predictions + self.model.eval() + self.likelihood.eval() + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + predictions = self.likelihood(self.model(candidates)) + mean = predictions.mean + std = predictions.stddev + + best_f = self.train_y.max() + ei = self._expected_improvement(mean, std, best_f) + + # Return points with highest EI + top_indices = torch.topk(ei, n_points).indices + return candidates[top_indices] + + def optimize(self, n_iterations: int = 50) -> Tuple[dict, float]: + """Run Bayesian optimization.""" + best_params: Optional[dict] = None + best_value = float("-inf") + + for i in range(n_iterations): + print(f"\nIteration {i + 1}/{n_iterations}") + + # Get next points to evaluate + next_points = self._get_next_points(n_points=1) + params = self._denormalize_params(next_points) + + # Convert to dictionary for objective function + param_dict = {name: params[0, i].item() for i, name in enumerate(self.param_bounds.keys())} + + # Evaluate objective function + start_time = time.time() + value = self.objective_function(param_dict) + print(f"Evaluation took {time.time() - start_time:.2f}s") + + 
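# The acquisition computed by _expected_improvement above is standard Expected Improvement
# for maximization:
#   EI(x) = sigma(x) * [ z * Phi(z) + phi(z) ],   z = (mu(x) - f_best) / sigma(x),
# where Phi and phi are the standard normal CDF and PDF. A quick standalone numeric check:
import torch

mu, sigma, f_best = torch.tensor([1.2]), torch.tensor([0.3]), torch.tensor([1.0])
z = (mu - f_best) / sigma
normal = torch.distributions.Normal(0.0, 1.0)
ei = sigma * (z * normal.cdf(z) + normal.log_prob(z).exp())
print(ei)  # ~0.25: positive because the predicted mean exceeds the current best observation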
# Update best parameters if necessary + if value > best_value: + best_value = value + best_params = param_dict + print("New best parameters found!") + print(f"Parameters: {best_params}") + print(f"Value: {best_value}") + + self.train_x = torch.cat([self.train_x, next_points]) + self.train_y = torch.cat([self.train_y, torch.tensor([value], device=self.device)]) + + if self.model is None: + self.model = ExactGPModel(self.train_x, self.train_y, self.likelihood).to(self.device) + else: + self.model.set_train_data(self.train_x, self.train_y, strict=False) + + self.model.train() + self.likelihood.train() + optimizer = torch.optim.Adam(self.model.parameters(), lr=0.1) + mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model) + + for _ in range(50): + optimizer.zero_grad() + output = self.model(self.train_x) + loss = -mll(output, self.train_y) + loss.backward() + optimizer.step() + + if best_params is None: + raise ValueError("No best parameters found") + + return best_params, best_value + + +@dataclass +class OptimizationConfig: + """Configuration for the Bayesian optimization process.""" + + # Maximum parameter bounds + max_sensor_latency: int = 10 + max_motor_latency: int = 10 + max_sensor_noise: float = 0.2 + max_motor_noise: float = 0.2 + max_init_noise: float = 0.2 + + # Parameter weights for optimization reward + sensor_noise_weight: float = 2.0 + motor_noise_weight: float = 1.0 + init_noise_weight: float = 1.0 + sensor_latency_weight: float = 1.5 + motor_latency_weight: float = 1.0 + + # Optimization settings + n_iterations: int = 50 + n_trials: int = 3 # Number of trials per parameter set + test_duration: float = 10.0 + + # Push test settings + push: bool = False + push_magnitude: float = 50.0 + push_duration: float = 0.2 + min_push_interval: float = 1.0 + max_push_interval: float = 3.0 + + +def optimize_params( + embodiment: str, + policy: ort.InferenceSession, + model_info: ModelInfo, + config: OptimizationConfig, +) -> RealWorldParams: + """Use Bayesian optimization to find maximum tolerable parameters.""" + print("=" * 100) + print("IDENTIFYING MAXIMUM RANDOMIZATION PARAMETERS") + print("-" * 100) + print("Optimization config:") + for field, value in asdict(config).items(): + print(f"{field}: {value}") + print("-" * 100) + + # Define parameter bounds + param_bounds = { + "sensor_noise": (0.0, config.max_sensor_noise), + "motor_noise": (0.0, config.max_motor_noise), + "init_noise": (0.0, config.max_init_noise), + "sensor_latency": (1, config.max_sensor_latency), + "motor_latency": (1, config.max_motor_latency), + } + + # Define objective function + def objective(params: dict) -> float: + test_params = RealWorldParams( + sensor_latency=round(params["sensor_latency"]), + motor_latency=round(params["motor_latency"]), + sensor_noise_std=params["sensor_noise"], + motor_noise_std=params["motor_noise"], + init_pos_noise_std=params["init_noise"], + ) + + print("\nTesting parameters:") + for name, value in asdict(test_params).items(): + print(f"{name}: {value}") + + # Run multiple trials to ensure reliability + successes = 0 + for trial in range(config.n_trials): + print(f"\nTrial {trial + 1}/{config.n_trials}") + success = run_mujoco( + embodiment=embodiment, + policy=policy, + model_info=model_info, + real_world_params=test_params, + render=False, + sim_duration=config.test_duration, + random_pushes=config.push, + push_magnitude=config.push_magnitude, + push_duration=config.push_duration, + min_push_interval=config.min_push_interval, + 
max_push_interval=config.max_push_interval, + ) + if success: + successes += 1 + + success_rate = successes / config.n_trials + print(f"\nSuccess rate: {success_rate * 100:.1f}%") + + if success_rate == 1.0: # Only reward if all trials succeed + weighted_params = ( + config.sensor_noise_weight * params["sensor_noise"] / config.max_sensor_noise + + config.motor_noise_weight * params["motor_noise"] / config.max_motor_noise + + config.init_noise_weight * params["init_noise"] / config.max_init_noise + + config.sensor_latency_weight * params["sensor_latency"] / config.max_sensor_latency + + config.motor_latency_weight * params["motor_latency"] / config.max_motor_latency + ) / sum( + [ + config.sensor_noise_weight, + config.motor_noise_weight, + config.init_noise_weight, + config.sensor_latency_weight, + config.motor_latency_weight, + ] + ) + return weighted_params + else: + return -50.0 # big ahh penalty + + # Run Bayesian optimization + optimizer = BayesianOptimizer(param_bounds, objective) + best_params, _ = optimizer.optimize(config.n_iterations) + + # Convert to RealWorldParams + final_params = RealWorldParams( + sensor_latency=round(best_params["sensor_latency"]), + motor_latency=round(best_params["motor_latency"]), + sensor_noise_std=best_params["sensor_noise"], + motor_noise_std=best_params["motor_noise"], + init_pos_noise_std=best_params["init_noise"], + ) + + print("\n" + "=" * 100) + print("MAXIMUM RANDOMIZATION PARAMETERS FOUND:") + print("-" * 100) + for name, value in asdict(final_params).items(): + print(f"{name}: {value}") + print("=" * 100) + + return final_params + + +def add_optimization_args(parser: argparse.ArgumentParser) -> None: + """Add OptimizationConfig parameters to an argument parser.""" + # Get all fields from OptimizationConfig + fields = dataclasses.fields(OptimizationConfig) + + # Create an argument group for optimization parameters + opt_group = parser.add_argument_group("Optimization Parameters") + + for field in fields: + # Convert field name from snake_case to kebab-case for args + arg_name = f"--{field.name.replace('_', '-')}" + + # Get default value and type from field + default = field.default + field_type = field.type + + # Handle boolean fields differently + if field_type is bool: + opt_group.add_argument( + arg_name, + action="store_true" if not default else "store_false", + help=f"{'Enable' if not default else 'Disable'} {field.name.replace('_', ' ')}", + ) + else: + opt_group.add_argument( + arg_name, + type=field_type, + default=default, + help=f"Set {field.name.replace('_', ' ')} (default: {default})", + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Deployment script.") + parser.add_argument("--embodiment", type=str, required=True, help="Embodiment name.") + parser.add_argument("--load_model", type=str, required=True, help="Path to run to load from.") + parser.add_argument("--keyboard_use", action="store_true", help="Enable keyboard control") + parser.add_argument("--no_render", action="store_false", dest="render", help="Disable rendering") + parser.add_argument("--terrain", action="store_true", help="Enable terrain") + parser.add_argument("--identify", action="store_true", help="Identify maximum randomization parameters") + parser.add_argument("--randomize_conditions", action="store_true", help="Randomize conditions") + + # Add optimization arguments automatically + add_optimization_args(parser) + + args = parser.parse_args() + + if args.keyboard_use: + x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.0, 0.0, 0.0 + 
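        # Keyboard mode opens a pygame window ("Simulation Control") so key input can be
        # captured; otherwise the commanded velocities default to a fixed (-0.5, 0.0, 0.0).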
pygame.init() + pygame.display.set_caption("Simulation Control") + else: + x_vel_cmd, y_vel_cmd, yaw_vel_cmd = -0.5, 0.0, 0.0 + + policy = ONNXModel(args.load_model) + metadata = policy.get_metadata() + + # # HACKY: (NOT YET ADDED TO METADATA) + metadata["pd_decimation"] = 1 + + # randomize conditions + + if args.randomize_conditions: + # PD constants + damping = metadata["robot_damping"] + damping = list(damping * np.random.uniform(1, 1.005, size=len(damping))) + stiffness = metadata["robot_stiffness"] + stiffness = list(stiffness * np.random.uniform(1, 1.005, size=len(stiffness))) + + # pd decimation + pd_decimation = metadata["sim_decimation"] + np.random.randint(-4, 5) + + # sim_decimation + sim_decimation = metadata["pd_decimation"] + np.random.randint(-4, 5) + else: + damping = metadata["robot_damping"] + stiffness = metadata["robot_stiffness"] + sim_decimation = metadata["sim_decimation"] + pd_decimation = metadata["pd_decimation"] + + print("=" * 100) + print("Testing with parameters:") + print(f"stiffness: {stiffness}") + print(f"damping: {damping}") + print(f"sim_decimation: {sim_decimation}") + print(f"pd_decimation: {pd_decimation}") + print("=" * 100) + + model_info: ModelInfo = { + "num_actions": metadata["num_actions"], + "num_observations": metadata["num_observations"], + "robot_effort": metadata["robot_effort"], + "robot_stiffness": stiffness, + "robot_damping": damping, + "sim_dt": metadata["sim_dt"], + "sim_decimation": sim_decimation, + "pd_decimation": pd_decimation, + "tau_factor": metadata["tau_factor"], + } + + # Create real-world parameters + # real_world_params = RealWorldParams( + # sensor_latency=3, motor_latency=1, sensor_noise_std=0.04, motor_noise_std=0.04, init_pos_noise_std=0.04 + # ) + + real_world_params = RealWorldParams( + sensor_latency=1, motor_latency=1, sensor_noise_std=0.0, motor_noise_std=0.0, init_pos_noise_std=0.0 + ) + + if args.identify: + # Create optimization config from args + opt_config = OptimizationConfig( + **{ + field.name: getattr(args, field.name.replace("-", "_")) + for field in dataclasses.fields(OptimizationConfig) + } + ) + + best_params = optimize_params( + embodiment=args.embodiment, + policy=policy, + model_info=model_info, + config=opt_config, + ) + real_world_params = best_params + + run_mujoco( + embodiment=args.embodiment, + policy=policy, + model_info=model_info, + real_world_params=real_world_params, + keyboard_use=args.keyboard_use, + render=args.render, + terrain=args.terrain, + random_pushes=args.push, + push_magnitude=args.push_magnitude, + push_duration=args.push_duration, + min_push_interval=args.min_push_interval, + max_push_interval=args.max_push_interval, + ) diff --git a/sim/play.py b/sim/play.py index 422b421d..7ae5c0a7 100755 --- a/sim/play.py +++ b/sim/play.py @@ -18,20 +18,18 @@ import h5py import numpy as np from isaacgym import gymapi +from kinfer.export.pytorch import export_to_onnx from tqdm import tqdm # Local imports third -from sim.env import run_dir -from sim.envs import task_registry -from sim.h5_logger import HDF5Logger - -import torch # special case with isort: skip comment from sim.env import run_dir # noqa: E402 from sim.envs import task_registry # noqa: E402 -from sim.model_export2 import ActorCfg, get_actor_policy # noqa: E402 +from sim.h5_logger import HDF5Logger +from sim.model_export import ActorCfg, get_actor_policy # noqa: E402 from sim.utils.helpers import get_args # noqa: E402 from sim.utils.logger import Logger # noqa: E402 -from kinfer.export.pytorch import export_to_onnx + +import 
torch # special case with isort: skip comment logger = logging.getLogger(__name__) @@ -48,7 +46,7 @@ def play(args: argparse.Namespace) -> None: logger.info("Configuring environment and training settings...") env_cfg, train_cfg = task_registry.get_cfgs(name=args.task) - num_parallel_envs = 2 + num_parallel_envs = 5 env_cfg.env.num_envs = num_parallel_envs env_cfg.sim.max_gpu_contact_pairs = 2**10 * num_parallel_envs @@ -59,7 +57,7 @@ def play(args: argparse.Namespace) -> None: env_cfg.terrain.num_rows = 5 env_cfg.terrain.num_cols = 5 env_cfg.terrain.curriculum = False - env_cfg.terrain.max_init_terrain_level = 5 + env_cfg.terrain.max_init_terrain_level = 10 env_cfg.noise.add_noise = True env_cfg.domain_rand.push_robots = True env_cfg.domain_rand.joint_angle_noise = 0.0 @@ -89,12 +87,13 @@ def play(args: argparse.Namespace) -> None: # export policy as a onnx module (used to run it on web) if args.export_onnx: path = ppo_runner.load_path - embodiment = ppo_runner.cfg['experiment_name'].lower() + embodiment = ppo_runner.cfg["experiment_name"].lower() policy_cfg = ActorCfg( embodiment=embodiment, cycle_time=env_cfg.rewards.cycle_time, sim_dt=env_cfg.sim.dt, sim_decimation=env_cfg.control.decimation, + pd_decimation=env_cfg.control.pd_decimation, tau_factor=env_cfg.safety.torque_limit, action_scale=env_cfg.control.action_scale, lin_vel_scale=env_cfg.normalization.obs_scales.lin_vel, @@ -102,6 +101,9 @@ def play(args: argparse.Namespace) -> None: quat_scale=env_cfg.normalization.obs_scales.quat, dof_pos_scale=env_cfg.normalization.obs_scales.dof_pos, dof_vel_scale=env_cfg.normalization.obs_scales.dof_vel, + num_single_obs=env_cfg.env.num_single_obs, + num_actions=env_cfg.env.num_actions, + num_joints=env_cfg.env.num_joints, frame_stack=env_cfg.env.frame_stack, clip_observations=env_cfg.normalization.clip_observations, clip_actions=env_cfg.normalization.clip_actions, @@ -113,12 +115,7 @@ def play(args: argparse.Namespace) -> None: # Merge policy_cfg and sim2sim_info into a single config object export_config = {**vars(policy_cfg), **sim2sim_info} - export_to_onnx( - actor_model, - input_tensors=input_tensors, - config=export_config, - save_path="kinfer_policy.onnx" - ) + export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_policy.onnx") print("Exported policy as kinfer-compatible onnx to: ", path) # Prepare for logging @@ -132,7 +129,7 @@ def play(args: argparse.Namespace) -> None: # Create directory for HDF5 files h5_dir = run_dir() / "h5_out" / args.task / now h5_dir.mkdir(parents=True, exist_ok=True) - + # Get observation dimensions num_actions = env.num_dof obs_buffer = env.obs_buf.shape[1] @@ -142,14 +139,16 @@ def play(args: argparse.Namespace) -> None: for env_idx in range(env_cfg.env.num_envs): h5_dir = run_dir() / "h5_out" / args.task / now / f"env_{env_idx}" h5_dir.mkdir(parents=True, exist_ok=True) - - h5_loggers.append(HDF5Logger( - data_name=f"{args.task}_env_{env_idx}", - num_actions=num_actions, - max_timesteps=env_steps_to_run, - num_observations=obs_buffer, - h5_out_dir=str(h5_dir) - )) + + h5_loggers.append( + HDF5Logger( + data_name=f"{args.task}_env_{env_idx}", + num_actions=num_actions, + max_timesteps=env_steps_to_run, + num_observations=obs_buffer, + h5_out_dir=str(h5_dir), + ) + ) if args.render: camera_properties = gymapi.CameraProperties() @@ -179,12 +178,11 @@ def play(args: argparse.Namespace) -> None: if not os.path.exists(experiment_dir): os.mkdir(experiment_dir) video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080)) - for 
t in tqdm(range(env_steps_to_run)): actions = policy(obs.detach()) if args.fix_command: - env.commands[:, 0] = 0.2 + env.commands[:, 0] = -0.5 env.commands[:, 1] = 0.0 env.commands[:, 2] = 0.0 env.commands[:, 3] = 0.0 @@ -234,27 +232,29 @@ def play(args: argparse.Namespace) -> None: if args.log_h5: # Extract the current observation for env_idx in range(env_cfg.env.num_envs): - h5_loggers[env_idx].log_data({ - "t": np.array([t * env.dt], dtype=np.float32), - "2D_command": np.array( - [ - np.sin(2 * math.pi * t * env.dt / env.cfg.rewards.cycle_time), - np.cos(2 * math.pi * t * env.dt / env.cfg.rewards.cycle_time), - ], - dtype=np.float32, - ), - "3D_command": np.array(env.commands[env_idx, :3].cpu().numpy(), dtype=np.float32), - "joint_pos": np.array(env.dof_pos[env_idx].cpu().numpy(), dtype=np.float32), - "joint_vel": np.array(env.dof_vel[env_idx].cpu().numpy(), dtype=np.float32), - "prev_actions": prev_actions[env_idx].astype(np.float32), - "curr_actions": actions[env_idx].astype(np.float32), - "ang_vel": env.base_ang_vel[env_idx].cpu().numpy().astype(np.float32), - "euler_rotation": env.base_euler_xyz[env_idx].cpu().numpy().astype(np.float32), - "buffer": env.obs_buf[env_idx].cpu().numpy().astype(np.float32) - }) + h5_loggers[env_idx].log_data( + { + "t": np.array([t * env.dt], dtype=np.float32), + "2D_command": np.array( + [ + np.sin(2 * math.pi * t * env.dt / env.cfg.rewards.cycle_time), + np.cos(2 * math.pi * t * env.dt / env.cfg.rewards.cycle_time), + ], + dtype=np.float32, + ), + "3D_command": np.array(env.commands[env_idx, :3].cpu().numpy(), dtype=np.float32), + "joint_pos": np.array(env.dof_pos[env_idx].cpu().numpy(), dtype=np.float32), + "joint_vel": np.array(env.dof_vel[env_idx].cpu().numpy(), dtype=np.float32), + "prev_actions": prev_actions[env_idx].astype(np.float32), + "curr_actions": actions[env_idx].astype(np.float32), + "ang_vel": env.base_ang_vel[env_idx].cpu().numpy().astype(np.float32), + "euler_rotation": env.base_euler_xyz[env_idx].cpu().numpy().astype(np.float32), + "buffer": env.obs_buf[env_idx].cpu().numpy().astype(np.float32), + } + ) prev_actions = actions - + if infos["episode"]: num_episodes = env.reset_buf.sum().item() if num_episodes > 0: @@ -282,4 +282,4 @@ def play(args: argparse.Namespace) -> None: parser.add_argument("--export_policy", action="store_true", help="Export policy as JIT", default=True) args, unknown = parser.parse_known_args(namespace=base_args) - play(args) \ No newline at end of file + play(args) diff --git a/sim/produce_sim_data.py b/sim/produce_sim_data.py index a9fd0f55..babad109 100644 --- a/sim/produce_sim_data.py +++ b/sim/produce_sim_data.py @@ -60,5 +60,5 @@ def run_parallel_sims(num_threads: int, args: argparse.Namespace) -> None: for batch in range(num_batches): examples_remaining = args.num_examples - (batch * args.num_threads) threads_this_batch = min(args.num_threads, examples_remaining) - print(f"\nRunning batch {batch+1}/{num_batches} ({threads_this_batch} simulations)") + print(f"\nRunning batch {batch + 1}/{num_batches} ({threads_this_batch} simulations)") run_parallel_sims(threads_this_batch, args) diff --git a/sim/requirements-dev.txt b/sim/requirements-dev.txt index 3633b810..2ad1f6eb 100755 --- a/sim/requirements-dev.txt +++ b/sim/requirements-dev.txt @@ -8,8 +8,8 @@ matplotlib==3.3.4 mediapy==1.2.2 mujoco==2.3.6 mujoco_python_viewer==0.1.4 -onnx==1.15.0 -onnxruntime==1.15.0 +onnx +onnxruntime pandas==1.4.4 Pillow>6.2.0 poselib==2.0.4 @@ -23,4 +23,4 @@ wandb==0.18.7 mypy==1.10.0 pytest==8.3.3 ruff==0.7.4 -isort==5.13.2 
\ No newline at end of file +isort==5.13.2 diff --git a/sim/requirements.txt b/sim/requirements.txt index ffac4a26..9aa79533 100755 --- a/sim/requirements.txt +++ b/sim/requirements.txt @@ -14,3 +14,4 @@ onnx mujoco==2.3.6 kinfer==0.0.5 opencv-python +gpytorch>=1.13 diff --git a/sim/resources/gpr/joints.py b/sim/resources/gpr/joints.py index 31e1f12e..cd62c867 100755 --- a/sim/resources/gpr/joints.py +++ b/sim/resources/gpr/joints.py @@ -48,19 +48,19 @@ def __str__(self) -> str: class LeftLeg(Node): - hip_pitch = "L_hip_y" - hip_yaw = "L_hip_x" - hip_roll = "L_hip_z" - knee_pitch = "L_knee" - ankle_pitch = "L_ankle_y" + hip_pitch = "left_hip_pitch_04" + hip_yaw = "left_hip_yaw_03" + hip_roll = "left_hip_roll_03" + knee_pitch = "left_knee_04" + ankle_pitch = "left_ankle_02" class RightLeg(Node): - hip_pitch = "R_hip_y" - hip_yaw = "R_hip_x" - hip_roll = "R_hip_z" - knee_pitch = "R_knee" - ankle_pitch = "R_ankle_y" + hip_pitch = "right_hip_pitch_04" + hip_yaw = "right_hip_yaw_03" + hip_roll = "right_hip_roll_03" + knee_pitch = "right_knee_04" + ankle_pitch = "right_ankle_02" class Legs(Node): @@ -71,39 +71,14 @@ class Legs(Node): class Robot(Node): legs = Legs() - height = 1.05 - standing_height = 1.05 + 0.025 - rotation = [0, 0, 0, 1] + # height = 1.05 #- 0.151 #maybe? + # standing_height = 1.05 + 0.025 #- 0.151 - @classmethod - def isaac_to_real_signs(cls) -> Dict[str, int]: - return { - Robot.legs.left.hip_pitch: 1, - Robot.legs.left.hip_yaw: 1, - Robot.legs.left.hip_roll: 1, - Robot.legs.left.knee_pitch: 1, - Robot.legs.left.ankle_pitch: 1, - Robot.legs.right.hip_pitch: -1, - Robot.legs.right.hip_yaw: -1, - Robot.legs.right.hip_roll: 1, - Robot.legs.right.knee_pitch: -1, - Robot.legs.right.ankle_pitch: 1, - } + height = 0.79 + standing_height = 0.79 + 0.025 - @classmethod - def isaac_to_mujoco_signs(cls) -> Dict[str, int]: - return { - Robot.legs.left.hip_pitch: 1, - Robot.legs.left.hip_yaw: 1, - Robot.legs.left.hip_roll: 1, - Robot.legs.left.knee_pitch: 1, - Robot.legs.left.ankle_pitch: 1, - Robot.legs.right.hip_pitch: 1, - Robot.legs.right.hip_yaw: 1, - Robot.legs.right.hip_roll: 1, - Robot.legs.right.knee_pitch: 1, - Robot.legs.right.ankle_pitch: 1, - } + # 1.3 m and 1.149m + rotation = [0, 0, 0, 1] @classmethod def default_positions(cls) -> Dict[str, float]: @@ -125,16 +100,16 @@ def default_positions(cls) -> Dict[str, float]: @classmethod def default_standing(cls) -> Dict[str, float]: return { - Robot.legs.left.hip_pitch: -0.23, + Robot.legs.left.hip_pitch: 0.23, Robot.legs.left.hip_yaw: 0.0, Robot.legs.left.hip_roll: 0.0, Robot.legs.left.knee_pitch: -0.441, - Robot.legs.left.ankle_pitch: -0.195, - Robot.legs.right.hip_pitch: 0.23, + Robot.legs.left.ankle_pitch: 0.195, # negated + Robot.legs.right.hip_pitch: -0.23, Robot.legs.right.hip_yaw: 0.0, Robot.legs.right.hip_roll: 0.0, - Robot.legs.right.knee_pitch: 0.441, - Robot.legs.right.ankle_pitch: 0.195, + Robot.legs.right.knee_pitch: 0.441, # negated + Robot.legs.right.ankle_pitch: 0.195, # negated } # CONTRACT - this should be ordered according to how the policy is trained. 
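# A minimal standalone sketch (not part of this patch) of the suffix-based gain lookup
# the updated gpr/joints.py introduces (see the stiffness_mapping/damping_mapping hunk
# that follows): joint names now end in a motor-size code ("04", "03", "02"), and the
# *_mapping() helpers resolve per-joint gains by indexing with joint[-2:].
stiffness = {"04": 300, "03": 120, "02": 40}
joint_names = ["left_hip_pitch_04", "left_hip_yaw_03", "left_ankle_02"]
stiffness_mapping = {name: stiffness[name[-2:]] for name in joint_names}
print(stiffness_mapping)
# {'left_hip_pitch_04': 300, 'left_hip_yaw_03': 120, 'left_ankle_02': 40}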
@@ -143,86 +118,58 @@ def default_standing(cls) -> Dict[str, float]: def joint_names(cls) -> List[str]: return list(cls.default_standing().keys()) - @classmethod - def default_limits(cls) -> Dict[str, Dict[str, float]]: - return { - Robot.legs.left.knee_pitch: {"lower": -1.57, "upper": 0}, - Robot.legs.right.knee_pitch: {"lower": 0, "upper": 1.57}, - } - # p_gains @classmethod def stiffness(cls) -> Dict[str, float]: return { - "hip_y": 300, - "hip_x": 120, - "hip_z": 120, - "knee": 300, - "ankle_y": 40, + "04": 300, + "03": 120, + "02": 40, } @classmethod def stiffness_mapping(cls) -> Dict[str, float]: mapping = {} stiffness = cls.stiffness() - for side in ["L", "R"]: - for joint, value in stiffness.items(): - mapping[f"{side}_{joint}"] = value + for joint in cls.joint_names(): + mapping[joint] = stiffness[joint[-2:]] return mapping # d_gains @classmethod def damping(cls) -> Dict[str, float]: return { - "hip_y": 5, - "hip_x": 5, - "hip_z": 5, - "knee": 5, - "ankle_y": 5, + "04": 5, + "03": 5, + "02": 5, } @classmethod def damping_mapping(cls) -> Dict[str, float]: mapping = {} damping = cls.damping() - for side in ["L", "R"]: - for joint, value in damping.items(): - mapping[f"{side}_{joint}"] = value + for joint in cls.joint_names(): + mapping[joint] = damping[joint[-2:]] + print(mapping) return mapping # effort_limits @classmethod def effort(cls) -> Dict[str, float]: return { - "hip_y": 60, - "hip_x": 40, - "hip_z": 40, - "knee": 60, - "ankle_y": 17, + "04": 60, + "03": 40, + "02": 40, } @classmethod def effort_mapping(cls) -> Dict[str, float]: mapping = {} effort = cls.effort() - for side in ["L", "R"]: - for joint, value in effort.items(): - mapping[f"{side}_{joint}"] = value + for joint in cls.joint_names(): + mapping[joint] = effort[joint[-2:]] return mapping - # vel_limits - @classmethod - def velocity(cls) -> Dict[str, float]: - return {"hip": 10, "knee": 10, "ankle": 10} - - @classmethod - def friction(cls) -> Dict[str, float]: - return { - "hip": 0, - "knee": 0, - "ankle": 0.1, - } - def print_joints() -> None: joints = Robot.all_joints() diff --git a/sim/resources/gpr/meshes/arm1_top.stl b/sim/resources/gpr/meshes/arm1_top.stl index d3937233..b0d6faf3 100644 Binary files a/sim/resources/gpr/meshes/arm1_top.stl and b/sim/resources/gpr/meshes/arm1_top.stl differ diff --git a/sim/resources/gpr/meshes/arm1_top_2.stl b/sim/resources/gpr/meshes/arm1_top_2.stl index 20808d7c..ecd01d5e 100644 Binary files a/sim/resources/gpr/meshes/arm1_top_2.stl and b/sim/resources/gpr/meshes/arm1_top_2.stl differ diff --git a/sim/resources/gpr/meshes/arm2_shell.stl b/sim/resources/gpr/meshes/arm2_shell.stl index d095688d..9c2fe7a9 100644 Binary files a/sim/resources/gpr/meshes/arm2_shell.stl and b/sim/resources/gpr/meshes/arm2_shell.stl differ diff --git a/sim/resources/gpr/meshes/arm2_shell_2.stl b/sim/resources/gpr/meshes/arm2_shell_2.stl index 98302751..06bd279c 100644 Binary files a/sim/resources/gpr/meshes/arm2_shell_2.stl and b/sim/resources/gpr/meshes/arm2_shell_2.stl differ diff --git a/sim/resources/gpr/meshes/arm3_shell.stl b/sim/resources/gpr/meshes/arm3_shell.stl index dd787c59..5d6ce472 100644 Binary files a/sim/resources/gpr/meshes/arm3_shell.stl and b/sim/resources/gpr/meshes/arm3_shell.stl differ diff --git a/sim/resources/gpr/meshes/arm3_shell2.stl b/sim/resources/gpr/meshes/arm3_shell2.stl index 9690a4e5..c99f0969 100644 Binary files a/sim/resources/gpr/meshes/arm3_shell2.stl and b/sim/resources/gpr/meshes/arm3_shell2.stl differ diff --git a/sim/resources/gpr/meshes/body1-part.stl 
b/sim/resources/gpr/meshes/body1-part.stl index 83dc0912..642a002d 100644 Binary files a/sim/resources/gpr/meshes/body1-part.stl and b/sim/resources/gpr/meshes/body1-part.stl differ diff --git a/sim/resources/gpr/meshes/foot1.stl b/sim/resources/gpr/meshes/foot1.stl index 93874859..4d173b54 100644 Binary files a/sim/resources/gpr/meshes/foot1.stl and b/sim/resources/gpr/meshes/foot1.stl differ diff --git a/sim/resources/gpr/meshes/foot3.stl b/sim/resources/gpr/meshes/foot3.stl index 841369df..d4353f2c 100644 Binary files a/sim/resources/gpr/meshes/foot3.stl and b/sim/resources/gpr/meshes/foot3.stl differ diff --git a/sim/resources/gpr/meshes/hand_shell.stl b/sim/resources/gpr/meshes/hand_shell.stl index 4aff47dc..0eac4fae 100644 Binary files a/sim/resources/gpr/meshes/hand_shell.stl and b/sim/resources/gpr/meshes/hand_shell.stl differ diff --git a/sim/resources/gpr/meshes/hand_shell_2.stl b/sim/resources/gpr/meshes/hand_shell_2.stl index 4aff47dc..0eac4fae 100644 Binary files a/sim/resources/gpr/meshes/hand_shell_2.stl and b/sim/resources/gpr/meshes/hand_shell_2.stl differ diff --git a/sim/resources/gpr/meshes/leg0_shell.stl b/sim/resources/gpr/meshes/leg0_shell.stl index c2a3806e..9cdde706 100644 Binary files a/sim/resources/gpr/meshes/leg0_shell.stl and b/sim/resources/gpr/meshes/leg0_shell.stl differ diff --git a/sim/resources/gpr/meshes/leg0_shell_2.stl b/sim/resources/gpr/meshes/leg0_shell_2.stl index c2a3806e..891770a7 100644 Binary files a/sim/resources/gpr/meshes/leg0_shell_2.stl and b/sim/resources/gpr/meshes/leg0_shell_2.stl differ diff --git a/sim/resources/gpr/meshes/leg1_shell.stl b/sim/resources/gpr/meshes/leg1_shell.stl index b3b29475..51a38c99 100644 Binary files a/sim/resources/gpr/meshes/leg1_shell.stl and b/sim/resources/gpr/meshes/leg1_shell.stl differ diff --git a/sim/resources/gpr/meshes/leg1_shell3.stl b/sim/resources/gpr/meshes/leg1_shell3.stl index 249d8151..2a643f14 100644 Binary files a/sim/resources/gpr/meshes/leg1_shell3.stl and b/sim/resources/gpr/meshes/leg1_shell3.stl differ diff --git a/sim/resources/gpr/meshes/leg2_shell.stl b/sim/resources/gpr/meshes/leg2_shell.stl index a9e9d2c6..9f4d4083 100644 Binary files a/sim/resources/gpr/meshes/leg2_shell.stl and b/sim/resources/gpr/meshes/leg2_shell.stl differ diff --git a/sim/resources/gpr/meshes/leg2_shell_2.stl b/sim/resources/gpr/meshes/leg2_shell_2.stl index a9e9d2c6..9f4d4083 100644 Binary files a/sim/resources/gpr/meshes/leg2_shell_2.stl and b/sim/resources/gpr/meshes/leg2_shell_2.stl differ diff --git a/sim/resources/gpr/meshes/leg3_shell2.stl b/sim/resources/gpr/meshes/leg3_shell2.stl deleted file mode 100644 index 6c8c68aa..00000000 Binary files a/sim/resources/gpr/meshes/leg3_shell2.stl and /dev/null differ diff --git a/sim/resources/gpr/meshes/leg3_shell22.stl b/sim/resources/gpr/meshes/leg3_shell22.stl deleted file mode 100644 index 952779e8..00000000 Binary files a/sim/resources/gpr/meshes/leg3_shell22.stl and /dev/null differ diff --git a/sim/resources/gpr/meshes/shoulder.stl b/sim/resources/gpr/meshes/shoulder.stl index 90bc3924..411350c6 100644 Binary files a/sim/resources/gpr/meshes/shoulder.stl and b/sim/resources/gpr/meshes/shoulder.stl differ diff --git a/sim/resources/gpr/meshes/shoulder_2.stl b/sim/resources/gpr/meshes/shoulder_2.stl index 59c1316a..f4897a4e 100644 Binary files a/sim/resources/gpr/meshes/shoulder_2.stl and b/sim/resources/gpr/meshes/shoulder_2.stl differ diff --git a/sim/resources/gpr/robot_fixed.urdf b/sim/resources/gpr/robot_fixed.urdf index b16a8ba5..3f096a5b 100644 --- 
a/sim/resources/gpr/robot_fixed.urdf
+++ b/sim/resources/gpr/robot_fixed.urdf
@@ -1,638 +1,644 @@
 [urdf element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr/robot_fixed.xml b/sim/resources/gpr/robot_fixed.xml
index d89f95a9..3431a2a3 100644
--- a/sim/resources/gpr/robot_fixed.xml
+++ b/sim/resources/gpr/robot_fixed.xml
@@ -1,4 +1,4 @@
 [xml element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr/robot.urdf b/sim/resources/gpr/robot_fixed_old.urdf
similarity index 52%
rename from sim/resources/gpr/robot.urdf
rename to sim/resources/gpr/robot_fixed_old.urdf
index 46370774..8aade215 100644
--- a/sim/resources/gpr/robot.urdf
+++ b/sim/resources/gpr/robot_fixed_old.urdf
@@ -1,616 +1,643 @@
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - + - + - + - + - - - - + + + + diff --git a/sim/resources/gpr/robot_fixed_terrain.xml b/sim/resources/gpr/robot_fixed_terrain.xml new file mode 100644 index 00000000..b30c570c --- /dev/null +++ b/sim/resources/gpr/robot_fixed_terrain.xml @@ -0,0 +1,226 @@ + + diff --git a/sim/resources/gpr_headless/README.md b/sim/resources/gpr_headless/README.md new file mode 100644 index 00000000..58ff4a70 --- /dev/null +++ b/sim/resources/gpr_headless/README.md @@ -0,0 +1,13 @@ +# URDF/XML development: +1. kol run https://cad.onshape.com/documents/bc3a557e2f92bdcea4099f0d/w/09713ac603d461fc1dff6b5d/e/5a4acdbffe6f8ed7c4e34970 --config-path config_example.json # noqa: E501 +2. python sim/scripts/create_fixed_torso.py +3. Rotate first link + + + + + + +4. Fix left leg axes to align with expected directions +5. urf2mjcf robot.urdf -o robot.xml + \ No newline at end of file diff --git a/sim/resources/gpr_headless/__init__.py b/sim/resources/gpr_headless/__init__.py new file mode 100755 index 00000000..e69de29b diff --git a/sim/resources/gpr_headless/joints.py b/sim/resources/gpr_headless/joints.py new file mode 100755 index 00000000..92a33202 --- /dev/null +++ b/sim/resources/gpr_headless/joints.py @@ -0,0 +1,231 @@ +"""Defines a more Pythonic interface for specifying the joint names.""" + +import textwrap +from abc import ABC +from typing import Dict, List, Tuple, Union + + +class Node(ABC): + @classmethod + def children(cls) -> List["Union[Node, str]"]: + return [ + attr + for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__")) + if isinstance(attr, (Node, str)) + ] + + @classmethod + def joints(cls) -> List[str]: + return [ + attr + for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__")) + if isinstance(attr, str) + ] + + @classmethod + def joints_motors(cls) -> List[Tuple[str, str]]: + joint_names: List[Tuple[str, str]] = [] + for attr in dir(cls): + if not attr.startswith("__"): + attr2 = getattr(cls, attr) + if isinstance(attr2, str): + joint_names.append((attr, attr2)) + + return joint_names + + @classmethod + def all_joints(cls) -> List[str]: + leaves = cls.joints() + for child in cls.children(): + if isinstance(child, Node): + leaves.extend(child.all_joints()) + return leaves + + def __str__(self) -> str: + parts = [str(child) for child in self.children()] + parts_str = textwrap.indent("\n" + "\n".join(parts), " ") + return f"[{self.__class__.__name__}]{parts_str}" + + +class LeftLeg(Node): + hip_pitch = "left_hip_pitch_04" + hip_yaw = "left_hip_yaw_03" + hip_roll = "left_hip_roll_03" + knee_pitch = "left_knee_04" + ankle_pitch = "left_ankle_02" + + +class RightLeg(Node): + hip_pitch = "right_hip_pitch_04" + hip_yaw = "right_hip_yaw_03" + hip_roll = "right_hip_roll_03" + knee_pitch = "right_knee_04" + ankle_pitch = "right_ankle_02" + + +class Legs(Node): + left = LeftLeg() + right = RightLeg() + + +class Robot(Node): + legs = Legs() + + height = 1.05 + standing_height = 1.05 + 0.025 + rotation = [0, 0, 0, 1] + + @classmethod + def isaac_to_real_signs(cls) -> Dict[str, int]: + return { + Robot.legs.left.hip_pitch: 1, + Robot.legs.left.hip_yaw: 1, + Robot.legs.left.hip_roll: 1, + Robot.legs.left.knee_pitch: 1, + Robot.legs.left.ankle_pitch: 1, + Robot.legs.right.hip_pitch: 1, + Robot.legs.right.hip_yaw: 1, + Robot.legs.right.hip_roll: 1, + Robot.legs.right.knee_pitch: 1, + Robot.legs.right.ankle_pitch: 1, + } + + @classmethod 
+ def isaac_to_mujoco_signs(cls) -> Dict[str, int]: + return { + Robot.legs.left.hip_pitch: 1, + Robot.legs.left.hip_yaw: 1, + Robot.legs.left.hip_roll: 1, + Robot.legs.left.knee_pitch: 1, + Robot.legs.left.ankle_pitch: 1, + Robot.legs.right.hip_pitch: 1, + Robot.legs.right.hip_yaw: 1, + Robot.legs.right.hip_roll: 1, + Robot.legs.right.knee_pitch: 1, + Robot.legs.right.ankle_pitch: 1, + } + + @classmethod + def default_positions(cls) -> Dict[str, float]: + return { + Robot.legs.left.hip_pitch: 0.0, + Robot.legs.left.hip_yaw: 0.0, + Robot.legs.left.hip_roll: 0.0, + Robot.legs.left.knee_pitch: 0.0, + Robot.legs.left.ankle_pitch: 0.0, + Robot.legs.right.hip_pitch: 0.0, + Robot.legs.right.hip_yaw: 0.0, + Robot.legs.right.hip_roll: 0.0, + Robot.legs.right.knee_pitch: 0.0, + Robot.legs.right.ankle_pitch: 0.0, + } + + # CONTRACT - this should be ordered according to how the policy is trained. + # E.g. the first entry should be the angle of the first joint in the policy. + @classmethod + def default_standing(cls) -> Dict[str, float]: + return { + Robot.legs.left.hip_pitch: 0.23, + Robot.legs.left.hip_yaw: 0.0, + Robot.legs.left.hip_roll: 0.0, + Robot.legs.left.knee_pitch: 0.441, # negated + Robot.legs.left.ankle_pitch: -0.195, + Robot.legs.right.hip_pitch: -0.23, + Robot.legs.right.hip_yaw: 0.0, + Robot.legs.right.hip_roll: 0.0, + Robot.legs.right.knee_pitch: -0.441, + Robot.legs.right.ankle_pitch: 0.195, # negated + } + + # CONTRACT - this should be ordered according to how the policy is trained. + # E.g. the first entry should be the name of the first joint in the policy. + @classmethod + def joint_names(cls) -> List[str]: + return list(cls.default_standing().keys()) + + @classmethod + def default_limits(cls) -> Dict[str, Dict[str, float]]: + return { + Robot.legs.left.knee_pitch: {"lower": -1.57, "upper": 0}, + Robot.legs.right.knee_pitch: {"lower": -1.57, "upper": 0}, + } + + # p_gains + @classmethod + def stiffness(cls) -> Dict[str, float]: + return { + "04": 80, + "03": 40, + "02": 30, + } + + @classmethod + def stiffness_mapping(cls) -> Dict[str, float]: + mapping = {} + stiffness = cls.stiffness() + for joint in cls.joint_names(): + mapping[joint] = stiffness[joint[-2:]] + return mapping + + # d_gains + @classmethod + def damping(cls) -> Dict[str, float]: + return { + "04": 5, + "03": 4, + "02": 1, + } + + @classmethod + def damping_mapping(cls) -> Dict[str, float]: + mapping = {} + damping = cls.damping() + for joint in cls.joint_names(): + mapping[joint] = damping[joint[-2:]] + print(mapping) + return mapping + + # effort_limits + @classmethod + def effort(cls) -> Dict[str, float]: + return { + "04": 80, + "03": 40, + "02": 17, + } + + @classmethod + def effort_mapping(cls) -> Dict[str, float]: + mapping = {} + effort = cls.effort() + for joint in cls.joint_names(): + mapping[joint] = effort[joint[-2:]] + return mapping + + @classmethod + def velocity(cls) -> Dict[str, float]: + return { + "04": 18.0, + "03": 18.0, + "02": 18.0, + } + + @classmethod + def friction(cls) -> Dict[str, float]: + return { + "04": 0, + "03": 0, + "02": 0.1, + } + + +def print_joints() -> None: + joints = Robot.all_joints() + assert len(joints) == len(set(joints)), "Duplicate joint names found!" 
+ print(Robot()) + print(len(joints)) + + +if __name__ == "__main__": + # python -m sim.Robot.joints + print_joints() diff --git a/sim/resources/gpr_headless/old_real/README.md b/sim/resources/gpr_headless/old_real/README.md new file mode 100644 index 00000000..58ff4a70 --- /dev/null +++ b/sim/resources/gpr_headless/old_real/README.md @@ -0,0 +1,13 @@ +# URDF/XML development: +1. kol run https://cad.onshape.com/documents/bc3a557e2f92bdcea4099f0d/w/09713ac603d461fc1dff6b5d/e/5a4acdbffe6f8ed7c4e34970 --config-path config_example.json # noqa: E501 +2. python sim/scripts/create_fixed_torso.py +3. Rotate first link + + + + + + +4. Fix left leg axes to align with expected directions +5. urf2mjcf robot.urdf -o robot.xml + \ No newline at end of file diff --git a/sim/resources/gpr_headless/old_real/__init__.py b/sim/resources/gpr_headless/old_real/__init__.py new file mode 100755 index 00000000..e69de29b diff --git a/sim/resources/gpr_headless/old_real/joints.py b/sim/resources/gpr_headless/old_real/joints.py new file mode 100755 index 00000000..986d0f7e --- /dev/null +++ b/sim/resources/gpr_headless/old_real/joints.py @@ -0,0 +1,183 @@ +"""Defines a more Pythonic interface for specifying the joint names.""" + +import textwrap +from abc import ABC +from typing import Dict, List, Tuple, Union + + +class Node(ABC): + @classmethod + def children(cls) -> List["Union[Node, str]"]: + return [ + attr + for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__")) + if isinstance(attr, (Node, str)) + ] + + @classmethod + def joints(cls) -> List[str]: + return [ + attr + for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__")) + if isinstance(attr, str) + ] + + @classmethod + def joints_motors(cls) -> List[Tuple[str, str]]: + joint_names: List[Tuple[str, str]] = [] + for attr in dir(cls): + if not attr.startswith("__"): + attr2 = getattr(cls, attr) + if isinstance(attr2, str): + joint_names.append((attr, attr2)) + + return joint_names + + @classmethod + def all_joints(cls) -> List[str]: + leaves = cls.joints() + for child in cls.children(): + if isinstance(child, Node): + leaves.extend(child.all_joints()) + return leaves + + def __str__(self) -> str: + parts = [str(child) for child in self.children()] + parts_str = textwrap.indent("\n" + "\n".join(parts), " ") + return f"[{self.__class__.__name__}]{parts_str}" + + +class LeftLeg(Node): + hip_pitch = "left_hip_pitch_04" + hip_yaw = "left_hip_yaw_03" + hip_roll = "left_hip_roll_03" + knee_pitch = "left_knee_04" + ankle_pitch = "left_ankle_02" + + +class RightLeg(Node): + hip_pitch = "right_hip_pitch_04" + hip_yaw = "right_hip_yaw_03" + hip_roll = "right_hip_roll_03" + knee_pitch = "right_knee_04" + ankle_pitch = "right_ankle_02" + + +class Legs(Node): + left = LeftLeg() + right = RightLeg() + + +class Robot(Node): + legs = Legs() + + # height = 1.05 #- 0.151 #maybe? + # standing_height = 1.05 + 0.025 #- 0.151 + + height = 1.04 + standing_height = 1.04 + 0.025 + + # 1.3 m and 1.149m + rotation = [0, 0, 0, 1] + + @classmethod + def default_positions(cls) -> Dict[str, float]: + return { + Robot.legs.left.hip_pitch: 0.0, + Robot.legs.left.hip_yaw: 0.0, + Robot.legs.left.hip_roll: 0.0, + Robot.legs.left.knee_pitch: 0.0, + Robot.legs.left.ankle_pitch: 0.0, + Robot.legs.right.hip_pitch: 0.0, + Robot.legs.right.hip_yaw: 0.0, + Robot.legs.right.hip_roll: 0.0, + Robot.legs.right.knee_pitch: 0.0, + Robot.legs.right.ankle_pitch: 0.0, + } + + # CONTRACT - this should be ordered according to how the policy is trained. 
+ # E.g. the first entry should be the angle of the first joint in the policy. + @classmethod + def default_standing(cls) -> Dict[str, float]: + return { + Robot.legs.left.hip_pitch: 0.23, + Robot.legs.left.hip_yaw: 0.0, + Robot.legs.left.hip_roll: 0.0, + Robot.legs.left.knee_pitch: 0.441, # negated + Robot.legs.left.ankle_pitch: -0.195, # negated + Robot.legs.right.hip_pitch: -0.23, + Robot.legs.right.hip_yaw: 0.0, + Robot.legs.right.hip_roll: 0.0, + Robot.legs.right.knee_pitch: -0.441, # negated + Robot.legs.right.ankle_pitch: -0.195, # negated + } + + # CONTRACT - this should be ordered according to how the policy is trained. + # E.g. the first entry should be the name of the first joint in the policy. + @classmethod + def joint_names(cls) -> List[str]: + return list(cls.default_standing().keys()) + + # p_gains + @classmethod + def stiffness(cls) -> Dict[str, float]: + return { + "04": 300, + "03": 120, + "02": 40, + } + + @classmethod + def stiffness_mapping(cls) -> Dict[str, float]: + mapping = {} + stiffness = cls.stiffness() + for joint in cls.joint_names(): + mapping[joint] = stiffness[joint[-2:]] + return mapping + + # d_gains + @classmethod + def damping(cls) -> Dict[str, float]: + return { + "04": 5, + "03": 5, + "02": 5, + } + + @classmethod + def damping_mapping(cls) -> Dict[str, float]: + mapping = {} + damping = cls.damping() + for joint in cls.joint_names(): + mapping[joint] = damping[joint[-2:]] + print(mapping) + return mapping + + # effort_limits + @classmethod + def effort(cls) -> Dict[str, float]: + return { + "04": 80, + "03": 40, + "02": 20, + } + + @classmethod + def effort_mapping(cls) -> Dict[str, float]: + mapping = {} + effort = cls.effort() + for joint in cls.joint_names(): + mapping[joint] = effort[joint[-2:]] + return mapping + + +def print_joints() -> None: + joints = Robot.all_joints() + assert len(joints) == len(set(joints)), "Duplicate joint names found!" 
+    print(Robot())
+    print(len(joints))
+
+
+if __name__ == "__main__":
+    # python -m sim.Robot.joints
+    print_joints()
diff --git a/sim/resources/gpr_headless/old_real/old/robot_fixed.urdf b/sim/resources/gpr_headless/old_real/old/robot_fixed.urdf
new file mode 100644
index 00000000..77f91e17
--- /dev/null
+++ b/sim/resources/gpr_headless/old_real/old/robot_fixed.urdf
@@ -0,0 +1,644 @@
 [urdf element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr_headless/old_real/old/robot_fixed.xml b/sim/resources/gpr_headless/old_real/old/robot_fixed.xml
new file mode 100644
index 00000000..3431a2a3
--- /dev/null
+++ b/sim/resources/gpr_headless/old_real/old/robot_fixed.xml
@@ -0,0 +1,221 @@
 [xml element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr_headless/old_real/old/robot_fixed_old.urdf b/sim/resources/gpr_headless/old_real/old/robot_fixed_old.urdf
new file mode 100644
index 00000000..8aade215
--- /dev/null
+++ b/sim/resources/gpr_headless/old_real/old/robot_fixed_old.urdf
@@ -0,0 +1,643 @@
 [urdf element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr_headless/old_real/old/robot_fixed_terrain.xml b/sim/resources/gpr_headless/old_real/old/robot_fixed_terrain.xml
new file mode 100644
index 00000000..b30c570c
--- /dev/null
+++ b/sim/resources/gpr_headless/old_real/old/robot_fixed_terrain.xml
@@ -0,0 +1,226 @@
 [xml element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr_headless/old_real/robot_fixed.urdf b/sim/resources/gpr_headless/old_real/robot_fixed.urdf
new file mode 100644
index 00000000..60a3f90d
--- /dev/null
+++ b/sim/resources/gpr_headless/old_real/robot_fixed.urdf
@@ -0,0 +1,638 @@
 [urdf element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr_headless/old_real/robot_fixed.xml b/sim/resources/gpr_headless/old_real/robot_fixed.xml
new file mode 100644
index 00000000..9486e977
--- /dev/null
+++ b/sim/resources/gpr_headless/old_real/robot_fixed.xml
@@ -0,0 +1,218 @@
 [xml element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr_headless/old_real/robot_fixed_new.urdf b/sim/resources/gpr_headless/old_real/robot_fixed_new.urdf
new file mode 100644
index 00000000..e2f0b0e9
--- /dev/null
+++ b/sim/resources/gpr_headless/old_real/robot_fixed_new.urdf
@@ -0,0 +1,599 @@
 [urdf element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr_headless/old_real/robot_fixed_terrain.xml b/sim/resources/gpr_headless/old_real/robot_fixed_terrain.xml
new file mode 100644
index 00000000..fb2ebf1b
--- /dev/null
+++ b/sim/resources/gpr_headless/old_real/robot_fixed_terrain.xml
@@ -0,0 +1,223 @@
 [xml element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr_headless/robot.mjcf b/sim/resources/gpr_headless/robot.mjcf
new file mode 100644
index 00000000..edb9c6fa
--- /dev/null
+++ b/sim/resources/gpr_headless/robot.mjcf
@@ -0,0 +1,246 @@
 [mjcf element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr_headless/robot.urdf b/sim/resources/gpr_headless/robot.urdf
new file mode 100644
index 00000000..f1143ac7
--- /dev/null
+++ b/sim/resources/gpr_headless/robot.urdf
@@ -0,0 +1,584 @@
 [urdf element markup stripped in extraction; hunk body omitted]
diff --git a/sim/resources/gpr_headless/robot_fixed.urdf b/sim/resources/gpr_headless/robot_fixed.urdf
new file mode 100644
index 00000000..edaee281
--- /dev/null
+++ b/sim/resources/gpr_headless/robot_fixed.urdf
@@ -0,0 +1,584 @@
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/sim/resources/gpr_vel/README.md b/sim/resources/gpr_vel/README.md new file mode 100644 index 00000000..58ff4a70 --- /dev/null +++ b/sim/resources/gpr_vel/README.md @@ -0,0 +1,13 @@ +# URDF/XML development: +1. kol run https://cad.onshape.com/documents/bc3a557e2f92bdcea4099f0d/w/09713ac603d461fc1dff6b5d/e/5a4acdbffe6f8ed7c4e34970 --config-path config_example.json # noqa: E501 +2. python sim/scripts/create_fixed_torso.py +3. Rotate first link + + + + + + +4. Fix left leg axes to align with expected directions +5. urf2mjcf robot.urdf -o robot.xml + \ No newline at end of file diff --git a/sim/resources/gpr_vel/__init__.py b/sim/resources/gpr_vel/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sim/resources/gpr_vel/headless/README.md b/sim/resources/gpr_vel/headless/README.md new file mode 100644 index 00000000..58ff4a70 --- /dev/null +++ b/sim/resources/gpr_vel/headless/README.md @@ -0,0 +1,13 @@ +# URDF/XML development: +1. kol run https://cad.onshape.com/documents/bc3a557e2f92bdcea4099f0d/w/09713ac603d461fc1dff6b5d/e/5a4acdbffe6f8ed7c4e34970 --config-path config_example.json # noqa: E501 +2. python sim/scripts/create_fixed_torso.py +3. Rotate first link + + + + + + +4. Fix left leg axes to align with expected directions +5. urf2mjcf robot.urdf -o robot.xml + \ No newline at end of file diff --git a/sim/resources/gpr_vel/headless/__init__.py b/sim/resources/gpr_vel/headless/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sim/resources/gpr_vel/headless/joints.py b/sim/resources/gpr_vel/headless/joints.py new file mode 100644 index 00000000..22b5861e --- /dev/null +++ b/sim/resources/gpr_vel/headless/joints.py @@ -0,0 +1,183 @@ +"""Defines a more Pythonic interface for specifying the joint names.""" + +import textwrap +from abc import ABC +from typing import Dict, List, Tuple, Union + + +class Node(ABC): + @classmethod + def children(cls) -> List["Union[Node, str]"]: + return [ + attr + for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__")) + if isinstance(attr, (Node, str)) + ] + + @classmethod + def joints(cls) -> List[str]: + return [ + attr + for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__")) + if isinstance(attr, str) + ] + + @classmethod + def joints_motors(cls) -> List[Tuple[str, str]]: + joint_names: List[Tuple[str, str]] = [] + for attr in dir(cls): + if not attr.startswith("__"): + attr2 = getattr(cls, attr) + if isinstance(attr2, str): + joint_names.append((attr, attr2)) + + return joint_names + + @classmethod + def all_joints(cls) -> List[str]: + leaves = cls.joints() + for child in cls.children(): + if isinstance(child, Node): + leaves.extend(child.all_joints()) + return leaves + + def __str__(self) -> str: + parts = [str(child) for child in self.children()] + parts_str = textwrap.indent("\n" + "\n".join(parts), " ") + return f"[{self.__class__.__name__}]{parts_str}" + + +class LeftLeg(Node): + hip_pitch = "left_hip_pitch_04" + hip_yaw = "left_hip_yaw_03" + hip_roll = "left_hip_roll_03" + knee_pitch = "left_knee_04" + ankle_pitch = "left_ankle_02" + + +class RightLeg(Node): + hip_pitch = "right_hip_pitch_04" + hip_yaw = "right_hip_yaw_03" + hip_roll = "right_hip_roll_03" + knee_pitch = "right_knee_04" + ankle_pitch = "right_ankle_02" + + +class Legs(Node): + 
left = LeftLeg() + right = RightLeg() + + +class Robot(Node): + legs = Legs() + + height = 1.05 # - 0.151 #maybe? + standing_height = 1.05 + 0.025 # - 0.151 + + # height = 1.04 + # standing_height = 1.04 + 0.025 + + # 1.3 m and 1.149m + rotation = [0, 0, 0, 1] + + @classmethod + def default_positions(cls) -> Dict[str, float]: + return { + Robot.legs.left.hip_pitch: 0.0, + Robot.legs.left.hip_yaw: 0.0, + Robot.legs.left.hip_roll: 0.0, + Robot.legs.left.knee_pitch: 0.0, + Robot.legs.left.ankle_pitch: 0.0, + Robot.legs.right.hip_pitch: 0.0, + Robot.legs.right.hip_yaw: 0.0, + Robot.legs.right.hip_roll: 0.0, + Robot.legs.right.knee_pitch: 0.0, + Robot.legs.right.ankle_pitch: 0.0, + } + + # CONTRACT - this should be ordered according to how the policy is trained. + # E.g. the first entry should be the angle of the first joint in the policy. + @classmethod + def default_standing(cls) -> Dict[str, float]: + return { + Robot.legs.left.hip_pitch: 0.23, + Robot.legs.left.hip_yaw: 0.0, + Robot.legs.left.hip_roll: 0.0, + Robot.legs.left.knee_pitch: 0.441, # negated + Robot.legs.left.ankle_pitch: -0.195, # negated + Robot.legs.right.hip_pitch: -0.23, + Robot.legs.right.hip_yaw: 0.0, + Robot.legs.right.hip_roll: 0.0, + Robot.legs.right.knee_pitch: -0.441, # negated + Robot.legs.right.ankle_pitch: 0.195, # negated + } + + # CONTRACT - this should be ordered according to how the policy is trained. + # E.g. the first entry should be the name of the first joint in the policy. + @classmethod + def joint_names(cls) -> List[str]: + return list(cls.default_standing().keys()) + + # p_gains + @classmethod + def stiffness(cls) -> Dict[str, float]: + return { + "04": 85, + "03": 40, + "02": 30, + } + + @classmethod + def stiffness_mapping(cls) -> Dict[str, float]: + mapping = {} + stiffness = cls.stiffness() + for joint in cls.joint_names(): + mapping[joint] = stiffness[joint[-2:]] + return mapping + + # d_gains + @classmethod + def damping(cls) -> Dict[str, float]: + return { + "04": 5, + "03": 4, + "02": 1, + } + + @classmethod + def damping_mapping(cls) -> Dict[str, float]: + mapping = {} + damping = cls.damping() + for joint in cls.joint_names(): + mapping[joint] = damping[joint[-2:]] + print(mapping) + return mapping + + # effort_limits + @classmethod + def effort(cls) -> Dict[str, float]: + return { + "04": 100, + "03": 50, + "02": 17, + } + + @classmethod + def effort_mapping(cls) -> Dict[str, float]: + mapping = {} + effort = cls.effort() + for joint in cls.joint_names(): + mapping[joint] = effort[joint[-2:]] + return mapping + + +def print_joints() -> None: + joints = Robot.all_joints() + assert len(joints) == len(set(joints)), "Duplicate joint names found!" 
+ print(Robot()) + print(len(joints)) + + +if __name__ == "__main__": + # python -m sim.Robot.joints + print_joints()
diff --git a/sim/resources/gpr_vel/headless/robot_fixed.urdf b/sim/resources/gpr_vel/headless/robot_fixed.urdf new file mode 100644 index 00000000..23a72ed4 --- /dev/null +++ b/sim/resources/gpr_vel/headless/robot_fixed.urdf @@ -0,0 +1,566 @@ [566 added lines of URDF XML; markup not rendered in this view]
diff --git a/sim/resources/gpr_vel/joints.py b/sim/resources/gpr_vel/joints.py new file mode 100644 index 00000000..3a7fa1cd --- /dev/null +++ b/sim/resources/gpr_vel/joints.py @@ -0,0 +1,236 @@ +"""Defines a more Pythonic interface for specifying the joint names.""" + +import textwrap +from abc import ABC +from typing import Dict, List, Tuple, Union + + +class Node(ABC): + @classmethod + def children(cls) -> List["Union[Node, str]"]: + return [ + attr + for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__")) + if isinstance(attr, (Node, str)) + ] + + @classmethod + def joints(cls) -> List[str]: + return [ + attr + for attr in (getattr(cls, attr) for attr in dir(cls) if not attr.startswith("__")) + if isinstance(attr, str) + ] + + @classmethod + def joints_motors(cls) -> List[Tuple[str, str]]: + joint_names: List[Tuple[str, str]] = [] + for attr in dir(cls): + if not attr.startswith("__"): + attr2 = getattr(cls, attr) + if isinstance(attr2, str): + joint_names.append((attr, attr2)) + + return joint_names + + @classmethod + def all_joints(cls) -> List[str]: + leaves = cls.joints() + for child in cls.children(): + if isinstance(child, Node): + leaves.extend(child.all_joints()) + return leaves + + def __str__(self) -> str: + parts = [str(child) for child in self.children()] + parts_str = textwrap.indent("\n" + "\n".join(parts), " ") + return f"[{self.__class__.__name__}]{parts_str}" + + +class LeftLeg(Node): + hip_pitch = "L_hip_y" + hip_yaw = "L_hip_x" + hip_roll = "L_hip_z" + knee_pitch = "L_knee" + ankle_pitch = "L_ankle" + + +class RightLeg(Node): + hip_pitch = "R_hip_y" + hip_yaw = "R_hip_x" + hip_roll = "R_hip_z" + knee_pitch = "R_knee" + ankle_pitch = "R_ankle" + + +class Legs(Node): + left = LeftLeg() + right = RightLeg() + + +class Robot(Node): + legs = Legs() + + height = 1.05 + standing_height = 1.05 + 0.025 + rotation = [0, 0, 0, 1] + + @classmethod + def isaac_to_real_signs(cls) -> Dict[str, int]: + return { + Robot.legs.left.hip_pitch: 1, + Robot.legs.left.hip_yaw: 1, +
Robot.legs.left.hip_roll: 1, + Robot.legs.left.knee_pitch: 1, + Robot.legs.left.ankle_pitch: 1, + Robot.legs.right.hip_pitch: -1, + Robot.legs.right.hip_yaw: -1, + Robot.legs.right.hip_roll: 1, + Robot.legs.right.knee_pitch: -1, + Robot.legs.right.ankle_pitch: 1, + } + + @classmethod + def isaac_to_mujoco_signs(cls) -> Dict[str, int]: + return { + Robot.legs.left.hip_pitch: 1, + Robot.legs.left.hip_yaw: 1, + Robot.legs.left.hip_roll: 1, + Robot.legs.left.knee_pitch: 1, + Robot.legs.left.ankle_pitch: 1, + Robot.legs.right.hip_pitch: 1, + Robot.legs.right.hip_yaw: 1, + Robot.legs.right.hip_roll: 1, + Robot.legs.right.knee_pitch: 1, + Robot.legs.right.ankle_pitch: 1, + } + + @classmethod + def default_positions(cls) -> Dict[str, float]: + return { + Robot.legs.left.hip_pitch: 0.0, + Robot.legs.left.hip_yaw: 0.0, + Robot.legs.left.hip_roll: 0.0, + Robot.legs.left.knee_pitch: 0.0, + Robot.legs.left.ankle_pitch: 0.0, + Robot.legs.right.hip_pitch: 0.0, + Robot.legs.right.hip_yaw: 0.0, + Robot.legs.right.hip_roll: 0.0, + Robot.legs.right.knee_pitch: 0.0, + Robot.legs.right.ankle_pitch: 0.0, + } + + # CONTRACT - this should be ordered according to how the policy is trained. + # E.g. the first entry should be the angle of the first joint in the policy. + @classmethod + def default_standing(cls) -> Dict[str, float]: + return { + Robot.legs.left.hip_pitch: 0.23, + Robot.legs.left.hip_yaw: 0.0, + Robot.legs.left.hip_roll: 0.0, + Robot.legs.left.knee_pitch: -0.441, + Robot.legs.left.ankle_pitch: -0.195, + Robot.legs.right.hip_pitch: -0.23, + Robot.legs.right.hip_yaw: 0.0, + Robot.legs.right.hip_roll: 0.0, + Robot.legs.right.knee_pitch: -0.441, + Robot.legs.right.ankle_pitch: -0.195, + } + + # CONTRACT - this should be ordered according to how the policy is trained. + # E.g. the first entry should be the name of the first joint in the policy. 
+ @classmethod + def joint_names(cls) -> List[str]: + return list(cls.default_standing().keys()) + + @classmethod + def default_limits(cls) -> Dict[str, Dict[str, float]]: + return { + Robot.legs.left.knee_pitch: {"lower": -1.57, "upper": 0}, + Robot.legs.right.knee_pitch: {"lower": -1.57, "upper": 0}, + } + + # p_gains + @classmethod + def stiffness(cls) -> Dict[str, float]: + return { + "hip_y": 85, + "hip_x": 40, + "hip_z": 40, + "knee": 85, + "ankle": 30, + } + + @classmethod + def stiffness_mapping(cls) -> Dict[str, float]: + mapping = {} + stiffness = cls.stiffness() + for side in ["L", "R"]: + for joint, value in stiffness.items(): + mapping[f"{side}_{joint}"] = value + return mapping + + # d_gains + @classmethod + def damping(cls) -> Dict[str, float]: + return { + "hip_y": 5, + "hip_x": 4, + "hip_z": 4, + "knee": 5, + "ankle": 1, + } + + @classmethod + def damping_mapping(cls) -> Dict[str, float]: + mapping = {} + damping = cls.damping() + for side in ["L", "R"]: + for joint, value in damping.items(): + mapping[f"{side}_{joint}"] = value + return mapping + + # effort_limits + @classmethod + def effort(cls) -> Dict[str, float]: + return { + "hip_y": 60, + "hip_x": 40, + "hip_z": 40, + "knee": 60, + "ankle": 17, + } + + @classmethod + def effort_mapping(cls) -> Dict[str, float]: + mapping = {} + effort = cls.effort() + for side in ["L", "R"]: + for joint, value in effort.items(): + mapping[f"{side}_{joint}"] = value + return mapping + + # vel_limits + @classmethod + def velocity(cls) -> Dict[str, float]: + return {"hip": 1000, "knee": 1000, "ankle": 1000} + + @classmethod + def friction(cls) -> Dict[str, float]: + return { + "hip": 0, + "knee": 0, + "ankle": 0.1, + } + + +def print_joints() -> None: + joints = Robot.all_joints() + assert len(joints) == len(set(joints)), "Duplicate joint names found!" 
+ print(Robot()) + print(len(joints)) + + +if __name__ == "__main__": + # python -m sim.Robot.joints + print_joints()
diff --git a/sim/resources/gpr_vel/robot_fixed.urdf b/sim/resources/gpr_vel/robot_fixed.urdf new file mode 100644 index 00000000..25d5d125 --- /dev/null +++ b/sim/resources/gpr_vel/robot_fixed.urdf @@ -0,0 +1,616 @@ [616 added lines of URDF XML; markup not rendered in this view]
diff --git a/sim/resources/gpr_vel/robot_fixed.xml b/sim/resources/gpr_vel/robot_fixed.xml new file mode 100644 index 00000000..a1e1aba9 --- /dev/null +++ b/sim/resources/gpr_vel/robot_fixed.xml @@ -0,0 +1,193 @@ [193 added lines of MJCF XML; markup not rendered in this view] \ No newline at end of file
diff --git a/sim/resources/gpr_vel/robot_fixed_terrain.xml b/sim/resources/gpr_vel/robot_fixed_terrain.xml new file mode 100644 index 00000000..bfcf12b7 --- /dev/null +++ b/sim/resources/gpr_vel/robot_fixed_terrain.xml @@ -0,0 +1,199 @@ [199 added lines of MJCF XML; markup not rendered in this view] \ No newline at end of file
diff --git a/sim/scripts/imu_data_comparison.py b/sim/scripts/imu_data_comparison.py index 8487a2a5..d3cf5e2f 100644 --- a/sim/scripts/imu_data_comparison.py +++ b/sim/scripts/imu_data_comparison.py @@ -3,9 +3,11 @@ Run: python sim/scripts/imu_data_comparison.py --embodiment zbot2 """ + import argparse import os from copy import deepcopy +from typing import Dict, List import matplotlib.pyplot as plt import mediapy as media @@ -15,6 +17,8 @@ import pandas as pd from tqdm import tqdm +from sim.scripts.push_standing_tests import ModelInfo + + def plot_comparison(sim_data: pd.DataFrame, real_data: pd.DataFrame) -> None: """Plot the real IMU data.
@@ -25,52 +29,51 @@ def plot_comparison(sim_data: pd.DataFrame, real_data: pd.DataFrame) -> None: """ plt.figure(figsize=(10, 15)) + real_timestamps = (real_data["timestamp"] - real_data["timestamp"].iloc[0]).dt.total_seconds().to_numpy() - real_timestamps = (real_data['timestamp'] - real_data['timestamp'].iloc[0]).dt.total_seconds().to_numpy() - # Accelerometer plots plt.subplot(6, 1, 1) - plt.plot(real_timestamps, sim_data['accel_x'].to_numpy(), label='Simulated Accel X') - plt.plot(real_timestamps, real_data['accel_x'].to_numpy(), label='Real Accel X') - plt.title('Accelerometer X') + plt.plot(real_timestamps, sim_data["accel_x"].to_numpy(), label="Simulated Accel X") + plt.plot(real_timestamps, real_data["accel_x"].to_numpy(), label="Real Accel X") + plt.title("Accelerometer X") plt.legend() plt.subplot(6, 1, 2) - plt.plot(real_timestamps, sim_data['accel_y'].to_numpy(), label='Simulated Accel Y') - plt.plot(real_timestamps, real_data['accel_y'].to_numpy(), label='Real Accel Y') - plt.title('Accelerometer Y') + plt.plot(real_timestamps, sim_data["accel_y"].to_numpy(), label="Simulated Accel Y") + plt.plot(real_timestamps, real_data["accel_y"].to_numpy(), label="Real Accel Y") + plt.title("Accelerometer Y") plt.legend() plt.subplot(6, 1, 3) - plt.plot(real_timestamps, sim_data['accel_z'].to_numpy(), label='Simulated Accel Z') - plt.plot(real_timestamps, real_data['accel_z'].to_numpy(), label='Real Accel Z') - plt.title('Accelerometer Z') + plt.plot(real_timestamps, sim_data["accel_z"].to_numpy(), label="Simulated Accel Z") + plt.plot(real_timestamps, real_data["accel_z"].to_numpy(), label="Real Accel Z") + plt.title("Accelerometer Z") plt.legend() # Gyroscope plots plt.subplot(6, 1, 4) - plt.plot(real_timestamps, sim_data['gyro_x'].to_numpy(), label='Simulated Gyro X') - plt.plot(real_timestamps, real_data['gyro_x'].to_numpy(), label='Real Gyro X') - plt.title('Gyroscope X') + plt.plot(real_timestamps, sim_data["gyro_x"].to_numpy(), label="Simulated Gyro X") + plt.plot(real_timestamps, real_data["gyro_x"].to_numpy(), label="Real Gyro X") + plt.title("Gyroscope X") plt.legend() plt.subplot(6, 1, 5) - plt.plot(real_timestamps, sim_data['gyro_y'].to_numpy(), label='Simulated Gyro Y') - plt.plot(real_timestamps, real_data['gyro_y'].to_numpy(), label='Real Gyro Y') - plt.title('Gyroscope Y') + plt.plot(real_timestamps, sim_data["gyro_y"].to_numpy(), label="Simulated Gyro Y") + plt.plot(real_timestamps, real_data["gyro_y"].to_numpy(), label="Real Gyro Y") + plt.title("Gyroscope Y") plt.legend() plt.subplot(6, 1, 6) - plt.plot(real_timestamps, sim_data['gyro_z'].to_numpy(), label='Simulated Gyro Z') - plt.plot(real_timestamps, real_data['gyro_z'].to_numpy(), label='Real Gyro Z') - plt.title('Gyroscope Z') + plt.plot(real_timestamps, sim_data["gyro_z"].to_numpy(), label="Simulated Gyro Z") + plt.plot(real_timestamps, real_data["gyro_z"].to_numpy(), label="Real Gyro Z") + plt.title("Gyroscope Z") plt.legend() plt.tight_layout() - plt.savefig('imu_data_comparison.png') + plt.savefig("imu_data_comparison.png") -def read_real_data(data_file: str = "sim/resources/zbot2/imu_data.csv") -> None: +def read_real_data(data_file: str = "sim/resources/zbot2/imu_data.csv") -> pd.DataFrame: """Plot the real IMU data. 
Args: @@ -82,8 +85,8 @@ def read_real_data(data_file: str = "sim/resources/zbot2/imu_data.csv") -> None: # Reading the data from CSV file df = pd.read_csv(data_file) - df = df.apply(pd.to_numeric, errors='ignore') - df['timestamp'] = pd.to_datetime(df['timestamp']) + df = df.apply(pd.to_numeric, errors="ignore") + df["timestamp"] = pd.to_datetime(df["timestamp"]) return df @@ -96,7 +99,7 @@ def pd_control( kd: np.ndarray, default: np.ndarray, ) -> np.ndarray: - """Calculates torques from position commands + """Calculates torques from position commands. Args: target_q: The target position. @@ -118,9 +121,8 @@ def run_simulation( kd: float = 1.0, sim_duration: float = 15.0, effort: float = 5.0, -) -> None: - """ - Run the Mujoco simulation using the provided policy and configuration. +) -> pd.DataFrame: + """Run the Mujoco simulation using the provided policy and configuration. Args: embodiment: The embodiment to use for the simulation. @@ -129,7 +131,7 @@ def run_simulation( sim_duration: The duration of the simulation. effort: The effort to apply to the robot. """ - model_info = { + model_info: ModelInfo = { "sim_dt": 0.001, "tau_factor": 2, "num_actions": 10, @@ -156,14 +158,14 @@ def run_simulation( print("Default position:", default) target_q = np.zeros((model_info["num_actions"]), dtype=np.double) - viewer = mujoco_viewer.MujocoViewer(model, data,"offscreen") + viewer = mujoco_viewer.MujocoViewer(model, data, "offscreen") force_duration = 400 # Duration of force application in timesteps force_timer = 0 applied_force = np.array([0.0, -3, 0.0]) - sim_data = { + sim_data: Dict[str, List[float]] = { "timestamp": [], "gyro_x": [], "gyro_y": [], @@ -216,7 +218,7 @@ def run_simulation( data.xfrc_applied[1] = np.concatenate([applied_force, np.zeros(3)]) force_timer -= 1 else: - data.xfrc_applied[1] = np.zeros(6) + data.xfrc_applied[1] = np.zeros(6) media.write_video("push_tests.mp4", frames, fps=framerate) @@ -242,4 +244,4 @@ def run_simulation( sim_data = run_simulation(args.embodiment, args.kp, args.kd) real_data = read_real_data() - plot_comparison(sim_data, real_data) \ No newline at end of file + plot_comparison(sim_data, real_data) diff --git a/sim/scripts/push_standing_tests.py b/sim/scripts/push_standing_tests.py index 41208c24..6cf83e41 100644 --- a/sim/scripts/push_standing_tests.py +++ b/sim/scripts/push_standing_tests.py @@ -3,9 +3,12 @@ Run: python sim/scripts/push_standing_tests.py --load_model kinfer.onnx --embodiment zbot2 """ + import argparse import os from copy import deepcopy +from dataclasses import dataclass +from typing import List, TypedDict import mediapy as media import mujoco @@ -14,6 +17,17 @@ from tqdm import tqdm +@dataclass +class ModelInfo(TypedDict): + sim_dt: float + tau_factor: float + num_actions: int + num_observations: int + robot_effort: List[float] + robot_stiffness: List[float] + robot_damping: List[float] + + def pd_control( target_q: np.ndarray, q: np.ndarray, @@ -22,7 +36,16 @@ def pd_control( kd: np.ndarray, default: np.ndarray, ) -> np.ndarray: - """Calculates torques from position commands""" + """Calculates torques from position commands. + + Args: + target_q: The target position. + q: The current position. + kp: The proportional gain. + dq: The current velocity. + kd: The derivative gain. + default: The default position. + """ return kp * (target_q + default - q) - kd * dq @@ -34,14 +57,17 @@ def run_test( sim_duration: float = 3.0, effort: float = 5.0, ) -> None: - """ - Run the Mujoco simulation using the provided policy and configuration. 
+ """Run the Mujoco simulation using the provided policy and configuration. Args: - policy: The policy used for controlling the simulation. - cfg: The configuration object containing simulation settings. + embodiment: The embodiment name. + kp: The proportional gain. + kd: The derivative gain. + push_force: The force to apply to the robot. + sim_duration: The duration of the simulation. + effort: The effort to apply to the robot. """ - model_info = { + model_info: ModelInfo = { "sim_dt": 0.001, "tau_factor": 2, "num_actions": 10, @@ -69,7 +95,7 @@ def run_test( data.qpos = model.keyframe("standing").qpos default = deepcopy(model.keyframe("standing").qpos)[-model_info["num_actions"] :] print("Default position:", default) - + mujoco.mj_step(model, data) for ii in range(len(data.ctrl) + 1): print(data.joint(ii).id, data.joint(ii).name) @@ -78,14 +104,13 @@ def run_test( data.qacc = np.zeros_like(data.qacc) target_q = np.zeros((model_info["num_actions"]), dtype=np.double) - viewer = mujoco_viewer.MujocoViewer(model, data,"offscreen") - + viewer = mujoco_viewer.MujocoViewer(model, data, "offscreen") + force_application_interval = 1000 # Apply force every 1000 steps (1 second at 1000Hz) - force_magnitude_range = (-push_force, push_force) # Force range in Newtons + force_magnitude_range = (-push_force, push_force) # Force range in Newtons force_duration = 100 # Duration of force application in timesteps force_timer = 0 - for timestep in tqdm(range(int(sim_duration / model_info["sim_dt"])), desc="Simulating..."): # Obtain an observation q = data.qpos.astype(np.double)[-model_info["num_actions"] :] @@ -101,11 +126,7 @@ def run_test( if timestep % force_application_interval == 0: print("Applying force") # Generate random force vector - random_force = np.random.uniform( - force_magnitude_range[0], - force_magnitude_range[1], - size=3 - ) + random_force = np.random.uniform(force_magnitude_range[0], force_magnitude_range[1], size=3) force_timer = force_duration # Apply force if timer is active diff --git a/sim/sim2sim.py b/sim/sim2sim.py index afe4587f..e33ac023 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -73,7 +73,7 @@ def get_gravity_orientation(quaternion): """ Args: quaternion: np.ndarray[float, float, float, float] - + Returns: gravity_orientation: np.ndarray[float, float, float] """ @@ -126,6 +126,7 @@ def run_mujoco( render: bool = True, sim_duration: float = 60.0, h5_out_dir: str = "sim/resources", + terrain: bool = False, ) -> None: """ Run the Mujoco simulation using the provided policy and configuration. @@ -135,7 +136,10 @@ def run_mujoco( cfg: The configuration object containing simulation settings. 
""" model_dir = os.environ.get("MODEL_DIR", "sim/resources") - mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed.xml" + if terrain: + mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed_terrain.xml" + else: + mujoco_model_path = f"{model_dir}/{embodiment}/robot_fixed.xml" model = mujoco.MjModel.from_xml_path(mujoco_model_path) model.opt.timestep = model_info["sim_dt"] @@ -147,10 +151,21 @@ def run_mujoco( assert isinstance(model_info["robot_stiffness"], list) assert isinstance(model_info["robot_damping"], list) - tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * model_info["tau_factor"] - kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"])) - kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"])) - + # tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * model_info["tau_factor"] + # kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"])) + # kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"])) + # HACKY FOR HEADLESS + efforts = model_info["robot_effort"] + stiffnesses = model_info["robot_stiffness"] + dampings = model_info["robot_damping"] + leg_lims = [efforts[0], efforts[1], efforts[1], efforts[0], efforts[2]] + tau_limit = np.array(leg_lims + leg_lims) * model_info["tau_factor"] + + leg_kps = [stiffnesses[0], stiffnesses[1], stiffnesses[1], stiffnesses[0], stiffnesses[2]] + kps = np.array(leg_kps + leg_kps) + + leg_kds = [dampings[0], dampings[1], dampings[1], dampings[0], dampings[2]] + kds = np.array(leg_kds + leg_kds) try: data.qpos = model.keyframe("default").qpos default = deepcopy(model.keyframe("default").qpos)[-model_info["num_actions"] :] @@ -184,9 +199,9 @@ def run_mujoco( "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32), "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32), "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32), - # "imu_ang_vel.1": np.zeros(3).astype(np.float32), # "imu_euler_xyz.1": np.zeros(3).astype(np.float32), "projected_gravity.1": np.zeros(3).astype(np.float32), + "imu_ang_vel.1": np.zeros(3).astype(np.float32), "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32), } @@ -221,7 +236,7 @@ def run_mujoco( # eu_ang = np.array([0.0, 0.0, 0.0]) # omega = np.array([0.0, 0.0, 0.0]) - + # gvec = np.array([0.0, 0.0, -1.0]) # Calculate speed and accumulate for average speed calculation speed = np.linalg.norm(v[:2]) # Speed in the x-y plane total_speed += speed @@ -245,7 +260,7 @@ def run_mujoco( input_data["prev_actions.1"] = prev_actions.astype(np.float32) input_data["projected_gravity.1"] = gvec.astype(np.float32) - # input_data["imu_ang_vel.1"] = omega.astype(np.float32) + input_data["imu_ang_vel.1"] = omega.astype(np.float32) # input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32) input_data["buffer.1"] = hist_obs.astype(np.float32) @@ -264,6 +279,7 @@ def run_mujoco( tau = np.clip(tau, -tau_limit, tau_limit) # Clamp torques data.ctrl = tau + mujoco.mj_step(model, data) if render: @@ -295,7 +311,9 @@ def run_mujoco( parser.add_argument("--log_h5", action="store_true", help="log_h5") parser.add_argument("--h5_out_dir", type=str, default="sim/resources", help="Directory to save HDF5 files") parser.add_argument("--no_render", action="store_false", dest="render", help="Disable rendering") - parser.set_defaults(render=True) + parser.add_argument("--terrain", 
action="store_true", help="terrain") + parser.set_defaults(render=True, terrain=False) + args = parser.parse_args() if args.keyboard_use: @@ -303,7 +321,7 @@ def run_mujoco( pygame.init() pygame.display.set_caption("Simulation Control") else: - x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.15, 0.0, 0.0 + x_vel_cmd, y_vel_cmd, yaw_vel_cmd = -0.5, 0.0, 0.0 policy = ONNXModel(args.load_model) metadata = policy.get_metadata() @@ -326,4 +344,5 @@ def run_mujoco( log_h5=args.log_h5, render=args.render, h5_out_dir=args.h5_out_dir, + terrain=args.terrain, ) diff --git a/sim/utils/task_registry.py b/sim/utils/task_registry.py index 04198fa3..64325ee3 100755 --- a/sim/utils/task_registry.py +++ b/sim/utils/task_registry.py @@ -159,6 +159,7 @@ def make_alg_runner(self, env, name=None, args=None, train_cfg=None, log_root="d elif log_root is None: log_dir = None else: + log_root = os.path.join(log_root, "logs", train_cfg.runner.experiment_name) log_dir = os.path.join(log_root, datetime.now().strftime("%b%d_%H-%M-%S") + "_" + train_cfg.runner.run_name) train_cfg_dict = class_to_dict(train_cfg) diff --git a/third_party/kinfer b/third_party/kinfer deleted file mode 160000 index e7b2dcd2..00000000 --- a/third_party/kinfer +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e7b2dcd2b7728d853f778e23585b2f3eb26f6658