From 518867271dec0ab28086e9bd95ef7ce4b8864cdf Mon Sep 17 00:00:00 2001 From: Lucas Alegre Date: Fri, 25 Oct 2024 16:39:51 -0300 Subject: [PATCH] Do not treat reward contact as separate objective in ant-v5 --- mo_gymnasium/envs/mujoco/ant_v5.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mo_gymnasium/envs/mujoco/ant_v5.py b/mo_gymnasium/envs/mujoco/ant_v5.py index c4e4ad9..77c6597 100644 --- a/mo_gymnasium/envs/mujoco/ant_v5.py +++ b/mo_gymnasium/envs/mujoco/ant_v5.py @@ -41,17 +41,16 @@ def step(self, action): x_velocity = info["x_velocity"] y_velocity = info["y_velocity"] cost = info["reward_ctrl"] - contact_cost = info["reward_contact"] healthy_reward = info["reward_survive"] if self.cost_objetive: cost /= self._ctrl_cost_weight # Ignore the weight in the original AntEnv - contact_cost /= self._contact_cost_weight vec_reward = np.array([x_velocity, y_velocity, cost], dtype=np.float32) else: vec_reward = np.array([x_velocity, y_velocity], dtype=np.float32) - vec_reward += cost + contact_cost + vec_reward += cost vec_reward += healthy_reward + vec_reward += info["reward_contact"] # Do not treat contact forces as a separate objective return observation, vec_reward, terminated, truncated, info