Use forward speed rather than speed in highway-v0 reward calculation

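Fixes Farama-Foundation#268: the speed term of the reward is now computed from the forward component of the velocity, `speed * cos(heading)`, instead of the raw speed.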
boschresearch · Feb 18, 2022 · 1a04c6a
1 parent c80e2a2
commit 1a04c6a
Showing 1 changed file with 3 additions and 1 deletion.
diff --git a/highway_env/envs/highway_env.py b/highway_env/envs/highway_env.py
@@ -86,7 +86,9 @@ def _reward(self, action: Action) -> float:
         neighbours = self.road.network.all_side_lanes(self.vehicle.lane_index)
         lane = self.vehicle.target_lane_index[2] if isinstance(self.vehicle, ControlledVehicle) \
             else self.vehicle.lane_index[2]
-        scaled_speed = utils.lmap(self.vehicle.speed, self.config["reward_speed_range"], [0, 1])
+        # Use forward speed rather than speed, see https://github.com/eleurent/highway-env/issues/268
+        forward_speed = self.vehicle.speed * np.cos(self.vehicle.heading)
+        scaled_speed = utils.lmap(forward_speed, self.config["reward_speed_range"], [0, 1])
         reward = \
             + self.config["collision_reward"] * self.vehicle.crashed \
             + self.config["right_lane_reward"] * lane / max(len(neighbours) - 1, 1) \