Skip to content

Commit

Permalink
Adding three different mo-mountain-car environments (#97)
Browse files Browse the repository at this point in the history
Co-authored-by: Lucas Alegre <[email protected]>
  • Loading branch information
pranavg23 and Lucas Alegre authored Aug 8, 2024
1 parent 668ef51 commit 0ec9b86
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 9 deletions.
21 changes: 21 additions & 0 deletions mo_gymnasium/envs/mountain_car/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,24 @@
entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar",
max_episode_steps=200,
)

# 3-objective variant: time penalty, a single merged movement penalty
# (reverse and forward collapsed into one objective), plus a speed objective.
register(
id="mo-mountaincar-3d-v0",
entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar",
max_episode_steps=200,
kwargs={"add_speed_objective": True, "merge_move_penalty": True},
)

# 2-objective variant: time penalty plus a single merged movement penalty.
register(
id="mo-mountaincar-timemove-v0",
entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar",
max_episode_steps=200,
kwargs={"merge_move_penalty": True},
)

# 2-objective variant: time penalty plus a speed objective
# (movement penalties removed entirely).
register(
id="mo-mountaincar-timespeed-v0",
entry_point="mo_gymnasium.envs.mountain_car.mountain_car:MOMountainCar",
max_episode_steps=200,
kwargs={"remove_move_penalty": True, "add_speed_objective": True},
)
56 changes: 48 additions & 8 deletions mo_gymnasium/envs/mountain_car/mountain_car.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,50 @@ class MOMountainCar(MountainCarEnv, EzPickle):
See [Gymnasium's env](https://gymnasium.farama.org/environments/classic_control/mountain_car_continuous/) for more information.
## Reward space:
By default, the reward space is a 3D vector containing the time penalty, and penalties for reversing and going forward.
- time penalty: -1.0 for each time step
- reverse penalty: -1.0 for each time step the action is 0 (reverse)
- forward penalty: -1.0 for each time step the action is 2 (forward)
Alternatively, the reward can be changed with the following options:
- add_speed_objective: Add an extra objective corresponding to the speed of the car.
- remove_move_penalty: Remove the reverse and forward objectives.
- merge_move_penalty: Merge reverse and forward penalties into a single penalty.
"""

def __init__(
    self,
    render_mode: Optional[str] = None,
    add_speed_objective: bool = False,
    remove_move_penalty: bool = False,
    merge_move_penalty: bool = False,
    goal_velocity=0,
):
    """Initialize the multi-objective mountain car environment.

    Args:
        render_mode: Render mode, forwarded to the base ``MountainCarEnv``.
        add_speed_objective: If True, append an extra objective proportional
            to the car's speed.
        remove_move_penalty: If True, drop the reverse and forward penalty
            objectives (takes precedence over ``merge_move_penalty``).
        merge_move_penalty: If True, merge the reverse and forward penalties
            into a single movement-penalty objective.
        goal_velocity: Velocity threshold at the goal, forwarded to the base env.
    """
    super().__init__(render_mode, goal_velocity)
    EzPickle.__init__(self, render_mode, add_speed_objective, remove_move_penalty, merge_move_penalty, goal_velocity)
    self.add_speed_objective = add_speed_objective
    self.remove_move_penalty = remove_move_penalty
    self.merge_move_penalty = merge_move_penalty

    # Base objectives: time penalty, reverse penalty, forward penalty.
    self.reward_dim = 3

    if self.add_speed_objective:
        self.reward_dim += 1

    # remove_move_penalty drops both movement objectives; merge_move_penalty
    # collapses them into one. remove takes precedence when both are set.
    if self.remove_move_penalty:
        self.reward_dim -= 2
    elif self.merge_move_penalty:
        self.reward_dim -= 1

    low = np.array([-1] * self.reward_dim)
    high = np.zeros(self.reward_dim)
    high[0] = -1  # Time penalty is always -1
    # NOTE(review): step() emits 0.0 for the time objective on the terminal
    # step, which falls outside [low[0], high[0]] = [-1, -1] — confirm whether
    # high[0] should be 0 instead.
    if self.add_speed_objective:
        low[-1] = 0.0
        high[-1] = 1.1  # speed objective is 15 * |velocity|; bound set by max car speed

    self.reward_space = spaces.Box(low=low, high=high, shape=(self.reward_dim,), dtype=np.float32)

def step(self, action: int):
assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid"

Expand All @@ -39,11 +70,20 @@ def step(self, action: int):
velocity = 0

terminated = bool(position >= self.goal_position and velocity >= self.goal_velocity)
# reward = -1.0
reward = np.zeros(3, dtype=np.float32)

reward = np.zeros(self.reward_dim, dtype=np.float32)

reward[0] = 0.0 if terminated else -1.0 # time penalty
reward[1] = 0.0 if action != 0 else -1.0 # reverse penalty
reward[2] = 0.0 if action != 2 else -1.0 # forward penalty

if not self.remove_move_penalty:
if self.merge_move_penalty:
reward[1] = 0.0 if action == 1 else -1.0
else:
reward[1] = 0.0 if action != 0 else -1.0 # reverse penalty
reward[2] = 0.0 if action != 2 else -1.0 # forward penalty

if self.add_speed_objective:
reward[-1] = 15 * abs(velocity)

self.state = (position, velocity)
if self.render_mode == "human":
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ classifiers = [
]
dependencies = [
"gymnasium>=0.28.1,<0.30",
"numpy >=1.21.0",
"numpy >=1.21.0,<2",
"pygame >=2.1.0",
"scipy >=1.7.3",
"pymoo >=0.6.0",
Expand Down

0 comments on commit 0ec9b86

Please sign in to comment.