Add option death_as_penalty to mo-super-mario
LucasAlegre committed Jun 29, 2023
1 parent 7992b32 commit d98f068
Showing 2 changed files with 60 additions and 21 deletions.
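
In short: when death_as_penalty=True, "death" is removed from the set of objectives and the -25 death reward is instead added to every remaining component of the vector reward. A minimal usage sketch, assuming the environment id mo-supermario-v0 registered by MO-Gymnasium and that make() forwards keyword arguments to the environment constructor:

    import mo_gymnasium as mo_gym

    # Default: 5-dimensional reward [x_pos, time, death, coin, enemy]
    # (env id assumed from MO-Gymnasium's registry)
    env = mo_gym.make("mo-supermario-v0")

    # New flag: "death" is dropped from the reward vector (4 objectives)
    # and a -25 penalty is added to every component on the step Mario dies.
    env = mo_gym.make("mo-supermario-v0", death_as_penalty=True)
    print(env.unwrapped.reward_dim)  # 4
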
7 changes: 5 additions & 2 deletions mo_gymnasium/envs/mario/joypad_space.py
@@ -1,5 +1,8 @@
 """An environment wrapper to convert binary to discrete action space. This is a modified version of the original code from nes-py."""
+from typing import List
+
 import gymnasium as gym
+from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
 from gymnasium import Env, Wrapper


@@ -20,11 +23,11 @@ class JoypadSpace(Wrapper):
     }

     @classmethod
-    def buttons(cls) -> list:
+    def buttons(cls) -> List:
         """Return the buttons that can be used as actions."""
         return list(cls._button_map.keys())

-    def __init__(self, env: Env, actions: list):
+    def __init__(self, env: Env, actions: List = SIMPLE_MOVEMENT):
         """
         Initialize a new binary to discrete action space wrapper.
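
Note that JoypadSpace's actions argument now defaults to SIMPLE_MOVEMENT from gym_super_mario_bros, so the wrapper can be applied without spelling out a movement list. A quick look at what that default contains (illustrative only; nothing beyond the new default is assumed):

    from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

    # SIMPLE_MOVEMENT is a list of NES button combinations:
    # [['NOOP'], ['right'], ['right', 'A'], ['right', 'B'],
    #  ['right', 'A', 'B'], ['A'], ['left']]
    print(len(SIMPLE_MOVEMENT))  # 7 -> JoypadSpace(env) exposes Discrete(7)
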
74 changes: 55 additions & 19 deletions mo_gymnasium/envs/mario/mario.py
@@ -43,19 +43,47 @@ def __init__(
         lost_levels=False,
         target=None,
         objectives=["x_pos", "time", "death", "coin", "enemy"],
+        death_as_penalty=False,
         render_mode: Optional[str] = None,
     ):
-        EzPickle.__init__(self, rom_mode, lost_levels, target, objectives, render_mode)
+        EzPickle.__init__(self, rom_mode, lost_levels, target, objectives, death_as_penalty, render_mode)
         self.render_mode = render_mode
         super().__init__(rom_mode, lost_levels, target)

         self.objectives = set(objectives)
+        self.death_as_penalty = death_as_penalty
+        if self.death_as_penalty:  # death is not a separate objective
+            self.objectives.discard("death")
+        self.reward_dim = len(self.objectives)
+
+        low = np.empty(self.reward_dim, dtype=np.float32)
+        high = np.empty(self.reward_dim, dtype=np.float32)
+        obj_idx = 0
+        if "x_pos" in self.objectives:
+            low[obj_idx] = -np.inf
+            high[obj_idx] = np.inf
+            obj_idx += 1
+        if "time" in self.objectives:
+            low[obj_idx] = -np.inf
+            high[obj_idx] = 0.0
+            obj_idx += 1
+        if "death" in self.objectives:
+            low[obj_idx] = -25.0
+            high[obj_idx] = 0.0
+            obj_idx += 1
+        if "coin" in self.objectives:
+            low[obj_idx] = 0.0
+            high[obj_idx] = 100.0
+            obj_idx += 1
+        if "enemy" in self.objectives:
+            low[obj_idx] = 0.0
+            high[obj_idx] = np.inf
+
         self.reward_space = gym.spaces.Box(
-            low=np.array([-np.inf, -np.inf, -25, 0, 0]),
-            high=np.array([np.inf, 0, 0, 100, np.inf]),
-            shape=(len(objectives),),
+            low=low,
+            high=high,
+            shape=(len(self.objectives),),
         )
-        self.reward_dim = len(objectives)

         # observation space for the environment is static across all instances
         self.observation_space = gym.spaces.Box(low=0, high=255, shape=SCREEN_SHAPE_24_BIT, dtype=np.uint8)

@@ -103,7 +131,8 @@ def step(self, action):

         """ Construct Multi-Objective Reward"""
         # [x_pos, time, death, coin, enemy]
-        moreward = []
+        vec_reward = np.zeros(self.reward_dim, dtype=np.float32)
+        obj_idx = 0

         # 1. x position
         if "x_pos" in self.objectives:

@@ -112,39 +141,46 @@
             # resolve an issue where after death the x position resets
             if xpos_r < -5:
                 xpos_r = 0
-            moreward.append(xpos_r)
+            vec_reward[obj_idx] = xpos_r
+            obj_idx += 1

         # 2. time penaltiy
         if "time" in self.objectives:
             time_r = info["time"] - self.time
             self.time = info["time"]
             # time is always decreasing
             if time_r > 0:
-                time_r = 0
-            moreward.append(time_r)
+                time_r = 0.0
+            vec_reward[obj_idx] = time_r
+            obj_idx += 1

         # 3. death
+        if self.lives > info["life"]:
+            death_r = -25.0
+        else:
+            death_r = 0.0
         if "death" in self.objectives:
-            if self.lives > info["life"]:
-                death_r = -25
-            else:
-                death_r = 0
-            moreward.append(death_r)
+            vec_reward[obj_idx] = death_r
+            obj_idx += 1
+        elif self.death_as_penalty:
+            vec_reward += death_r  # add death reward to all objectives

         # 4. coin
-        coin_r = 0
+        coin_r = 0.0
         if "coin" in self.objectives:
             coin_r = (info["coins"] - self.coin) * 100
             self.coin = info["coins"]
-            moreward.append(coin_r)
+            vec_reward[obj_idx] = coin_r
+            obj_idx += 1

         # 5. enemy
         if "enemy" in self.objectives:
             enemy_r = info["score"] - self.score
             if coin_r > 0 or done:
                 enemy_r = 0
             self.score = info["score"]
-            moreward.append(enemy_r)
+            vec_reward[obj_idx] = enemy_r
+            obj_idx += 1

         ############################################################################

@@ -155,14 +191,14 @@

         self.lives = info["life"]

-        mor = np.array(moreward, dtype=np.float32) * self.reward_space.shape[0] / 150
+        vec_reward *= self.reward_space.shape[0] / 150

         info["score"] = info["score"] + self.stage_bonus

         if self.render_mode == "human":
             self.render()

-        return obs, mor, bool(done), False, info
+        return obs, vec_reward, bool(done), False, info


 if __name__ == "__main__":
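
Taken together, step() now preallocates the reward vector with np.zeros, fills it positionally via obj_idx in a fixed objective order, and rescales it by reward_dim / 150 so per-step magnitudes stay comparable as objectives are added or removed. A standalone sketch of the same bounds-construction pattern used in __init__ (illustrative code under those assumptions, not taken from the repository):

    import numpy as np
    import gymnasium as gym

    # death_as_penalty=True drops "death" from the active objectives
    objectives = {"x_pos", "time", "coin", "enemy"}
    bounds = {
        "x_pos": (-np.inf, np.inf),
        "time": (-np.inf, 0.0),
        "death": (-25.0, 0.0),
        "coin": (0.0, 100.0),
        "enemy": (0.0, np.inf),
    }
    # keep the same fixed ordering the environment uses
    names = [n for n in ("x_pos", "time", "death", "coin", "enemy") if n in objectives]
    low = np.array([bounds[n][0] for n in names], dtype=np.float32)
    high = np.array([bounds[n][1] for n in names], dtype=np.float32)
    reward_space = gym.spaces.Box(low=low, high=high, shape=(len(names),))
    print(reward_space.shape)  # (4,): one bound pair per active objective
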
