From e27f57623e403d58af55d76ccb53f4dedb4efa13 Mon Sep 17 00:00:00 2001 From: GianiStatie Date: Tue, 15 Oct 2024 18:01:06 +0300 Subject: [PATCH 1/9] added kwargs to function call --- godot_rl/core/godot_env.py | 54 +++++++++++++++---- godot_rl/wrappers/stable_baselines_wrapper.py | 20 +++++-- 2 files changed, 60 insertions(+), 14 deletions(-) diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py index 53996896..e1a89b7a 100644 --- a/godot_rl/core/godot_env.py +++ b/godot_rl/core/godot_env.py @@ -31,6 +31,7 @@ def __init__( action_repeat: Optional[int] = None, speedup: Optional[int] = None, convert_action_space: bool = False, + **kwargs, ): """ Initialize a new instance of GodotEnv @@ -51,9 +52,20 @@ def __init__( env_path = self._set_platform_suffix(env_path) self.check_platform(env_path) - self._launch_env(env_path, port, show_window, framerate, seed, action_repeat, speedup) + self._launch_env( + env_path, + port, + show_window, + framerate, + seed, + action_repeat, + speedup, + **kwargs, + ) else: - print("No game binary has been provided, please press PLAY in the Godot editor") + print( + "No game binary has been provided, please press PLAY in the Godot editor" + ) self.port = port self.connection = self._start_server() @@ -72,10 +84,13 @@ def __init__( # sf2 requires a tuple action space # Multiple agents' action space(s) self.tuple_action_spaces = [ - spaces.Tuple([v for _, v in action_space.items()]) for action_space in self.action_spaces + spaces.Tuple([v for _, v in action_space.items()]) + for action_space in self.action_spaces ] # Single agent action space processor using the action space(s) of the first agent - self.action_space_processor = ActionSpaceProcessor(self.tuple_action_spaces[0], convert_action_space) + self.action_space_processor = ActionSpaceProcessor( + self.tuple_action_spaces[0], convert_action_space + ) # For multi-policy envs: The name of each agent's policy set in the env itself (any training_mode # AIController instance 
is treated as an agent) @@ -222,7 +237,9 @@ def _process_obs(self, response_obs: dict): for k in response_obs[0].keys(): if "2d" in k: for sub in response_obs: - sub[k] = self._decode_2d_obs_from_string(sub[k], self.observation_space[k].shape) + sub[k] = self._decode_2d_obs_from_string( + sub[k], self.observation_space[k].shape + ) return response_obs @@ -277,7 +294,17 @@ def _close(self): print("exit was not clean, using atexit to close env") self.close() - def _launch_env(self, env_path, port, show_window, framerate, seed, action_repeat, speedup): + def _launch_env( + self, + env_path, + port, + show_window, + framerate, + seed, + action_repeat, + speedup, + **kwargs, + ): # --fixed-fps {framerate} path = convert_macos_path(env_path) if platform == "darwin" else env_path @@ -291,6 +318,9 @@ def _launch_env(self, env_path, port, show_window, framerate, seed, action_repea launch_cmd += f" --action_repeat={action_repeat}" if speedup is not None: launch_cmd += f" --speedup={speedup}" + if len(kwargs) > 0: + for key, value in kwargs.items(): + launch_cmd += f" --{key}={value}" launch_cmd = launch_cmd.split(" ") self.proc = subprocess.Popen( @@ -356,7 +386,9 @@ def _get_env_info(self): if v["action_type"] == "discrete": tmp_action_spaces[k] = spaces.Discrete(v["size"]) elif v["action_type"] == "continuous": - tmp_action_spaces[k] = spaces.Box(low=-1.0, high=1.0, shape=(v["size"],)) + tmp_action_spaces[k] = spaces.Box( + low=-1.0, high=1.0, shape=(v["size"],) + ) else: print(f"action space {v['action_type']} is not supported") assert 0, f"action space {v['action_type']} is not supported" @@ -367,7 +399,9 @@ def _get_env_info(self): # A single observation space will be received as a dict in previous versions, # A list of dicts will be received from newer version, defining the observation_space for each agent (AIController) if isinstance(json_dict["observation_space"], dict): - json_dict["observation_space"] = [json_dict["observation_space"]] * self.num_envs + 
json_dict["observation_space"] = [ + json_dict["observation_space"] + ] * self.num_envs for agent_obs_space in json_dict["observation_space"]: observation_spaces = {} @@ -396,7 +430,9 @@ def _get_env_info(self): # Gets policy names defined in AIControllers in Godot. If an older version of the plugin is used and no policy # names are sent, "shared_policy" will be set for compatibility. - self.agent_policy_names = json_dict.get("agent_policy_names", ["shared_policy"] * self.num_envs) + self.agent_policy_names = json_dict.get( + "agent_policy_names", ["shared_policy"] * self.num_envs + ) @staticmethod def _decode_2d_obs_from_string( diff --git a/godot_rl/wrappers/stable_baselines_wrapper.py b/godot_rl/wrappers/stable_baselines_wrapper.py index 1db812a9..aed3f780 100644 --- a/godot_rl/wrappers/stable_baselines_wrapper.py +++ b/godot_rl/wrappers/stable_baselines_wrapper.py @@ -20,7 +20,9 @@ def __init__( ) -> None: # If we are doing editor training, n_parallel must be 1 if env_path is None and n_parallel > 1: - raise ValueError("You must provide the path to a exported game executable if n_parallel > 1") + raise ValueError( + "You must provide the path to a exported game executable if n_parallel > 1" + ) # Define the default port port = kwargs.pop("port", GodotEnv.DEFAULT_PORT) @@ -54,7 +56,9 @@ def _check_valid_action_space(self) -> None: len(action_space.spaces) == 1 ), f"sb3 supports a single action space, this env contains multiple spaces {action_space}" - def step(self, action: np.ndarray) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, List[Dict[str, Any]]]: + def step( + self, action: np.ndarray + ) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, List[Dict[str, Any]]]: # Initialize lists for collecting results all_obs = [] all_rewards = [] @@ -122,7 +126,9 @@ def action_space(self) -> gym.Space: def num_envs(self) -> int: return self.envs[0].num_envs * self.n_parallel - def env_is_wrapped(self, wrapper_class: type, indices: Optional[List[int]] = 
None) -> List[bool]: + def env_is_wrapped( + self, wrapper_class: type, indices: Optional[List[int]] = None + ) -> List[bool]: # Return a list indicating that no environments are wrapped return [False] * (self.envs[0].num_envs * self.n_parallel) @@ -133,7 +139,9 @@ def env_method(self): def get_attr(self, attr_name: str, indices=None) -> List[Any]: if attr_name == "render_mode": return [None for _ in range(self.num_envs)] - raise AttributeError("get attr not fully implemented in godot-rl StableBaselinesWrapper") + raise AttributeError( + "get attr not fully implemented in godot-rl StableBaselinesWrapper" + ) def seed(self, seed=None): raise NotImplementedError() @@ -156,7 +164,9 @@ def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) -> if can_import("ray"): print("WARNING, stable baselines and ray[rllib] are not compatible") # Initialize the custom environment - env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, **kwargs) + env = StableBaselinesGodotEnv( + env_path=args.env_path, show_window=args.viz, speedup=args.speedup, **kwargs + ) env = VecMonitor(env) # Initialize the PPO model From 1b60f6662d08226eb039e008f5e59b2b6c30367a Mon Sep 17 00:00:00 2001 From: GianiStatie Date: Tue, 15 Oct 2024 18:03:40 +0300 Subject: [PATCH 2/9] reverted linting --- godot_rl/core/godot_env.py | 52 ++++--------------- godot_rl/wrappers/stable_baselines_wrapper.py | 20 ++----- 2 files changed, 15 insertions(+), 57 deletions(-) diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py index e1a89b7a..aa37828f 100644 --- a/godot_rl/core/godot_env.py +++ b/godot_rl/core/godot_env.py @@ -31,7 +31,7 @@ def __init__( action_repeat: Optional[int] = None, speedup: Optional[int] = None, convert_action_space: bool = False, - **kwargs, + **kwargs ): """ Initialize a new instance of GodotEnv @@ -52,20 +52,9 @@ def __init__( env_path = self._set_platform_suffix(env_path) self.check_platform(env_path) - 
self._launch_env( - env_path, - port, - show_window, - framerate, - seed, - action_repeat, - speedup, - **kwargs, - ) + self._launch_env(env_path, port, show_window, framerate, seed, action_repeat, speedup, **kwargs) else: - print( - "No game binary has been provided, please press PLAY in the Godot editor" - ) + print("No game binary has been provided, please press PLAY in the Godot editor") self.port = port self.connection = self._start_server() @@ -84,13 +73,10 @@ def __init__( # sf2 requires a tuple action space # Multiple agents' action space(s) self.tuple_action_spaces = [ - spaces.Tuple([v for _, v in action_space.items()]) - for action_space in self.action_spaces + spaces.Tuple([v for _, v in action_space.items()]) for action_space in self.action_spaces ] # Single agent action space processor using the action space(s) of the first agent - self.action_space_processor = ActionSpaceProcessor( - self.tuple_action_spaces[0], convert_action_space - ) + self.action_space_processor = ActionSpaceProcessor(self.tuple_action_spaces[0], convert_action_space) # For multi-policy envs: The name of each agent's policy set in the env itself (any training_mode # AIController instance is treated as an agent) @@ -237,9 +223,7 @@ def _process_obs(self, response_obs: dict): for k in response_obs[0].keys(): if "2d" in k: for sub in response_obs: - sub[k] = self._decode_2d_obs_from_string( - sub[k], self.observation_space[k].shape - ) + sub[k] = self._decode_2d_obs_from_string(sub[k], self.observation_space[k].shape) return response_obs @@ -294,17 +278,7 @@ def _close(self): print("exit was not clean, using atexit to close env") self.close() - def _launch_env( - self, - env_path, - port, - show_window, - framerate, - seed, - action_repeat, - speedup, - **kwargs, - ): + def _launch_env(self, env_path, port, show_window, framerate, seed, action_repeat, speedup, **kwargs): # --fixed-fps {framerate} path = convert_macos_path(env_path) if platform == "darwin" else env_path @@ -386,9 
+360,7 @@ def _get_env_info(self): if v["action_type"] == "discrete": tmp_action_spaces[k] = spaces.Discrete(v["size"]) elif v["action_type"] == "continuous": - tmp_action_spaces[k] = spaces.Box( - low=-1.0, high=1.0, shape=(v["size"],) - ) + tmp_action_spaces[k] = spaces.Box(low=-1.0, high=1.0, shape=(v["size"],)) else: print(f"action space {v['action_type']} is not supported") assert 0, f"action space {v['action_type']} is not supported" @@ -399,9 +371,7 @@ def _get_env_info(self): # A single observation space will be received as a dict in previous versions, # A list of dicts will be received from newer version, defining the observation_space for each agent (AIController) if isinstance(json_dict["observation_space"], dict): - json_dict["observation_space"] = [ - json_dict["observation_space"] - ] * self.num_envs + json_dict["observation_space"] = [json_dict["observation_space"]] * self.num_envs for agent_obs_space in json_dict["observation_space"]: observation_spaces = {} @@ -430,9 +400,7 @@ def _get_env_info(self): # Gets policy names defined in AIControllers in Godot. If an older version of the plugin is used and no policy # names are sent, "shared_policy" will be set for compatibility. 
- self.agent_policy_names = json_dict.get( - "agent_policy_names", ["shared_policy"] * self.num_envs - ) + self.agent_policy_names = json_dict.get("agent_policy_names", ["shared_policy"] * self.num_envs) @staticmethod def _decode_2d_obs_from_string( diff --git a/godot_rl/wrappers/stable_baselines_wrapper.py b/godot_rl/wrappers/stable_baselines_wrapper.py index aed3f780..1db812a9 100644 --- a/godot_rl/wrappers/stable_baselines_wrapper.py +++ b/godot_rl/wrappers/stable_baselines_wrapper.py @@ -20,9 +20,7 @@ def __init__( ) -> None: # If we are doing editor training, n_parallel must be 1 if env_path is None and n_parallel > 1: - raise ValueError( - "You must provide the path to a exported game executable if n_parallel > 1" - ) + raise ValueError("You must provide the path to a exported game executable if n_parallel > 1") # Define the default port port = kwargs.pop("port", GodotEnv.DEFAULT_PORT) @@ -56,9 +54,7 @@ def _check_valid_action_space(self) -> None: len(action_space.spaces) == 1 ), f"sb3 supports a single action space, this env contains multiple spaces {action_space}" - def step( - self, action: np.ndarray - ) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, List[Dict[str, Any]]]: + def step(self, action: np.ndarray) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, List[Dict[str, Any]]]: # Initialize lists for collecting results all_obs = [] all_rewards = [] @@ -126,9 +122,7 @@ def action_space(self) -> gym.Space: def num_envs(self) -> int: return self.envs[0].num_envs * self.n_parallel - def env_is_wrapped( - self, wrapper_class: type, indices: Optional[List[int]] = None - ) -> List[bool]: + def env_is_wrapped(self, wrapper_class: type, indices: Optional[List[int]] = None) -> List[bool]: # Return a list indicating that no environments are wrapped return [False] * (self.envs[0].num_envs * self.n_parallel) @@ -139,9 +133,7 @@ def env_method(self): def get_attr(self, attr_name: str, indices=None) -> List[Any]: if attr_name == "render_mode": return 
[None for _ in range(self.num_envs)] - raise AttributeError( - "get attr not fully implemented in godot-rl StableBaselinesWrapper" - ) + raise AttributeError("get attr not fully implemented in godot-rl StableBaselinesWrapper") def seed(self, seed=None): raise NotImplementedError() @@ -164,9 +156,7 @@ def stable_baselines_training(args, extras, n_steps: int = 200000, **kwargs) -> if can_import("ray"): print("WARNING, stable baselines and ray[rllib] are not compatible") # Initialize the custom environment - env = StableBaselinesGodotEnv( - env_path=args.env_path, show_window=args.viz, speedup=args.speedup, **kwargs - ) + env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, **kwargs) env = VecMonitor(env) # Initialize the PPO model From 190344c2e583a83a36cc0d7af7f276a3a3d01bb8 Mon Sep 17 00:00:00 2001 From: GianiStatie Date: Wed, 16 Oct 2024 07:54:00 +0300 Subject: [PATCH 3/9] chore: adding back formatter to pass checks --- godot_rl/core/godot_env.py | 52 ++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py index aa37828f..e1a89b7a 100644 --- a/godot_rl/core/godot_env.py +++ b/godot_rl/core/godot_env.py @@ -31,7 +31,7 @@ def __init__( action_repeat: Optional[int] = None, speedup: Optional[int] = None, convert_action_space: bool = False, - **kwargs + **kwargs, ): """ Initialize a new instance of GodotEnv @@ -52,9 +52,20 @@ def __init__( env_path = self._set_platform_suffix(env_path) self.check_platform(env_path) - self._launch_env(env_path, port, show_window, framerate, seed, action_repeat, speedup, **kwargs) + self._launch_env( + env_path, + port, + show_window, + framerate, + seed, + action_repeat, + speedup, + **kwargs, + ) else: - print("No game binary has been provided, please press PLAY in the Godot editor") + print( + "No game binary has been provided, please press PLAY in the Godot editor" + ) self.port = port 
self.connection = self._start_server() @@ -73,10 +84,13 @@ def __init__( # sf2 requires a tuple action space # Multiple agents' action space(s) self.tuple_action_spaces = [ - spaces.Tuple([v for _, v in action_space.items()]) for action_space in self.action_spaces + spaces.Tuple([v for _, v in action_space.items()]) + for action_space in self.action_spaces ] # Single agent action space processor using the action space(s) of the first agent - self.action_space_processor = ActionSpaceProcessor(self.tuple_action_spaces[0], convert_action_space) + self.action_space_processor = ActionSpaceProcessor( + self.tuple_action_spaces[0], convert_action_space + ) # For multi-policy envs: The name of each agent's policy set in the env itself (any training_mode # AIController instance is treated as an agent) @@ -223,7 +237,9 @@ def _process_obs(self, response_obs: dict): for k in response_obs[0].keys(): if "2d" in k: for sub in response_obs: - sub[k] = self._decode_2d_obs_from_string(sub[k], self.observation_space[k].shape) + sub[k] = self._decode_2d_obs_from_string( + sub[k], self.observation_space[k].shape + ) return response_obs @@ -278,7 +294,17 @@ def _close(self): print("exit was not clean, using atexit to close env") self.close() - def _launch_env(self, env_path, port, show_window, framerate, seed, action_repeat, speedup, **kwargs): + def _launch_env( + self, + env_path, + port, + show_window, + framerate, + seed, + action_repeat, + speedup, + **kwargs, + ): # --fixed-fps {framerate} path = convert_macos_path(env_path) if platform == "darwin" else env_path @@ -360,7 +386,9 @@ def _get_env_info(self): if v["action_type"] == "discrete": tmp_action_spaces[k] = spaces.Discrete(v["size"]) elif v["action_type"] == "continuous": - tmp_action_spaces[k] = spaces.Box(low=-1.0, high=1.0, shape=(v["size"],)) + tmp_action_spaces[k] = spaces.Box( + low=-1.0, high=1.0, shape=(v["size"],) + ) else: print(f"action space {v['action_type']} is not supported") assert 0, f"action space 
{v['action_type']} is not supported" @@ -371,7 +399,9 @@ def _get_env_info(self): # A single observation space will be received as a dict in previous versions, # A list of dicts will be received from newer version, defining the observation_space for each agent (AIController) if isinstance(json_dict["observation_space"], dict): - json_dict["observation_space"] = [json_dict["observation_space"]] * self.num_envs + json_dict["observation_space"] = [ + json_dict["observation_space"] + ] * self.num_envs for agent_obs_space in json_dict["observation_space"]: observation_spaces = {} @@ -400,7 +430,9 @@ def _get_env_info(self): # Gets policy names defined in AIControllers in Godot. If an older version of the plugin is used and no policy # names are sent, "shared_policy" will be set for compatibility. - self.agent_policy_names = json_dict.get("agent_policy_names", ["shared_policy"] * self.num_envs) + self.agent_policy_names = json_dict.get( + "agent_policy_names", ["shared_policy"] * self.num_envs + ) @staticmethod def _decode_2d_obs_from_string( From ee382d7f75156ac4496d4c48d04eb629918384cc Mon Sep 17 00:00:00 2001 From: GianiStatie Date: Wed, 16 Oct 2024 08:04:58 +0300 Subject: [PATCH 4/9] fix: applying kwargs passing to all env wrappers --- godot_rl/wrappers/petting_zoo_wrapper.py | 53 +++++++++++++++++------- godot_rl/wrappers/ray_wrapper.py | 31 ++++++++------ 2 files changed, 58 insertions(+), 26 deletions(-) diff --git a/godot_rl/wrappers/petting_zoo_wrapper.py b/godot_rl/wrappers/petting_zoo_wrapper.py index 2160d4e0..7abddfa0 100644 --- a/godot_rl/wrappers/petting_zoo_wrapper.py +++ b/godot_rl/wrappers/petting_zoo_wrapper.py @@ -26,7 +26,9 @@ def env(render_mode=None): class GDRLPettingZooEnv(ParallelEnv): metadata = {"render_modes": ["human"], "name": "GDRLPettingZooEnv"} - def __init__(self, port=GodotEnv.DEFAULT_PORT, show_window=True, seed=0, config: Dict = {}): + def __init__( + self, port=GodotEnv.DEFAULT_PORT, show_window=True, seed=0, config: Dict = None + 
): """ The init method takes in environment arguments and should define the following attributes: - possible_agents @@ -38,20 +40,31 @@ def __init__(self, port=GodotEnv.DEFAULT_PORT, show_window=True, seed=0, config: These attributes should not be changed after initialization. """ + config = config or {} # initialize config as empty dict if None + + # using default values from GodotEnv + env_path = config.pop("env_path", None) + show_window = config.pop("show_window", False) + action_repeat = config.pop("action_repeat", None) + speedup = config.pop("speedup", None) + # Initialize the Godot Env which we will wrap self.godot_env = GodotEnv( - env_path=config.get("env_path"), - show_window=config.get("show_window"), - action_repeat=config.get("action_repeat"), - speedup=config.get("speedup"), + env_path=env_path, + show_window=show_window, + action_repeat=action_repeat, + speedup=speedup, convert_action_space=False, seed=seed, port=port, + **config, ) self.render_mode = None # Controlled by the env - self.possible_agents = [agent_idx for agent_idx in range(self.godot_env.num_envs)] + self.possible_agents = [ + agent_idx for agent_idx in range(self.godot_env.num_envs) + ] self.agents = self.possible_agents[:] # The policy names here are set on each AIController in Godot editor, @@ -59,14 +72,18 @@ def __init__(self, port=GodotEnv.DEFAULT_PORT, show_window=True, seed=0, config: self.agent_policy_names = self.godot_env.agent_policy_names # optional: a mapping between agent name and ID - self.agent_name_mapping = dict(zip(self.possible_agents, list(range(len(self.possible_agents))))) + self.agent_name_mapping = dict( + zip(self.possible_agents, list(range(len(self.possible_agents)))) + ) self.observation_spaces = { - agent: self.godot_env.observation_spaces[agent_idx] for agent_idx, agent in enumerate(self.agents) + agent: self.godot_env.observation_spaces[agent_idx] + for agent_idx, agent in enumerate(self.agents) } self.action_spaces = { - agent: 
self.godot_env.tuple_action_spaces[agent_idx] for agent_idx, agent in enumerate(self.agents) + agent: self.godot_env.tuple_action_spaces[agent_idx] + for agent_idx, agent in enumerate(self.agents) } # Observation space should be defined here. @@ -105,8 +122,12 @@ def reset(self, seed=None, options=None): """ godot_obs, godot_infos = self.godot_env.reset() - observations = {agent: godot_obs[agent_idx] for agent_idx, agent in enumerate(self.agents)} - infos = {agent: godot_infos[agent_idx] for agent_idx, agent in enumerate(self.agents)} + observations = { + agent: godot_obs[agent_idx] for agent_idx, agent in enumerate(self.agents) + } + infos = { + agent: godot_infos[agent_idx] for agent_idx, agent in enumerate(self.agents) + } return observations, infos @@ -125,12 +146,16 @@ def step(self, actions): # Godot env have done = true. For agents that received no actions, we will set zeros instead for # compatibility. godot_actions = [ - actions[agent] if agent in actions else np.zeros_like(self.action_spaces[agent_idx].sample()) + ( + actions[agent] + if agent in actions + else np.zeros_like(self.action_spaces[agent_idx].sample()) + ) for agent_idx, agent in enumerate(self.agents) ] - godot_obs, godot_rewards, godot_dones, godot_truncations, godot_infos = self.godot_env.step( - godot_actions, order_ij=True + godot_obs, godot_rewards, godot_dones, godot_truncations, godot_infos = ( + self.godot_env.step(godot_actions, order_ij=True) ) observations = {agent: godot_obs[agent] for agent in actions} rewards = {agent: godot_rewards[agent] for agent in actions} diff --git a/godot_rl/wrappers/ray_wrapper.py b/godot_rl/wrappers/ray_wrapper.py index 0562c57e..920b1537 100644 --- a/godot_rl/wrappers/ray_wrapper.py +++ b/godot_rl/wrappers/ray_wrapper.py @@ -13,19 +13,23 @@ class RayVectorGodotEnv(VectorEnv): - def __init__( - self, - port=10008, - seed=0, - config=None, - ) -> None: + def __init__(self, port=10008, seed=0, config=None) -> None: + config = config or {} # initialize 
config as empty dict if None + + # using default values from GodotEnv + env_path = config.pop("env_path", None) + show_window = config.pop("show_window", False) + action_repeat = config.pop("action_repeat", None) + speedup = config.pop("speedup", None) + self._env = GodotEnv( - env_path=config["env_path"], + env_path=env_path, port=port, seed=seed, - show_window=config["show_window"], - action_repeat=config["action_repeat"], - speedup=config["speedup"], + show_window=show_window, + action_repeat=action_repeat, + speedup=speedup, + **config, ) super().__init__( observation_space=self._env.observation_space, @@ -127,9 +131,12 @@ def rllib_training(args, extras): checkpoint_freq=checkpoint_freq, checkpoint_at_end=not args.eval, restore=args.restore, - storage_path=os.path.abspath(args.experiment_dir) or os.path.abspath("logs/rllib"), + storage_path=os.path.abspath(args.experiment_dir) + or os.path.abspath("logs/rllib"), trial_name_creator=lambda trial: ( - f"{args.experiment_name}" if args.experiment_name else f"{trial.trainable_name}_{trial.trial_id}" + f"{args.experiment_name}" + if args.experiment_name + else f"{trial.trainable_name}_{trial.trial_id}" ), ) if args.export: From 837bc3e0e648b1b701ba14f41f9c196a6b48117f Mon Sep 17 00:00:00 2001 From: GianiStatie Date: Wed, 16 Oct 2024 16:08:45 +0300 Subject: [PATCH 5/9] chore: didn't see black line-length 120 --- godot_rl/core/godot_env.py | 27 +++++------------- godot_rl/wrappers/petting_zoo_wrapper.py | 36 +++++++----------------- godot_rl/wrappers/ray_wrapper.py | 7 ++--- 3 files changed, 19 insertions(+), 51 deletions(-) diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py index e1a89b7a..7361d509 100644 --- a/godot_rl/core/godot_env.py +++ b/godot_rl/core/godot_env.py @@ -63,9 +63,7 @@ def __init__( **kwargs, ) else: - print( - "No game binary has been provided, please press PLAY in the Godot editor" - ) + print("No game binary has been provided, please press PLAY in the Godot editor") self.port = 
port self.connection = self._start_server() @@ -84,13 +82,10 @@ def __init__( # sf2 requires a tuple action space # Multiple agents' action space(s) self.tuple_action_spaces = [ - spaces.Tuple([v for _, v in action_space.items()]) - for action_space in self.action_spaces + spaces.Tuple([v for _, v in action_space.items()]) for action_space in self.action_spaces ] # Single agent action space processor using the action space(s) of the first agent - self.action_space_processor = ActionSpaceProcessor( - self.tuple_action_spaces[0], convert_action_space - ) + self.action_space_processor = ActionSpaceProcessor(self.tuple_action_spaces[0], convert_action_space) # For multi-policy envs: The name of each agent's policy set in the env itself (any training_mode # AIController instance is treated as an agent) @@ -237,9 +232,7 @@ def _process_obs(self, response_obs: dict): for k in response_obs[0].keys(): if "2d" in k: for sub in response_obs: - sub[k] = self._decode_2d_obs_from_string( - sub[k], self.observation_space[k].shape - ) + sub[k] = self._decode_2d_obs_from_string(sub[k], self.observation_space[k].shape) return response_obs @@ -386,9 +379,7 @@ def _get_env_info(self): if v["action_type"] == "discrete": tmp_action_spaces[k] = spaces.Discrete(v["size"]) elif v["action_type"] == "continuous": - tmp_action_spaces[k] = spaces.Box( - low=-1.0, high=1.0, shape=(v["size"],) - ) + tmp_action_spaces[k] = spaces.Box(low=-1.0, high=1.0, shape=(v["size"],)) else: print(f"action space {v['action_type']} is not supported") assert 0, f"action space {v['action_type']} is not supported" @@ -399,9 +390,7 @@ def _get_env_info(self): # A single observation space will be received as a dict in previous versions, # A list of dicts will be received from newer version, defining the observation_space for each agent (AIController) if isinstance(json_dict["observation_space"], dict): - json_dict["observation_space"] = [ - json_dict["observation_space"] - ] * self.num_envs + 
json_dict["observation_space"] = [json_dict["observation_space"]] * self.num_envs for agent_obs_space in json_dict["observation_space"]: observation_spaces = {} @@ -430,9 +419,7 @@ def _get_env_info(self): # Gets policy names defined in AIControllers in Godot. If an older version of the plugin is used and no policy # names are sent, "shared_policy" will be set for compatibility. - self.agent_policy_names = json_dict.get( - "agent_policy_names", ["shared_policy"] * self.num_envs - ) + self.agent_policy_names = json_dict.get("agent_policy_names", ["shared_policy"] * self.num_envs) @staticmethod def _decode_2d_obs_from_string( diff --git a/godot_rl/wrappers/petting_zoo_wrapper.py b/godot_rl/wrappers/petting_zoo_wrapper.py index 7abddfa0..0fa8ba5c 100644 --- a/godot_rl/wrappers/petting_zoo_wrapper.py +++ b/godot_rl/wrappers/petting_zoo_wrapper.py @@ -26,9 +26,7 @@ def env(render_mode=None): class GDRLPettingZooEnv(ParallelEnv): metadata = {"render_modes": ["human"], "name": "GDRLPettingZooEnv"} - def __init__( - self, port=GodotEnv.DEFAULT_PORT, show_window=True, seed=0, config: Dict = None - ): + def __init__(self, port=GodotEnv.DEFAULT_PORT, show_window=True, seed=0, config: Dict = None): """ The init method takes in environment arguments and should define the following attributes: - possible_agents @@ -62,9 +60,7 @@ def __init__( self.render_mode = None # Controlled by the env - self.possible_agents = [ - agent_idx for agent_idx in range(self.godot_env.num_envs) - ] + self.possible_agents = [agent_idx for agent_idx in range(self.godot_env.num_envs)] self.agents = self.possible_agents[:] # The policy names here are set on each AIController in Godot editor, @@ -72,18 +68,14 @@ def __init__( self.agent_policy_names = self.godot_env.agent_policy_names # optional: a mapping between agent name and ID - self.agent_name_mapping = dict( - zip(self.possible_agents, list(range(len(self.possible_agents)))) - ) + self.agent_name_mapping = dict(zip(self.possible_agents, 
list(range(len(self.possible_agents))))) self.observation_spaces = { - agent: self.godot_env.observation_spaces[agent_idx] - for agent_idx, agent in enumerate(self.agents) + agent: self.godot_env.observation_spaces[agent_idx] for agent_idx, agent in enumerate(self.agents) } self.action_spaces = { - agent: self.godot_env.tuple_action_spaces[agent_idx] - for agent_idx, agent in enumerate(self.agents) + agent: self.godot_env.tuple_action_spaces[agent_idx] for agent_idx, agent in enumerate(self.agents) } # Observation space should be defined here. @@ -122,12 +114,8 @@ def reset(self, seed=None, options=None): """ godot_obs, godot_infos = self.godot_env.reset() - observations = { - agent: godot_obs[agent_idx] for agent_idx, agent in enumerate(self.agents) - } - infos = { - agent: godot_infos[agent_idx] for agent_idx, agent in enumerate(self.agents) - } + observations = {agent: godot_obs[agent_idx] for agent_idx, agent in enumerate(self.agents)} + infos = {agent: godot_infos[agent_idx] for agent_idx, agent in enumerate(self.agents)} return observations, infos @@ -146,16 +134,12 @@ def step(self, actions): # Godot env have done = true. For agents that received no actions, we will set zeros instead for # compatibility. 
godot_actions = [ - ( - actions[agent] - if agent in actions - else np.zeros_like(self.action_spaces[agent_idx].sample()) - ) + (actions[agent] if agent in actions else np.zeros_like(self.action_spaces[agent_idx].sample())) for agent_idx, agent in enumerate(self.agents) ] - godot_obs, godot_rewards, godot_dones, godot_truncations, godot_infos = ( - self.godot_env.step(godot_actions, order_ij=True) + godot_obs, godot_rewards, godot_dones, godot_truncations, godot_infos = self.godot_env.step( + godot_actions, order_ij=True ) observations = {agent: godot_obs[agent] for agent in actions} rewards = {agent: godot_rewards[agent] for agent in actions} diff --git a/godot_rl/wrappers/ray_wrapper.py b/godot_rl/wrappers/ray_wrapper.py index 920b1537..9a38d5da 100644 --- a/godot_rl/wrappers/ray_wrapper.py +++ b/godot_rl/wrappers/ray_wrapper.py @@ -131,12 +131,9 @@ def rllib_training(args, extras): checkpoint_freq=checkpoint_freq, checkpoint_at_end=not args.eval, restore=args.restore, - storage_path=os.path.abspath(args.experiment_dir) - or os.path.abspath("logs/rllib"), + storage_path=os.path.abspath(args.experiment_dir) or os.path.abspath("logs/rllib"), trial_name_creator=lambda trial: ( - f"{args.experiment_name}" - if args.experiment_name - else f"{trial.trainable_name}_{trial.trial_id}" + f"{args.experiment_name}" if args.experiment_name else f"{trial.trainable_name}_{trial.trial_id}" ), ) if args.export: From fe95013dc6e117e84a8f192e70e6101b694fdcbf Mon Sep 17 00:00:00 2001 From: Giani Statie Date: Wed, 16 Oct 2024 20:36:53 +0300 Subject: [PATCH 6/9] fix: restricting the arguments we pass to the environment --- godot_rl/wrappers/petting_zoo_wrapper.py | 19 +++++++++---------- godot_rl/wrappers/ray_wrapper.py | 21 ++++++++++----------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/godot_rl/wrappers/petting_zoo_wrapper.py b/godot_rl/wrappers/petting_zoo_wrapper.py index 0fa8ba5c..12385b18 100644 --- a/godot_rl/wrappers/petting_zoo_wrapper.py +++ 
b/godot_rl/wrappers/petting_zoo_wrapper.py @@ -39,23 +39,22 @@ def __init__(self, port=GodotEnv.DEFAULT_PORT, show_window=True, seed=0, config: These attributes should not be changed after initialization. """ config = config or {} # initialize config as empty dict if None - - # using default values from GodotEnv - env_path = config.pop("env_path", None) - show_window = config.pop("show_window", False) - action_repeat = config.pop("action_repeat", None) - speedup = config.pop("speedup", None) + extra_arguments = { + key: value + for key, value in config.items() + if key not in ["env_path", "show_window", "action_repeat", "speedup", "seed", "port"] + } # Initialize the Godot Env which we will wrap self.godot_env = GodotEnv( - env_path=env_path, + env_path=config["env_path"], show_window=show_window, - action_repeat=action_repeat, - speedup=speedup, + action_repeat=config["action_repeat"], + speedup=config["speedup"], convert_action_space=False, seed=seed, port=port, - **config, + **extra_arguments, ) self.render_mode = None # Controlled by the env diff --git a/godot_rl/wrappers/ray_wrapper.py b/godot_rl/wrappers/ray_wrapper.py index 9a38d5da..dc8d03a1 100644 --- a/godot_rl/wrappers/ray_wrapper.py +++ b/godot_rl/wrappers/ray_wrapper.py @@ -15,21 +15,20 @@ class RayVectorGodotEnv(VectorEnv): def __init__(self, port=10008, seed=0, config=None) -> None: config = config or {} # initialize config as empty dict if None - - # using default values from GodotEnv - env_path = config.pop("env_path", None) - show_window = config.pop("show_window", False) - action_repeat = config.pop("action_repeat", None) - speedup = config.pop("speedup", None) + extra_arguments = { + key: value + for key, value in config.items() + if key not in ["env_path", "show_window", "action_repeat", "speedup", "seed", "port"] + } self._env = GodotEnv( - env_path=env_path, + env_path=config["env_path"], port=port, seed=seed, - show_window=show_window, - action_repeat=action_repeat, - speedup=speedup, - 
**config, + show_window=config["show_window"], + action_repeat=config["action_repeat"], + speedup=config["speedup"], + **extra_arguments, ) super().__init__( observation_space=self._env.observation_space, From cd62849c4aa411988ed2cbe6befe45aa85a0d415 Mon Sep 17 00:00:00 2001 From: Giani Statie Date: Fri, 18 Oct 2024 22:37:15 +0300 Subject: [PATCH 7/9] chore: added some info in the docs about custom args --- docs/ADV_STABLE_BASELINES_3.md | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md index 33179606..c1b4ee32 100644 --- a/docs/ADV_STABLE_BASELINES_3.md +++ b/docs/ADV_STABLE_BASELINES_3.md @@ -120,3 +120,46 @@ and reach 0 at `--timesteps` value. ```bash python stable_baselines3_example.py --timesteps=1_000_000 --linear_lr_schedule ``` + +### Custom environments and additional arguments: +This part is a bit more specific, and is used primerily for Custom Godot Environments. + +> OBSERVATION: The following example focuses on how to change the **starting_level** using additional command line arguments. +> +> You can also use commind line arguments to: +> * change the **game_difficulty** +> * change the **starting_character** +> * load a custom **game_state** +> * update any other variable before the game starts + +Let's say you have a Godot Environment with multiple levels and want to set the **starting_level** before the simulation starts. All you need to do is to pass **starting_level** as a argument, when instantiating a `StableBaselinesGodotEnv` along with the value you want to pass to Godot and you're good to go. 
Here's an example in python: + +```python +from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv + +# we set the starting_level as RainbowRoad +env = StableBaselinesGodotEnv( + env_path=PATH_TO_ENVIRONMENT, + port=11008, + env_seed=42, + speedup=1, + starting_level="RainbowRoad" +) +``` +After running the script you'll see in the console something like: +```bask +getting command line arguments +--port=11008 +--env_seed=42 +--speedup=1 +--starting_level=RainbowRoad +``` +Which means that those variables got to the Godot Environment successfully. However, your environment needs to support handling those environments, prior to it being built. + +You can access the environemnt variables though the **godot_rl_agents_plugin/sync.gd** script. You'll see there's a `args` variable that stores all the command line arguments your environment received from the python script. You can access **starting_level** by doing: +```godot +func get_starting_level(): + return agrs.get("starting_level", None) +``` + +Then, you can use the returned value to set the starting level before the game begins. From 7549643f86b53ea8e17e8bd3fbcbcd9aaea2bbb3 Mon Sep 17 00:00:00 2001 From: Giani Statie Date: Fri, 18 Oct 2024 22:40:24 +0300 Subject: [PATCH 8/9] chore: fixing typos --- docs/ADV_STABLE_BASELINES_3.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md index c1b4ee32..3f182936 100644 --- a/docs/ADV_STABLE_BASELINES_3.md +++ b/docs/ADV_STABLE_BASELINES_3.md @@ -122,17 +122,17 @@ python stable_baselines3_example.py --timesteps=1_000_000 --linear_lr_schedule ``` ### Custom environments and additional arguments: -This part is a bit more specific, and is used primerily for Custom Godot Environments. +This part is a bit more specific, and is used primarily for Custom Godot Environments. 
> OBSERVATION: The following example focuses on how to change the **starting_level** using additional command line arguments. > -> You can also use commind line arguments to: +> You can also use command line arguments to: > * change the **game_difficulty** > * change the **starting_character** > * load a custom **game_state** > * update any other variable before the game starts -Let's say you have a Godot Environment with multiple levels and want to set the **starting_level** before the simulation starts. All you need to do is to pass **starting_level** as a argument, when instantiating a `StableBaselinesGodotEnv` along with the value you want to pass to Godot and you're good to go. Here's an example in python: +Let's say you have a Godot Environment with multiple levels and want to set the **starting_level** before the simulation starts. All you need to do is to pass **starting_level** as an argument, when instantiating a `StableBaselinesGodotEnv` along with the value you want to pass to Godot and you're good to go. Here's an example in python: ```python from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv @@ -156,7 +156,7 @@ getting command line arguments ``` Which means that those variables got to the Godot Environment successfully. However, your environment needs to support handling those environments, prior to it being built. -You can access the environemnt variables though the **godot_rl_agents_plugin/sync.gd** script. You'll see there's a `args` variable that stores all the command line arguments your environment received from the python script. You can access **starting_level** by doing: +You can access the environment variables though the **godot_rl_agents_plugin/sync.gd** script. You'll see there's a `args` variable that stores all the command line arguments your environment received from the python script. 
You can access **starting_level** by doing: ```godot func get_starting_level(): return agrs.get("starting_level", None) From 86a1f5abda7300232e820d83d901d4aa6d2eb2cc Mon Sep 17 00:00:00 2001 From: Giani Statie Date: Sat, 19 Oct 2024 11:09:08 +0300 Subject: [PATCH 9/9] fix: moved custom args docs to a better place --- docs/ADV_STABLE_BASELINES_3.md | 43 ---------------------------------- docs/CUSTOM_ENV.md | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md index 3f182936..33179606 100644 --- a/docs/ADV_STABLE_BASELINES_3.md +++ b/docs/ADV_STABLE_BASELINES_3.md @@ -120,46 +120,3 @@ and reach 0 at `--timesteps` value. ```bash python stable_baselines3_example.py --timesteps=1_000_000 --linear_lr_schedule ``` - -### Custom environments and additional arguments: -This part is a bit more specific, and is used primarily for Custom Godot Environments. - -> OBSERVATION: The following example focuses on how to change the **starting_level** using additional command line arguments. -> -> You can also use command line arguments to: -> * change the **game_difficulty** -> * change the **starting_character** -> * load a custom **game_state** -> * update any other variable before the game starts - -Let's say you have a Godot Environment with multiple levels and want to set the **starting_level** before the simulation starts. All you need to do is to pass **starting_level** as an argument, when instantiating a `StableBaselinesGodotEnv` along with the value you want to pass to Godot and you're good to go. 
Here's an example in python: - -```python -from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv - -# we set the starting_level as RainbowRoad -env = StableBaselinesGodotEnv( - env_path=PATH_TO_ENVIRONMENT, - port=11008, - env_seed=42, - speedup=1, - starting_level="RainbowRoad" -) -``` -After running the script you'll see in the console something like: -```bask -getting command line arguments ---port=11008 ---env_seed=42 ---speedup=1 ---starting_level=RainbowRoad -``` -Which means that those variables got to the Godot Environment successfully. However, your environment needs to support handling those environments, prior to it being built. - -You can access the environment variables though the **godot_rl_agents_plugin/sync.gd** script. You'll see there's a `args` variable that stores all the command line arguments your environment received from the python script. You can access **starting_level** by doing: -```godot -func get_starting_level(): - return agrs.get("starting_level", None) -``` - -Then, you can use the returned value to set the starting level before the game begins. diff --git a/docs/CUSTOM_ENV.md b/docs/CUSTOM_ENV.md index f5e941cb..ae638999 100644 --- a/docs/CUSTOM_ENV.md +++ b/docs/CUSTOM_ENV.md @@ -196,6 +196,49 @@ https://user-images.githubusercontent.com/7275864/209363084-f91b2fcb-2042-494c-9 https://user-images.githubusercontent.com/7275864/209363098-a6bee0a6-dc85-4b8d-b69a-d747bcf39635.mp4 +### Custom environments and additional arguments: + +> OBSERVATION +> +> The following example focuses on how to change the **starting_level** using additional command line arguments. \ +> You can also use command line arguments to: +> * change the **game_difficulty** +> * change the **starting_character** +> * load a custom **game_state** +> * update any other variable before the game starts + +Let's say you have a Godot Environment with multiple levels and want to set the **starting_level** before the simulation starts. 
All you need to do is to pass **starting_level** as an argument, when instantiating a `StableBaselinesGodotEnv` along with the value you want to pass to Godot and you're good to go. Here's an example in python:
+
+```python
+from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
+
+# we set the starting_level as RainbowRoad
+env = StableBaselinesGodotEnv(
+    env_path=PATH_TO_ENVIRONMENT,
+    port=11008,
+    env_seed=42,
+    speedup=1,
+    starting_level="RainbowRoad"
+)
+```
+After running the script you'll see in the console something like:
+```bash
+getting command line arguments
+--port=11008
+--env_seed=42
+--speedup=1
+--starting_level=RainbowRoad
+```
+Which means that those variables got to the Godot Environment successfully. However, your environment needs to support handling those arguments, prior to it being built.
+
+You can access the environment variables through the **godot_rl_agents_plugin/sync.gd** script. You'll see there's an `args` variable that stores all the command line arguments your environment received from the python script. You can access **starting_level** by doing:
+```godot
+func get_starting_level():
+    return args.get("starting_level", null)
+```
+
+Then, you can use the returned value to set the starting level before the game begins.
+
 ## There’s more!
 We have only scratched the surface of what can be achieved with Godot RL Agents, the library includes custom sensors and cameras to enrich the information available to the agent. Take a look at the [examples](https://github.com/edbeeching/godot_rl_agents_examples) to find out more!