From dd13506b2fdac2f40b08abc66279691db4662b6a Mon Sep 17 00:00:00 2001 From: Giani Statie Date: Sun, 20 Oct 2024 16:29:55 +0300 Subject: [PATCH 1/6] feat: adding info - is_success --- godot_rl/core/godot_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py index 53996896..04bd64e3 100644 --- a/godot_rl/core/godot_env.py +++ b/godot_rl/core/godot_env.py @@ -206,7 +206,7 @@ def step_recv(self): response["reward"], np.array(response["done"]).tolist(), np.array(response["done"]).tolist(), # TODO update API to term, trunc - [{}] * len(response["done"]), + response["info"], ) def _process_obs(self, response_obs: dict): From 146f8d979c2f5caef774515c5a7dc03de99e7928 Mon Sep 17 00:00:00 2001 From: Giani Statie Date: Sun, 20 Oct 2024 16:57:50 +0300 Subject: [PATCH 2/6] fix: added default value for info, not to crash examples --- godot_rl/core/godot_env.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py index 04bd64e3..8516fea5 100644 --- a/godot_rl/core/godot_env.py +++ b/godot_rl/core/godot_env.py @@ -201,12 +201,15 @@ def step_recv(self): response = self._get_json_dict() response["obs"] = self._process_obs(response["obs"]) + # TODO: update godot_rl_agents so we won't need this default_info + default_info = [{}] * len(response["done"]) + return ( response["obs"], response["reward"], np.array(response["done"]).tolist(), np.array(response["done"]).tolist(), # TODO update API to term, trunc - response["info"], + response.get("info", default_info), ) def _process_obs(self, response_obs: dict): From 640ff494935f80dd96dcee977e2cc7a01f68e437 Mon Sep 17 00:00:00 2001 From: Giani Statie Date: Sun, 20 Oct 2024 17:00:27 +0300 Subject: [PATCH 3/6] chore: fixed styling --- godot_rl/core/godot_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py index 
8516fea5..4d09d422 100644 --- a/godot_rl/core/godot_env.py +++ b/godot_rl/core/godot_env.py @@ -201,7 +201,7 @@ def step_recv(self): response = self._get_json_dict() response["obs"] = self._process_obs(response["obs"]) - # TODO: update godot_rl_agents so we won't need this default_info + # TODO: update godot_rl_agents so we won't need this default_info default_info = [{}] * len(response["done"]) return ( From 3b80419b3e47d63491c55b6dac2753c4d610c27f Mon Sep 17 00:00:00 2001 From: Giani Statie <44785388+GianiStatie@users.noreply.github.com> Date: Mon, 21 Oct 2024 13:14:56 +0300 Subject: [PATCH 4/6] Update godot_rl/core/godot_env.py Co-authored-by: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> --- godot_rl/core/godot_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py index 4d09d422..92e937de 100644 --- a/godot_rl/core/godot_env.py +++ b/godot_rl/core/godot_env.py @@ -201,7 +201,7 @@ def step_recv(self): response = self._get_json_dict() response["obs"] = self._process_obs(response["obs"]) - # TODO: update godot_rl_agents so we won't need this default_info + # Kept for backward compatibility if the plugin doesn't send info. 
default_info = [{}] * len(response["done"]) return ( From 9515a9c4c09c1219eb6388055a649ee72ace326f Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Mon, 21 Oct 2024 18:37:44 +0200 Subject: [PATCH 5/6] Adds is_success to ADV_STABLE_BASELINES_3.md --- docs/ADV_STABLE_BASELINES_3.md | 81 ++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md index 33179606..7bd190c1 100644 --- a/docs/ADV_STABLE_BASELINES_3.md +++ b/docs/ADV_STABLE_BASELINES_3.md @@ -27,7 +27,6 @@ pip install godot-rl[sb3] ``` ## Basic Environment Usage -Usage instructions for envs **BallChase**, **FlyBy** and **JumperHard.** ### Download the env: @@ -36,19 +35,8 @@ gdrl.env_from_hub -r edbeeching/godot_rl_ chmod +x examples/godot_rl_/bin/.x86_64 # linux example ``` -### Train a model from scratch: - -```bash -gdrl --env=gdrl --env_path=examples/godot_rl_/bin/.x86_64 --experiment_name=Experiment_01 --viz -``` - -While the default options for sb3 work reasonably well. You may be interested in changing the hyperparameters. - -We recommend taking the [sb3 example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/stable_baselines3_example.py) and modifying to match your needs. - -The example exposes more parameters for the user to configure, such as `--speedup` to run the environment faster than realtime and the `--n_parallel` to launch several instances of the game executable in order to accelerate training (not available for in-editor training). - -## SB3 Example script usage: +## Training / SB3 Example script usage: +Clone the repository or download the script [sb3 example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/stable_baselines3_example.py). 
To use the example script, first move to the location where the downloaded script is in the console/terminal, and then try some of the example use cases below: ### Train a model in editor: @@ -60,9 +48,13 @@ python stable_baselines3_example.py ```bash python stable_baselines3_example.py --env_path=path_to_executable ``` +For the previously downloaded envs, the path will be e.g. +`--env_path=examples/godot_rl_/bin/.x86_64` + Note that the exported environment will not be rendered in order to accelerate training. If you want to display it, add the `--viz` argument. + ### Train an exported environment using 4 environment processes: ```bash python stable_baselines3_example.py --env_path=path_to_executable --n_parallel=4 @@ -120,3 +112,64 @@ and reach 0 at `--timesteps` value. ```bash python stable_baselines3_example.py --timesteps=1_000_000 --linear_lr_schedule ``` + +## Training statistics and logging: +### Adding success rate to console logs: +If you want to report success rate based on some condition (e.g. whether the agent successfully finished the level or not), +follow the steps below: + +#### 1 - Add the following method to your extended `AIController`: +```gdscript +var is_success := false +func get_info() -> Dictionary: + if done: + return {"is_success": is_success} + is_success = false + return {} +``` + +The above snippet will send the information on whether or not the episode was succesful to the Python training server. +SB3 can use this to report the success rate. + +Note that we are setting `"false"` as the default, this is just one way to implement it, you can adjust the implementation to your preference. +Due to it being reset to `false`, we just need to set it to `true` when an episode is done succesfully, otherwise we don't need to change it. 
+ +#### 2 - On some condition, set `is_success` to `true`: +The condition depends on your use case, for example, here's how we can implement this in the [SimpleReachGoal](https://github.com/edbeeching/godot_rl_agents_examples/tree/main/examples/TestExamples/SimpleReachGoal) env. +In the `player.gd` script, we just add `is_success` to depend on whether or not the reward is higher than 0: + +```gdscript +## Ends the game, setting an optional reward +func game_over(reward: float = 0.0): + ai_controller.is_success = reward > 0 + ai_controller.reward += reward + game_scene_manager.reset() +``` +Notes: +- Although not directly visible, the `done` condition is also set by this method (by calling `game_scene_manager.reset()`); +in a different env it might look something like this: + +```gdscript +func game_over(): + ai_controller.is_success = reward > 0 + ai_controller.done = true + ai_controller.needs_reset = true +``` + +- The condition for success can vary based on your environment; it does not have to depend directly on the reward. +- The current [sb3 docs relevant section](https://stable-baselines3.readthedocs.io/en/master/common/logger.html#rollout) suggests: +> you must pass an extra argument to the Monitor wrapper to log that value (info_keywords=("is_success",)) + +We didn't add this argument to the SB3 example script since logging seems to work without it in the current SB3 version. +However, we haven't tested this in depth yet, so try adding the argument if you run into any issues. + +After these changes, you should be able to see the rate in the training stats, e.g.: + +![success rate](https://github.com/user-attachments/assets/4901df0b-e48f-463d-a05f-39a16b9f94fb) + + +### Tensorboard: +You can see the output from the training session in tensorboard. Check [this guide](https://github.com/GianiStatie/godot_rl_agents/blob/main/docs/TRAINING_STATISTICS.md) for more info. 
+ + + From e4cb17f01bddac78e7dabaa7da6d72bd4e59af83 Mon Sep 17 00:00:00 2001 From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com> Date: Mon, 21 Oct 2024 23:09:20 +0200 Subject: [PATCH 6/6] Update ADV_STABLE_BASELINES_3.md Removes the unnecessary resetting of is_success to false (the auto reset could be delayed based on action_repeat, so it's better to specify true or false every time when setting done = true to make sure it's up to date). --- docs/ADV_STABLE_BASELINES_3.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md index 7bd190c1..90ef7ec3 100644 --- a/docs/ADV_STABLE_BASELINES_3.md +++ b/docs/ADV_STABLE_BASELINES_3.md @@ -124,17 +124,13 @@ var is_success := false func get_info() -> Dictionary: if done: return {"is_success": is_success} - is_success = false return {} ``` The above snippet will send the information on whether or not the episode was succesful to the Python training server. SB3 can use this to report the success rate. -Note that we are setting `"false"` as the default, this is just one way to implement it, you can adjust the implementation to your preference. -Due to it being reset to `false`, we just need to set it to `true` when an episode is done succesfully, otherwise we don't need to change it. - -#### 2 - On some condition, set `is_success` to `true`: +#### 2 - Set `is_success` to `true` or `false` when ending the episode The condition depends on your use case, for example, here's how we can implement this in the [SimpleReachGoal](https://github.com/edbeeching/godot_rl_agents_examples/tree/main/examples/TestExamples/SimpleReachGoal) env. In the `player.gd` script, we just add `is_success` to depend on whether or not the reward is higher than 0: