From dd13506b2fdac2f40b08abc66279691db4662b6a Mon Sep 17 00:00:00 2001
From: Giani Statie <gianistatie@gmail.com>
Date: Sun, 20 Oct 2024 16:29:55 +0300
Subject: [PATCH 1/6] feat: adding info - is_success

---
 godot_rl/core/godot_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py
index 53996896..04bd64e3 100644
--- a/godot_rl/core/godot_env.py
+++ b/godot_rl/core/godot_env.py
@@ -206,7 +206,7 @@ def step_recv(self):
             response["reward"],
             np.array(response["done"]).tolist(),
             np.array(response["done"]).tolist(),  # TODO update API to term, trunc
-            [{}] * len(response["done"]),
+            response["info"],
         )
 
     def _process_obs(self, response_obs: dict):

From 146f8d979c2f5caef774515c5a7dc03de99e7928 Mon Sep 17 00:00:00 2001
From: Giani Statie <gianistatie@gmail.com>
Date: Sun, 20 Oct 2024 16:57:50 +0300
Subject: [PATCH 2/6] fix: added default value for info, not to crash examples

---
 godot_rl/core/godot_env.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py
index 04bd64e3..8516fea5 100644
--- a/godot_rl/core/godot_env.py
+++ b/godot_rl/core/godot_env.py
@@ -201,12 +201,15 @@ def step_recv(self):
         response = self._get_json_dict()
         response["obs"] = self._process_obs(response["obs"])
 
+        # TODO: update godot_rl_agents so we won't need this default_info 
+        default_info = [{}] * len(response["done"])
+
         return (
             response["obs"],
             response["reward"],
             np.array(response["done"]).tolist(),
             np.array(response["done"]).tolist(),  # TODO update API to term, trunc
-            response["info"],
+            response.get("info", default_info),
         )
 
     def _process_obs(self, response_obs: dict):

From 640ff494935f80dd96dcee977e2cc7a01f68e437 Mon Sep 17 00:00:00 2001
From: Giani Statie <gianistatie@gmail.com>
Date: Sun, 20 Oct 2024 17:00:27 +0300
Subject: [PATCH 3/6] chore: fixed styling

---
 godot_rl/core/godot_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py
index 8516fea5..4d09d422 100644
--- a/godot_rl/core/godot_env.py
+++ b/godot_rl/core/godot_env.py
@@ -201,7 +201,7 @@ def step_recv(self):
         response = self._get_json_dict()
         response["obs"] = self._process_obs(response["obs"])
 
-        # TODO: update godot_rl_agents so we won't need this default_info 
+        # TODO: update godot_rl_agents so we won't need this default_info
         default_info = [{}] * len(response["done"])
 
         return (

From 3b80419b3e47d63491c55b6dac2753c4d610c27f Mon Sep 17 00:00:00 2001
From: Giani Statie <44785388+GianiStatie@users.noreply.github.com>
Date: Mon, 21 Oct 2024 13:14:56 +0300
Subject: [PATCH 4/6] Update godot_rl/core/godot_env.py

Co-authored-by: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
---
 godot_rl/core/godot_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/godot_rl/core/godot_env.py b/godot_rl/core/godot_env.py
index 4d09d422..92e937de 100644
--- a/godot_rl/core/godot_env.py
+++ b/godot_rl/core/godot_env.py
@@ -201,7 +201,7 @@ def step_recv(self):
         response = self._get_json_dict()
         response["obs"] = self._process_obs(response["obs"])
 
-        # TODO: update godot_rl_agents so we won't need this default_info
+        # Kept for backward compatibility if the plugin doesn't send info.
         default_info = [{}] * len(response["done"])
 
         return (

From 9515a9c4c09c1219eb6388055a649ee72ace326f Mon Sep 17 00:00:00 2001
From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
Date: Mon, 21 Oct 2024 18:37:44 +0200
Subject: [PATCH 5/6] Adds is_success to ADV_STABLE_BASELINES_3.md

---
 docs/ADV_STABLE_BASELINES_3.md | 81 ++++++++++++++++++++++++++++------
 1 file changed, 67 insertions(+), 14 deletions(-)

diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md
index 33179606..7bd190c1 100644
--- a/docs/ADV_STABLE_BASELINES_3.md
+++ b/docs/ADV_STABLE_BASELINES_3.md
@@ -27,7 +27,6 @@ pip install godot-rl[sb3]
 ```
 
 ## Basic Environment Usage
-Usage instructions for envs **BallChase**, **FlyBy** and **JumperHard.**
 
 ### Download the env:
 
@@ -36,19 +35,8 @@ gdrl.env_from_hub -r edbeeching/godot_rl_<ENV_NAME>
 chmod +x examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 # linux example
 ```
 
-### Train a model from scratch:
-
-```bash
-gdrl --env=gdrl --env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64 --experiment_name=Experiment_01 --viz
-```
-
-While the default options for sb3 work reasonably well. You may be interested in changing the hyperparameters.
-
-We recommend taking the [sb3 example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/stable_baselines3_example.py) and modifying to match your needs. 
-
-The example exposes more parameters for the user to configure, such as `--speedup` to run the environment faster than realtime and the `--n_parallel` to launch several instances of the game executable in order to accelerate training (not available for in-editor training). 
-
-## SB3 Example script usage:
+## Training / SB3 Example script usage:
+Clone the repository or download the script [sb3 example](https://github.com/edbeeching/godot_rl_agents/blob/main/examples/stable_baselines3_example.py). 
 To use the example script, first move to the location where the downloaded script is in the console/terminal, and then try some of the example use cases below:
 
 ### Train a model in editor:
@@ -60,9 +48,13 @@ python stable_baselines3_example.py
 ```bash
 python stable_baselines3_example.py --env_path=path_to_executable
 ```
+For the previously downloaded envs, the path will be e.g.
+`--env_path=examples/godot_rl_<ENV_NAME>/bin/<ENV_NAME>.x86_64`
+
 Note that the exported environment will not be rendered in order to accelerate training.
 If you want to display it, add the `--viz` argument.
 
+
 ### Train an exported environment using 4 environment processes:
 ```bash
 python stable_baselines3_example.py --env_path=path_to_executable --n_parallel=4
@@ -120,3 +112,64 @@ and reach 0 at `--timesteps` value.
 ```bash
 python stable_baselines3_example.py --timesteps=1_000_000 --linear_lr_schedule
 ```
+
+## Training statistics and logging:
+### Adding success rate to console logs:
+If you want to report success rate based on some condition (e.g. whether the agent successfully finished the level or not), 
+follow the steps below:
+
+#### 1 - Add the following method to your extended `AIController`:
+```gdscript
+var is_success := false
+func get_info() -> Dictionary:
+	if done: 
+		return {"is_success": is_success}
+		is_success = false
+	return {}
+```
+
+The above snippet will send the information on whether or not the episode was succesful to the Python training server.
+SB3 can use this to report the success rate.
+
+Note that we are setting `"false"` as the default, this is just one way to implement it, you can adjust the implementation to your preference.
+Due to it being reset to `false`, we just need to set it to `true` when an episode is done succesfully, otherwise we don't need to change it.
+
+#### 2 - On some condition, set `is_success` to `true`:
+The condition depends on your use case, for example, here's how we can implement this in the [SimpleReachGoal](https://github.com/edbeeching/godot_rl_agents_examples/tree/main/examples/TestExamples/SimpleReachGoal) env.
+In the `player.gd` script, we just add `is_success` to depend on whether or not the reward is higher than 0:
+
+```gdscript
+## Ends the game, setting an optional reward
+func game_over(reward: float = 0.0):
+	ai_controller.is_success = reward > 0
+	ai_controller.reward += reward
+	game_scene_manager.reset()
+```
+Notes:
+- Although not directly visible, the `done` condition is also set by this method (by calling `game_scene_manager.reset()`).
+In a different env, it might look something like this:
+
+```gdscript
+func game_over():
+	ai_controller.is_success = reward > 0
+	ai_controller.done = true
+	ai_controller.needs_reset = true
+```
+
+- The condition for success can vary based on your environment, it does not have to depend directly on the reward.
+- The current [sb3 docs relevant section](https://stable-baselines3.readthedocs.io/en/master/common/logger.html#rollout) suggests:
+> you must pass an extra argument to the Monitor wrapper to log that value (info_keywords=("is_success",))
+
+We didn't add this to the SB3 example script, since it seems to work without the argument in the current SB3 version.
+We haven't tested this in depth yet, so try adding the argument if you run into any issues.
+
+After these changes, you should be able to see the rate in the training stats, e.g.:
+
+![success rate](https://github.com/user-attachments/assets/4901df0b-e48f-463d-a05f-39a16b9f94fb)
+
+
+### Tensorboard:
+You can see the output from the training session in tensorboard. Check [this guide](https://github.com/GianiStatie/godot_rl_agents/blob/main/docs/TRAINING_STATISTICS.md) for more info.
+
+
+

From e4cb17f01bddac78e7dabaa7da6d72bd4e59af83 Mon Sep 17 00:00:00 2001
From: Ivan-267 <61947090+Ivan-267@users.noreply.github.com>
Date: Mon, 21 Oct 2024 23:09:20 +0200
Subject: [PATCH 6/6] Update ADV_STABLE_BASELINES_3.md

Removes the unnecessary resetting of is_success to false (the auto reset could be delayed based on action_repeat, so it's better to specify true or false every time when setting done = true to make sure it's up to date).
---
 docs/ADV_STABLE_BASELINES_3.md | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/docs/ADV_STABLE_BASELINES_3.md b/docs/ADV_STABLE_BASELINES_3.md
index 7bd190c1..90ef7ec3 100644
--- a/docs/ADV_STABLE_BASELINES_3.md
+++ b/docs/ADV_STABLE_BASELINES_3.md
@@ -124,17 +124,13 @@ var is_success := false
 func get_info() -> Dictionary:
 	if done: 
 		return {"is_success": is_success}
-		is_success = false
 	return {}
 ```
 
 The above snippet will send the information on whether or not the episode was succesful to the Python training server.
 SB3 can use this to report the success rate.
 
-Note that we are setting `"false"` as the default, this is just one way to implement it, you can adjust the implementation to your preference.
-Due to it being reset to `false`, we just need to set it to `true` when an episode is done succesfully, otherwise we don't need to change it.
-
-#### 2 - On some condition, set `is_success` to `true`:
+#### 2 - Set `is_success` to `true` or `false` when ending the episode
 The condition depends on your use case, for example, here's how we can implement this in the [SimpleReachGoal](https://github.com/edbeeching/godot_rl_agents_examples/tree/main/examples/TestExamples/SimpleReachGoal) env.
 In the `player.gd` script, we just add `is_success` to depend on whether or not the reward is higher than 0: