Skip to content

Commit

Permalink
Merge pull request #46 from GianiStatie/giani/passing-info
Browse files Browse the repository at this point in the history
feat: adding info - is_success
  • Loading branch information
GianiStatie authored Oct 23, 2024
2 parents 6df3ba9 + 0d6c551 commit b6763fa
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 48 deletions.
33 changes: 21 additions & 12 deletions addons/godot_rl_agents/controller/ai_controller_2d.gd
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ extends Node2D
class_name AIController2D

enum ControlModes {
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
}
@export var control_mode: ControlModes = ControlModes.INHERIT_FROM_SYNC
## The path to a trained .onnx model file to use for inference (overrides the path set in sync node).
Expand Down Expand Up @@ -49,7 +49,7 @@ func init(player: Node2D):
_player = player


#-- Methods that need implementing using the "extend script" option in Godot --#
#region Methods that need implementing using the "extend script" option in Godot
## Must be overridden in the extended script: return the current observation
## as a Dictionary with an "obs" array entry.
func get_obs() -> Dictionary:
	assert(
		false,
		"the get_obs method is not implemented when extending from ai_controller"
	)
	return {"obs": []}
Expand All @@ -62,8 +62,7 @@ func get_reward() -> float:

func get_action_space() -> Dictionary:
assert(
false,
"the get get_action_space method is not implemented when extending from ai_controller"
false, "the get_action_space method is not implemented when extending from ai_controller"
)
return {
"example_actions_continous": {"size": 2, "action_type": "continuous"},
Expand All @@ -75,16 +74,26 @@ func set_action(action) -> void:
assert(false, "the set_action method is not implemented when extending from ai_controller")


#-----------------------------------------------------------------------------#
#endregion


#-- Methods that sometimes need implementing using the "extend script" option in Godot --#
#region Methods that sometimes need implementing using the "extend script" option in Godot
# Only needed if you are recording expert demos with this AIController
func get_action() -> Array:
assert(false, "the get_action method is not implemented in extended AIController but demo_recorder is used")
assert(
false,
"the get_action method is not implemented in extended AIController but demo_recorder is used"
)
return []

# -----------------------------------------------------------------------------#

# Optional override: supplies additional per-step info to the trainer
# (e.g. `is_success` for SB3 training). Defaults to no extra info.
func get_info() -> Dictionary:
	var info := {}
	return info


#endregion


func _physics_process(delta):
n_steps += 1
Expand Down
32 changes: 20 additions & 12 deletions addons/godot_rl_agents/controller/ai_controller_3d.gd
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ extends Node3D
class_name AIController3D

enum ControlModes {
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
}
@export var control_mode: ControlModes = ControlModes.INHERIT_FROM_SYNC
## The path to a trained .onnx model file to use for inference (overrides the path set in sync node).
Expand Down Expand Up @@ -49,7 +49,7 @@ func init(player: Node3D):
_player = player


#-- Methods that need implementing using the "extend script" option in Godot --#
#region Methods that need implementing using the "extend script" option in Godot
## Must be overridden in the extended script: return the current observation
## as a Dictionary with an "obs" array entry.
func get_obs() -> Dictionary:
	assert(
		false,
		"the get_obs method is not implemented when extending from ai_controller"
	)
	return {"obs": []}
Expand All @@ -62,8 +62,7 @@ func get_reward() -> float:

func get_action_space() -> Dictionary:
assert(
false,
"the get_action_space method is not implemented when extending from ai_controller"
false, "the get_action_space method is not implemented when extending from ai_controller"
)
return {
"example_actions_continous": {"size": 2, "action_type": "continuous"},
Expand All @@ -75,16 +74,25 @@ func set_action(action) -> void:
assert(false, "the set_action method is not implemented when extending from ai_controller")


#-----------------------------------------------------------------------------#
#endregion


#-- Methods that sometimes need implementing using the "extend script" option in Godot --#
#region Methods that sometimes need implementing using the "extend script" option in Godot
# Only needed if you are recording expert demos with this AIController
func get_action() -> Array:
assert(false, "the get_action method is not implemented in extended AIController but demo_recorder is used")
assert(
false,
"the get_action method is not implemented in extended AIController but demo_recorder is used"
)
return []

# -----------------------------------------------------------------------------#

# Optional override: supplies additional per-step info to the trainer
# (e.g. `is_success` for SB3 training). Defaults to no extra info.
func get_info() -> Dictionary:
	var info := {}
	return info


#endregion


func _physics_process(delta):
Expand Down
59 changes: 35 additions & 24 deletions addons/godot_rl_agents/sync.gd
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ class_name Sync
# --fixed-fps 2000 --disable-render-loop

enum ControlModes {
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE ## Load a pretrained model using an .onnx file
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE ## Load a pretrained model using an .onnx file
}
@export var control_mode: ControlModes = ControlModes.TRAINING
## Action will be repeated for n frames (Godot physics steps).
## Action will be repeated for n frames (Godot physics steps).
@export_range(1, 10, 1, "or_greater") var action_repeat := 8
## Speeds up the physics in the environment to enable faster training.
@export_range(0, 10, 0.1, "or_greater") var speed_up := 1.0
Expand Down Expand Up @@ -57,6 +57,7 @@ var _action_space_training: Array[Dictionary] = []
var _action_space_inference: Array[Dictionary] = []
var _obs_space_training: Array[Dictionary] = []


# Called when the node enters the scene tree for the first time.
func _ready():
await get_parent().ready
Expand Down Expand Up @@ -155,7 +156,7 @@ func _initialize_inference_agents():
agent.onnx_model = agent_onnx_model
if not agent_onnx_model.action_means_only_set:
agent_onnx_model.set_action_means_only(action_space)

_set_heuristic("model", agents_inference)


Expand Down Expand Up @@ -198,11 +199,13 @@ func _training_process():
if connected:
get_tree().set_pause(true)

var obs = _get_obs_from_agents(agents_training)
var info = _get_info_from_agents(agents_training)

if just_reset:
just_reset = false
var obs = _get_obs_from_agents(agents_training)

var reply = {"type": "reset", "obs": obs}
var reply = {"type": "reset", "obs": obs, "info": info}
_send_dict_as_json_message(reply)
# this should go straight to getting the action and setting it on the agent, no need to perform one physics tick
get_tree().set_pause(false)
Expand All @@ -214,9 +217,7 @@ func _training_process():
var done = _get_done_from_agents()
#_reset_agents_if_done() # this ensures the new observation is from the next env instance : NEEDS REFACTOR

var obs = _get_obs_from_agents(agents_training)

var reply = {"type": "step", "obs": obs, "reward": reward, "done": done}
var reply = {"type": "step", "obs": obs, "reward": reward, "done": done, "info": info}
_send_dict_as_json_message(reply)

var handled = handle_message()
Expand All @@ -229,9 +230,7 @@ func _inference_process():

for agent_id in range(0, agents_inference.size()):
var model: ONNXModel = agents_inference[agent_id].onnx_model
var action = model.run_inference(
obs[agent_id]["obs"], 1.0
)
var action = model.run_inference(obs[agent_id]["obs"], 1.0)
var action_dict = _extract_action_dict(
action["output"], _action_space_inference[agent_id], model.action_means_only
)
Expand Down Expand Up @@ -290,30 +289,35 @@ func _heuristic_process():
func _extract_action_dict(action_array: Array, action_space: Dictionary, action_means_only: bool):
var index = 0
var result = {}
for key in action_space.keys():
var size = action_space[key]["size"]
for key in action_space.keys():
var size = action_space[key]["size"]
var action_type = action_space[key]["action_type"]
if action_type == "discrete":
var largest_logit: float # Value of the largest logit for this action in the actions array
var largest_logit_idx: int # Index of the largest logit for this action in the actions array
var largest_logit: float # Value of the largest logit for this action in the actions array
var largest_logit_idx: int # Index of the largest logit for this action in the actions array
for logit_idx in range(0, size):
var logit_value = action_array[index + logit_idx]
if logit_value > largest_logit:
largest_logit = logit_value
largest_logit_idx = logit_idx
result[key] = largest_logit_idx # Index of the largest logit is the discrete action value
largest_logit_idx = logit_idx
result[key] = largest_logit_idx # Index of the largest logit is the discrete action value
index += size
elif action_type == "continuous":
# For continuous actions, we only take the action mean values
result[key] = clamp_array(action_array.slice(index, index + size), -1.0, 1.0)
if action_means_only:
index += size # model only outputs action means, so we move index by size
index += size # model only outputs action means, so we move index by size
else:
index += size * 2 # model outputs logstd after action mean, we skip the logstd part
index += size * 2 # model outputs logstd after action mean, we skip the logstd part

else:
assert(false, 'Only "discrete" and "continuous" action types supported. Found: %s action type set.' % action_type)

assert(
false,
(
'Only "discrete" and "continuous" action types supported. Found: %s action type set.'
% action_type
)
)

return result

Expand Down Expand Up @@ -349,7 +353,7 @@ func _get_agents():
"Currently only a single AIController can be used for recording expert demos."
)
agent_demo_record = agent

var training_agent_count = agents_training.size()
agents_training_policy_names.resize(training_agent_count)
for i in range(0, training_agent_count):
Expand Down Expand Up @@ -547,6 +551,13 @@ func _get_reward_from_agents(agents: Array = agents_training):
return rewards


# Collects each agent's `get_info()` Dictionary, in agent order, into an Array.
func _get_info_from_agents(agents: Array = all_agents):
	return agents.map(func(agent): return agent.get_info())


func _get_done_from_agents(agents: Array = agents_training):
var dones = []
for agent in agents:
Expand Down

0 comments on commit b6763fa

Please sign in to comment.