Skip to content

Commit

Permalink
Merge pull request #46 from GianiStatie/giani/passing-info
Browse files Browse the repository at this point in the history
feat: adding info - is_success
  • Loading branch information
GianiStatie authored Oct 23, 2024
2 parents 6df3ba9 + 0d6c551 commit b6763fa
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 48 deletions.
33 changes: 21 additions & 12 deletions addons/godot_rl_agents/controller/ai_controller_2d.gd
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ extends Node2D
class_name AIController2D

enum ControlModes {
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
}
@export var control_mode: ControlModes = ControlModes.INHERIT_FROM_SYNC
## The path to a trained .onnx model file to use for inference (overrides the path set in sync node).
Expand Down Expand Up @@ -49,7 +49,7 @@ func init(player: Node2D):
_player = player


#-- Methods that need implementing using the "extend script" option in Godot --#
#region Methods that need implementing using the "extend script" option in Godot
## Must be overridden in the extended script: return the current observation
## as a Dictionary with an "obs" array entry.
func get_obs() -> Dictionary:
	assert(
		false,
		"the get_obs method is not implemented when extending from ai_controller"
	)
	return {"obs": []}
Expand All @@ -62,8 +62,7 @@ func get_reward() -> float:

func get_action_space() -> Dictionary:
assert(
false,
"the get get_action_space method is not implemented when extending from ai_controller"
false, "the get_action_space method is not implemented when extending from ai_controller"
)
return {
"example_actions_continous": {"size": 2, "action_type": "continuous"},
Expand All @@ -75,16 +74,26 @@ func set_action(action) -> void:
assert(false, "the set_action method is not implemented when extending from ai_controller")


#-----------------------------------------------------------------------------#
#endregion


#-- Methods that sometimes need implementing using the "extend script" option in Godot --#
#region Methods that sometimes need implementing using the "extend script" option in Godot
# Only needed if you are recording expert demos with this AIController
func get_action() -> Array:
assert(false, "the get_action method is not implemented in extended AIController but demo_recorder is used")
assert(
false,
"the get_action method is not implemented in extended AIController but demo_recorder is used"
)
return []

# -----------------------------------------------------------------------------#

# Optional override: supplies additional per-step info to the trainer
# (e.g. `is_success` for SB3 training). Defaults to no extra info.
func get_info() -> Dictionary:
	var info := {}
	return info


#endregion


func _physics_process(delta):
n_steps += 1
Expand Down
32 changes: 20 additions & 12 deletions addons/godot_rl_agents/controller/ai_controller_3d.gd
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ extends Node3D
class_name AIController3D

enum ControlModes {
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
}
@export var control_mode: ControlModes = ControlModes.INHERIT_FROM_SYNC
## The path to a trained .onnx model file to use for inference (overrides the path set in sync node).
Expand Down Expand Up @@ -49,7 +49,7 @@ func init(player: Node3D):
_player = player


#-- Methods that need implementing using the "extend script" option in Godot --#
#region Methods that need implementing using the "extend script" option in Godot
## Must be overridden in the extended script: return the current observation
## as a Dictionary with an "obs" array entry.
func get_obs() -> Dictionary:
	assert(
		false,
		"the get_obs method is not implemented when extending from ai_controller"
	)
	return {"obs": []}
Expand All @@ -62,8 +62,7 @@ func get_reward() -> float:

func get_action_space() -> Dictionary:
assert(
false,
"the get_action_space method is not implemented when extending from ai_controller"
false, "the get_action_space method is not implemented when extending from ai_controller"
)
return {
"example_actions_continous": {"size": 2, "action_type": "continuous"},
Expand All @@ -75,16 +74,25 @@ func set_action(action) -> void:
assert(false, "the set_action method is not implemented when extending from ai_controller")


#-----------------------------------------------------------------------------#
#endregion


#-- Methods that sometimes need implementing using the "extend script" option in Godot --#
#region Methods that sometimes need implementing using the "extend script" option in Godot
# Only needed if you are recording expert demos with this AIController
func get_action() -> Array:
assert(false, "the get_action method is not implemented in extended AIController but demo_recorder is used")
assert(
false,
"the get_action method is not implemented in extended AIController but demo_recorder is used"
)
return []

# -----------------------------------------------------------------------------#

# Optional override: supplies additional per-step info to the trainer
# (e.g. `is_success` for SB3 training). Defaults to no extra info.
func get_info() -> Dictionary:
	var info := {}
	return info


#endregion


func _physics_process(delta):
Expand Down
59 changes: 35 additions & 24 deletions addons/godot_rl_agents/sync.gd
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ class_name Sync
# --fixed-fps 2000 --disable-render-loop

enum ControlModes {
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE ## Load a pretrained model using an .onnx file
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE ## Load a pretrained model using an .onnx file
}
@export var control_mode: ControlModes = ControlModes.TRAINING
## Action will be repeated for n frames (Godot physics steps).
## Action will be repeated for n frames (Godot physics steps).
@export_range(1, 10, 1, "or_greater") var action_repeat := 8
## Speeds up the physics in the environment to enable faster training.
@export_range(0, 10, 0.1, "or_greater") var speed_up := 1.0
Expand Down Expand Up @@ -57,6 +57,7 @@ var _action_space_training: Array[Dictionary] = []
var _action_space_inference: Array[Dictionary] = []
var _obs_space_training: Array[Dictionary] = []


# Called when the node enters the scene tree for the first time.
func _ready():
await get_parent().ready
Expand Down Expand Up @@ -155,7 +156,7 @@ func _initialize_inference_agents():
agent.onnx_model = agent_onnx_model
if not agent_onnx_model.action_means_only_set:
agent_onnx_model.set_action_means_only(action_space)

_set_heuristic("model", agents_inference)


Expand Down Expand Up @@ -198,11 +199,13 @@ func _training_process():
if connected:
get_tree().set_pause(true)

var obs = _get_obs_from_agents(agents_training)
var info = _get_info_from_agents(agents_training)

if just_reset:
just_reset = false
var obs = _get_obs_from_agents(agents_training)

var reply = {"type": "reset", "obs": obs}
var reply = {"type": "reset", "obs": obs, "info": info}
_send_dict_as_json_message(reply)
# this should go straight to getting the action and setting it on the agent, no need to perform one physics tick
get_tree().set_pause(false)
Expand All @@ -214,9 +217,7 @@ func _training_process():
var done = _get_done_from_agents()
#_reset_agents_if_done() # this ensures the new observation is from the next env instance : NEEDS REFACTOR

var obs = _get_obs_from_agents(agents_training)

var reply = {"type": "step", "obs": obs, "reward": reward, "done": done}
var reply = {"type": "step", "obs": obs, "reward": reward, "done": done, "info": info}
_send_dict_as_json_message(reply)

var handled = handle_message()
Expand All @@ -229,9 +230,7 @@ func _inference_process():

for agent_id in range(0, agents_inference.size()):
var model: ONNXModel = agents_inference[agent_id].onnx_model
var action = model.run_inference(
obs[agent_id]["obs"], 1.0
)
var action = model.run_inference(obs[agent_id]["obs"], 1.0)
var action_dict = _extract_action_dict(
action["output"], _action_space_inference[agent_id], model.action_means_only
)
Expand Down Expand Up @@ -290,30 +289,35 @@ func _heuristic_process():
func _extract_action_dict(action_array: Array, action_space: Dictionary, action_means_only: bool):
var index = 0
var result = {}
for key in action_space.keys():
var size = action_space[key]["size"]
for key in action_space.keys():
var size = action_space[key]["size"]
var action_type = action_space[key]["action_type"]
if action_type == "discrete":
var largest_logit: float # Value of the largest logit for this action in the actions array
var largest_logit_idx: int # Index of the largest logit for this action in the actions array
var largest_logit: float # Value of the largest logit for this action in the actions array
var largest_logit_idx: int # Index of the largest logit for this action in the actions array
for logit_idx in range(0, size):
var logit_value = action_array[index + logit_idx]
if logit_value > largest_logit:
largest_logit = logit_value
largest_logit_idx = logit_idx
result[key] = largest_logit_idx # Index of the largest logit is the discrete action value
largest_logit_idx = logit_idx
result[key] = largest_logit_idx # Index of the largest logit is the discrete action value
index += size
elif action_type == "continuous":
# For continuous actions, we only take the action mean values
result[key] = clamp_array(action_array.slice(index, index + size), -1.0, 1.0)
if action_means_only:
index += size # model only outputs action means, so we move index by size
index += size # model only outputs action means, so we move index by size
else:
index += size * 2 # model outputs logstd after action mean, we skip the logstd part
index += size * 2 # model outputs logstd after action mean, we skip the logstd part

else:
assert(false, 'Only "discrete" and "continuous" action types supported. Found: %s action type set.' % action_type)

assert(
false,
(
'Only "discrete" and "continuous" action types supported. Found: %s action type set.'
% action_type
)
)

return result

Expand Down Expand Up @@ -349,7 +353,7 @@ func _get_agents():
"Currently only a single AIController can be used for recording expert demos."
)
agent_demo_record = agent

var training_agent_count = agents_training.size()
agents_training_policy_names.resize(training_agent_count)
for i in range(0, training_agent_count):
Expand Down Expand Up @@ -547,6 +551,13 @@ func _get_reward_from_agents(agents: Array = agents_training):
return rewards


# Collects each agent's `get_info()` Dictionary, in agent order, into an Array.
func _get_info_from_agents(agents: Array = all_agents):
	return agents.map(func(agent): return agent.get_info())


func _get_done_from_agents(agents: Array = agents_training):
var dones = []
for agent in agents:
Expand Down

0 comments on commit b6763fa

Please sign in to comment.