Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding info - is_success #46

Merged
merged 5 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 21 additions & 12 deletions addons/godot_rl_agents/controller/ai_controller_2d.gd
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ extends Node2D
class_name AIController2D

enum ControlModes {
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
}
@export var control_mode: ControlModes = ControlModes.INHERIT_FROM_SYNC
## The path to a trained .onnx model file to use for inference (overrides the path set in sync node).
Expand Down Expand Up @@ -49,7 +49,7 @@ func init(player: Node2D):
_player = player


#-- Methods that need implementing using the "extend script" option in Godot --#
#region Methods that need implementing using the "extend script" option in Godot
func get_obs() -> Dictionary:
assert(false, "the get_obs method is not implemented when extending from ai_controller")
return {"obs": []}
Expand All @@ -62,8 +62,7 @@ func get_reward() -> float:

func get_action_space() -> Dictionary:
assert(
false,
"the get get_action_space method is not implemented when extending from ai_controller"
false, "the get_action_space method is not implemented when extending from ai_controller"
)
return {
"example_actions_continous": {"size": 2, "action_type": "continuous"},
Expand All @@ -75,16 +74,26 @@ func set_action(action) -> void:
assert(false, "the set_action method is not implemented when extending from ai_controller")


#-----------------------------------------------------------------------------#
#endregion


#-- Methods that sometimes need implementing using the "extend script" option in Godot --#
#region Methods that sometimes need implementing using the "extend script" option in Godot
# Only needed if you are recording expert demos with this AIController
func get_action() -> Array:
assert(false, "the get_action method is not implemented in extended AIController but demo_recorder is used")
assert(
false,
"the get_action method is not implemented in extended AIController but demo_recorder is used"
)
return []

# -----------------------------------------------------------------------------#

# For providing additional info (e.g. `is_success` for SB3 training)
func get_info() -> Dictionary:
return {}


#endregion


func _physics_process(delta):
n_steps += 1
Expand Down
32 changes: 20 additions & 12 deletions addons/godot_rl_agents/controller/ai_controller_3d.gd
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ extends Node3D
class_name AIController3D

enum ControlModes {
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
INHERIT_FROM_SYNC, ## Inherit setting from sync node
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE, ## Load a pretrained model using an .onnx file
RECORD_EXPERT_DEMOS ## Record observations and actions for expert demonstrations
}
@export var control_mode: ControlModes = ControlModes.INHERIT_FROM_SYNC
## The path to a trained .onnx model file to use for inference (overrides the path set in sync node).
Expand Down Expand Up @@ -49,7 +49,7 @@ func init(player: Node3D):
_player = player


#-- Methods that need implementing using the "extend script" option in Godot --#
#region Methods that need implementing using the "extend script" option in Godot
func get_obs() -> Dictionary:
assert(false, "the get_obs method is not implemented when extending from ai_controller")
return {"obs": []}
Expand All @@ -62,8 +62,7 @@ func get_reward() -> float:

func get_action_space() -> Dictionary:
assert(
false,
"the get_action_space method is not implemented when extending from ai_controller"
false, "the get_action_space method is not implemented when extending from ai_controller"
)
return {
"example_actions_continous": {"size": 2, "action_type": "continuous"},
Expand All @@ -75,16 +74,25 @@ func set_action(action) -> void:
assert(false, "the set_action method is not implemented when extending from ai_controller")


#-----------------------------------------------------------------------------#
#endregion


#-- Methods that sometimes need implementing using the "extend script" option in Godot --#
#region Methods that sometimes need implementing using the "extend script" option in Godot
# Only needed if you are recording expert demos with this AIController
func get_action() -> Array:
assert(false, "the get_action method is not implemented in extended AIController but demo_recorder is used")
assert(
false,
"the get_action method is not implemented in extended AIController but demo_recorder is used"
)
return []

# -----------------------------------------------------------------------------#

# For providing additional info (e.g. `is_success` for SB3 training)
func get_info() -> Dictionary:
return {}


#endregion


func _physics_process(delta):
Expand Down
59 changes: 35 additions & 24 deletions addons/godot_rl_agents/sync.gd
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ class_name Sync
# --fixed-fps 2000 --disable-render-loop

enum ControlModes {
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE ## Load a pretrained model using an .onnx file
HUMAN, ## Test the environment manually
TRAINING, ## Train a model
ONNX_INFERENCE ## Load a pretrained model using an .onnx file
}
@export var control_mode: ControlModes = ControlModes.TRAINING
## Action will be repeated for n frames (Godot physics steps).
## Action will be repeated for n frames (Godot physics steps).
@export_range(1, 10, 1, "or_greater") var action_repeat := 8
## Speeds up the physics in the environment to enable faster training.
@export_range(0, 10, 0.1, "or_greater") var speed_up := 1.0
Expand Down Expand Up @@ -57,6 +57,7 @@ var _action_space_training: Array[Dictionary] = []
var _action_space_inference: Array[Dictionary] = []
var _obs_space_training: Array[Dictionary] = []


# Called when the node enters the scene tree for the first time.
func _ready():
await get_parent().ready
Expand Down Expand Up @@ -155,7 +156,7 @@ func _initialize_inference_agents():
agent.onnx_model = agent_onnx_model
if not agent_onnx_model.action_means_only_set:
agent_onnx_model.set_action_means_only(action_space)

_set_heuristic("model", agents_inference)


Expand Down Expand Up @@ -198,11 +199,13 @@ func _training_process():
if connected:
get_tree().set_pause(true)

var obs = _get_obs_from_agents(agents_training)
var info = _get_info_from_agents(agents_training)

if just_reset:
just_reset = false
var obs = _get_obs_from_agents(agents_training)

var reply = {"type": "reset", "obs": obs}
var reply = {"type": "reset", "obs": obs, "info": info}
_send_dict_as_json_message(reply)
# this should go straight to getting the action and setting it checked the agent, no need to perform one phyics tick
get_tree().set_pause(false)
Expand All @@ -214,9 +217,7 @@ func _training_process():
var done = _get_done_from_agents()
#_reset_agents_if_done() # this ensures the new observation is from the next env instance : NEEDS REFACTOR

var obs = _get_obs_from_agents(agents_training)

var reply = {"type": "step", "obs": obs, "reward": reward, "done": done}
var reply = {"type": "step", "obs": obs, "reward": reward, "done": done, "info": info}
_send_dict_as_json_message(reply)

var handled = handle_message()
Expand All @@ -229,9 +230,7 @@ func _inference_process():

for agent_id in range(0, agents_inference.size()):
var model: ONNXModel = agents_inference[agent_id].onnx_model
var action = model.run_inference(
obs[agent_id]["obs"], 1.0
)
var action = model.run_inference(obs[agent_id]["obs"], 1.0)
var action_dict = _extract_action_dict(
action["output"], _action_space_inference[agent_id], model.action_means_only
)
Expand Down Expand Up @@ -290,30 +289,35 @@ func _heuristic_process():
func _extract_action_dict(action_array: Array, action_space: Dictionary, action_means_only: bool):
var index = 0
var result = {}
for key in action_space.keys():
var size = action_space[key]["size"]
for key in action_space.keys():
var size = action_space[key]["size"]
var action_type = action_space[key]["action_type"]
if action_type == "discrete":
var largest_logit: float # Value of the largest logit for this action in the actions array
var largest_logit_idx: int # Index of the largest logit for this action in the actions array
var largest_logit: float # Value of the largest logit for this action in the actions array
var largest_logit_idx: int # Index of the largest logit for this action in the actions array
for logit_idx in range(0, size):
var logit_value = action_array[index + logit_idx]
if logit_value > largest_logit:
largest_logit = logit_value
largest_logit_idx = logit_idx
result[key] = largest_logit_idx # Index of the largest logit is the discrete action value
largest_logit_idx = logit_idx
result[key] = largest_logit_idx # Index of the largest logit is the discrete action value
index += size
elif action_type == "continuous":
# For continous actions, we only take the action mean values
result[key] = clamp_array(action_array.slice(index, index + size), -1.0, 1.0)
if action_means_only:
index += size # model only outputs action means, so we move index by size
index += size # model only outputs action means, so we move index by size
else:
index += size * 2 # model outputs logstd after action mean, we skip the logstd part
index += size * 2 # model outputs logstd after action mean, we skip the logstd part

else:
assert(false, 'Only "discrete" and "continuous" action types supported. Found: %s action type set.' % action_type)

assert(
false,
(
'Only "discrete" and "continuous" action types supported. Found: %s action type set.'
% action_type
)
)

return result

Expand Down Expand Up @@ -349,7 +353,7 @@ func _get_agents():
"Currently only a single AIController can be used for recording expert demos."
)
agent_demo_record = agent

var training_agent_count = agents_training.size()
agents_training_policy_names.resize(training_agent_count)
for i in range(0, training_agent_count):
Expand Down Expand Up @@ -547,6 +551,13 @@ func _get_reward_from_agents(agents: Array = agents_training):
return rewards


func _get_info_from_agents(agents: Array = all_agents):
var info = []
for agent in agents:
info.append(agent.get_info())
return info


func _get_done_from_agents(agents: Array = agents_training):
var dones = []
for agent in agents:
Expand Down