Generate training scripts in torch
Toni-SM committed Sep 9, 2024
1 parent cc0882a commit 7eb8b16
Showing 2 changed files with 109 additions and 152 deletions.
173 changes: 105 additions & 68 deletions docs/source/examples/isaaclab/generator/isaaclab_generator.py
@@ -6,6 +6,7 @@
app_launcher = AppLauncher()
simulation_app = app_launcher.app

import copy
import math
import gymnasium as gym
import importlib
@@ -17,6 +18,11 @@
from omni.isaac.lab.utils import class_to_dict
from omni.isaac.lab_tasks.utils import load_cfg_from_registry

from skrl.utils.model_instantiators.torch import deterministic_model, gaussian_model, shared_model

GENERATE_YAML = True
GENERATE_SCRIPTS = True


class Config:
def __init__(self, library: str) -> None:
@@ -164,59 +170,62 @@ def generate_yaml(self) -> tuple[str, str]:
dirname = os.path.dirname(self.path)[len(os.getcwd()) + 1:]
return dirname, filename

def generate_python_script(self) -> None:
def convert_hidden_activation(activations, framework):
mapping = {
"torch": {
"": "Identity",
"relu": "ReLU",
"tanh": "Tanh",
"sigmoid": "Sigmoid",
"leaky_relu": "LeakyReLU",
"elu": "ELU",
"softplus": "Softplus",
"softsign": "Softsign",
"selu": "SELU",
"softmax": "Softmax",
},
"jax": {
"relu": "relu",
"tanh": "tanh",
"sigmoid": "sigmoid",
"leaky_relu": "leaky_relu",
"elu": "elu",
"softplus": "softplus",
"softsign": "soft_sign",
"selu": "selu",
"softmax": "softmax",
},
}
return [mapping[framework][activation] for activation in activations]

task_name = "_".join([item.lower() for item in self.cfg["metadata"]["task"].split("-")[1:-1]])
for framework in ["torch", "jax"]:
def generate_python_script(self) -> list[str]:
paths = []
task_name = "_".join([item.lower() for item in self.cfg["metadata"]["task"].split("-")[1:]])
for framework in ["torch"]: # TODO: , "jax"]:
content = ""
if self.library == "skrl":
# generate file name
os.makedirs("skrl_examples", exist_ok=True)
path = os.path.join("skrl_examples", f"{framework}_{task_name}_ppo.py")
with open(os.path.join(self._templates, f"ppo_skrl_py_{framework}")) as file:
path = os.path.join("skrl_examples", f"{framework}_{task_name}_{self.algorithm}.py")
with open(os.path.join(self._templates, f"{self.algorithm}_skrl_py_{framework}")) as file:
content = file.read()
if not content:
raise ValueError
# update config
self.cfg["models"]["policy"][f"hidden_activation__{framework}"] = convert_hidden_activation(
self.cfg["models"]["policy"]["hidden_activation"], framework
)
self.cfg["models"]["value"][f"hidden_activation__{framework}"] = convert_hidden_activation(
self.cfg["models"]["value"]["hidden_activation"], framework
)
policy = copy.deepcopy(self.cfg["models"]["policy"])
value = copy.deepcopy(self.cfg["models"]["value"])
del policy["class"]
del value["class"]
if self.cfg["models"]["separate"]:
source = gaussian_model(
observation_space=1,
action_space=1,
device="cuda:0",
return_source=True,
**policy,
).rstrip()
source = source.replace("GaussianModel", "Policy")
self.cfg["models"]["generated"] = {"policy": source}
source = deterministic_model(
observation_space=1,
action_space=1,
device="cuda:0",
return_source=True,
**value,
).rstrip()
source = source.replace("DeterministicModel", "Value")
self.cfg["models"]["generated"]["value"] = source
else:
source = shared_model(
observation_space=1,
action_space=1,
device="cuda:0",
roles=["policy", "value"],
parameters=[policy, value],
return_source=True,
).rstrip()
source = source.replace("GaussianDeterministicModel", "Shared")
self.cfg["models"]["generated"] = source
# render template
template = Template(content, keep_trailing_newline=True, trim_blocks=True, lstrip_blocks=True)
content = template.render(self.cfg)
# save file
with open(path, "w") as file:
file.write(content)
paths.append(os.path.basename(path))
return paths
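
For context, a minimal sketch (not part of this commit) of the pattern used above: with return_source=True, skrl's model instantiators return the generated class definition as a string rather than an instantiated torch module, so the generator can rename the class and inject it into the script template. The network spec below is a hypothetical example; the real values are taken from the task's registered skrl config.

from skrl.utils.model_instantiators.torch import gaussian_model

# hypothetical policy spec (the generator builds this from the parsed task config)
policy_spec = {
    "network": [{"name": "net", "input": "STATES", "layers": [64, 64], "activations": "elu"}],
    "output": "ACTIONS",
}
# placeholder observation/action spaces: only the returned source string matters here
source = gaussian_model(observation_space=1, action_space=1, device="cuda:0",
                        return_source=True, **policy_spec).rstrip()
source = source.replace("GaussianModel", "Policy")  # rename the generated class
print(source)  # full class definition, ready to be rendered into the template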


if __name__ == "__main__":
@@ -227,7 +236,7 @@ def convert_hidden_activation(activations, framework):
# ignore non-Isaac Lab envs
if not env_name.lower().startswith("isaac-"):
continue
stats.append({"env": env_name, "registered": {}, "generated": []})
stats.append({"env": env_name, "registered": {}, "generated": [], "generated_scripts": []})
print(f"\n{'=' * len(env_name)}\n{env_name}\n{'=' * len(env_name)}")

# get number of environments
@@ -264,44 +273,72 @@ def convert_hidden_activation(activations, framework):
# ignore PLAY configs: Isaac-ENV-NAME-Play-v0
if env_name.lower().endswith("-play-v0"):
stats[-1]["generated"].append({"filename": "-"})
stats[-1]["generated_scripts"].append({"filename": "-"})
continue

# generate config file
generated = False
# rl_games config
if len(rl_games_configs):
assert len(rl_games_configs) == 1
config = rl_games_configs[0]
if config.valid:
dirname, filename = config.generate_yaml()
stats[-1]["generated"].append({"dirname": dirname, "filename": filename, "library": "rl_games"})
generated = True
# rsl_rl config
if not generated and len(rsl_rl_configs):
assert len(rsl_rl_configs) == 1
config = rsl_rl_configs[0]
if config.valid:
dirname, filename = config.generate_yaml()
stats[-1]["generated"].append({"dirname": dirname, "filename": filename, "library": "rsl_rl"})
generated = True
if GENERATE_YAML:
generated = False
# rl_games config
if len(rl_games_configs):
assert len(rl_games_configs) == 1
config = rl_games_configs[0]
if config.valid:
dirname, filename = config.generate_yaml()
stats[-1]["generated"].append({"dirname": dirname, "filename": filename, "library": "rl_games"})
generated = True
# rsl_rl config
if not generated and len(rsl_rl_configs):
assert len(rsl_rl_configs) == 1
config = rsl_rl_configs[0]
if config.valid:
dirname, filename = config.generate_yaml()
stats[-1]["generated"].append({"dirname": dirname, "filename": filename, "library": "rsl_rl"})
generated = True

# generate Python scripts
library = "skrl"
skrl_configs = [
Config(library).parse(entry, env_name, num_envs)
for entry, _ in env_data.kwargs.items()
if entry.startswith(library)
]
if len(skrl_configs):
assert len(skrl_configs) == 1
config = skrl_configs[0]
if config.valid:
config.generate_python_script()
if GENERATE_SCRIPTS:
if len(skrl_configs):
# assert len(skrl_configs) == 1 # TODO
config = skrl_configs[0]
if config.valid:
paths = config.generate_python_script()
stats[-1]["generated_scripts"].append({"filename": ", ".join(paths) if paths else ""})

stats = sorted(stats, key=lambda x: x["env"])

print()
print("#################################")
print()

if GENERATE_SCRIPTS:
table = PrettyTable()
table.field_names = ["Task", "Registered", "Generated"]
table.align["Task"] = "l"
table.align["Generated"] = "l"
for data in stats:
if "skrl" in data["registered"]:
registered = f'- {len(data["registered"]["skrl"])} -' if len(data["registered"]["skrl"]) > 1 else ""
filenames = [item.get("filename") for item in data["generated"]]
if filenames == ["-"]:
pass
elif filenames:
exist = False
for item in data["registered"]["skrl"]:
if item.filename in filenames:
exist = True
if not exist:
registered = f"{registered} other" if registered else "other"
else:
registered = "No"
filenames = [f'{item.get("filename")}' for item in data["generated_scripts"] if item]
table.add_row([data["env"], registered, ", ".join(filenames)])
print(table)
print()

if not GENERATE_YAML:
exit()

cmd = "git status --porcelain | grep skrl_.*.yaml"
git_status = subprocess.check_output(cmd, shell=True, text=True).split("\n")
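
A hedged sketch (hypothetical continuation; the rest of the script is truncated here) of how the captured porcelain output can be used: each non-empty line is a two-character status code, a space, and the path, so the changed YAML paths can be recovered from git_status like this.

# porcelain lines look like "?? path/to/skrl_ppo_cfg.yaml" or " M path/to/skrl_ppo_cfg.yaml"
changed_yaml = [line[3:].strip() for line in git_status if line.strip()]
print(changed_yaml)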

@@ -19,92 +19,12 @@ set_seed() # e.g. `set_seed(42)` for fixed seed

{% if models.separate %}
# define models (stochastic and deterministic models) using mixins
class Policy(GaussianMixin, Model):
def __init__(self, observation_space, action_space, device, clip_actions=False,
clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"):
Model.__init__(self, observation_space, action_space, device)
GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction)

{% for index in range(models.policy.hiddens | length) %}
{% if loop.first %}
self.net = nn.Sequential(nn.Linear(self.num_observations, {{ models.policy.hiddens | first }}),
nn.{{ models.policy.hidden_activation__torch | first }}(),
{% elif loop.last %}
nn.Linear({{ models.policy.hiddens[loop.previtem] }}, {{ models.policy.hiddens | last }}),
nn.{{ models.policy.hidden_activation__torch | last }}(),
nn.Linear({{ models.policy.hiddens | last }}, self.num_actions))
{% else %}
nn.Linear({{ models.policy.hiddens[loop.previtem] }}, {{ models.policy.hiddens[index] }}),
nn.{{ models.policy.hidden_activation__torch[index] }}(),
{% endif %}
{% endfor %}
self.log_std_parameter = nn.Parameter(torch.ones(self.num_actions))

def compute(self, inputs, role):
return self.net(inputs["states"]), self.log_std_parameter, {}

class Value(DeterministicMixin, Model):
def __init__(self, observation_space, action_space, device, clip_actions=False):
Model.__init__(self, observation_space, action_space, device)
DeterministicMixin.__init__(self, clip_actions)

{% for index in range(models.value.hiddens | length) %}
{% if loop.first %}
self.net = nn.Sequential(nn.Linear(self.num_observations, {{ models.value.hiddens | first }}),
nn.{{ models.value.hidden_activation_torch | first }}(),
{% elif loop.last %}
nn.Linear({{ models.value.hiddens[loop.previtem] }}, {{ models.value.hiddens | last }}),
nn.{{ models.value.hidden_activation_torch | last }}(),
nn.Linear({{ models.value.hiddens | last }}, 1))
{% else %}
nn.Linear({{ models.value.hiddens[loop.previtem] }}, {{ models.value.hiddens[index] }}),
nn.{{ models.value.hidden_activation_torch[index] }}(),
{% endif %}
{% endfor %}

def compute(self, inputs, role):
return self.net(inputs["states"]), {}
{{ models.generated.policy }}

{{ models.generated.value }}
{% else %}
# define shared model (stochastic and deterministic models) using mixins
class Shared(GaussianMixin, DeterministicMixin, Model):
def __init__(self, observation_space, action_space, device, clip_actions=False,
clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"):
Model.__init__(self, observation_space, action_space, device)
GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction)
DeterministicMixin.__init__(self, clip_actions)

{% for index in range(models.policy.hiddens | length) %}
{% if loop.first %}
self.net = nn.Sequential(nn.Linear(self.num_observations, {{ models.policy.hiddens | first }}),
nn.{{ models.policy.hidden_activation | first }}(),
{% elif loop.last %}
nn.Linear({{ models.policy.hiddens[loop.previtem] }}, {{ models.policy.hiddens | last }}),
nn.{{ models.policy.hidden_activation | last }}())
{% else %}
nn.Linear({{ models.policy.hiddens[loop.previtem] }}, {{ models.policy.hiddens[index] }}),
nn.{{ models.policy.hidden_activation[index] }}(),
{% endif %}
{% endfor %}

self.mean_layer = nn.Linear({{ models.policy.hiddens | last }}, self.num_actions)
self.log_std_parameter = nn.Parameter(torch.ones(self.num_actions))

self.value_layer = nn.Linear({{ models.value.hiddens | last }}, 1)

def act(self, inputs, role):
if role == "policy":
return GaussianMixin.act(self, inputs, role)
elif role == "value":
return DeterministicMixin.act(self, inputs, role)

def compute(self, inputs, role):
if role == "policy":
self._shared_output = self.net(inputs["states"])
return self.mean_layer(self._shared_output), self.log_std_parameter, {}
elif role == "value":
shared_output = self.net(inputs["states"]) if self._shared_output is None else self._shared_output
self._shared_output = None
return self.value_layer(shared_output), {}
{{ models.generated }}
{% endif %}
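
A hedged sketch (with a made-up minimal cfg, not part of this commit) of how the placeholders above are filled: the generator renders this template with Jinja2, so {{ models.generated }} (or models.generated.policy / models.generated.value when separate models are used) is replaced by the source strings returned by the model instantiators.

from jinja2 import Template

template_text = (
    "{% if models.separate %}\n"
    "{{ models.generated.policy }}\n"
    "\n"
    "{{ models.generated.value }}\n"
    "{% else %}\n"
    "{{ models.generated }}\n"
    "{% endif %}\n"
)
# placeholder source; in the generator this is the string returned by shared_model(...)
cfg = {"models": {"separate": False, "generated": "class Shared(GaussianMixin, DeterministicMixin, Model): ..."}}
template = Template(template_text, keep_trailing_newline=True, trim_blocks=True, lstrip_blocks=True)
print(template.render(cfg))  # emits only the shared-model branch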

