Skip to content

Commit

Permalink
Shared model instantiator's default parameters (#242)
Browse files Browse the repository at this point in the history
* Add reduction parameter to gaussian_model instantiator

* Specify shared model parameters' default values

* Update CHANGELOG

* Define reduction as string value
  • Loading branch information
Toni-SM authored Dec 21, 2024
1 parent d2aee9f commit 23b61dc
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 13 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Format code using Black code formatter (it's ugly, yes, but it does its job)

### Fixed
- Moved the batch sampling inside gradient step loop for DQN, DDQN, DDPG (RNN), TD3 (RNN), SAC and SAC (RNN)
- Move the batch sampling inside gradient step loop for DQN, DDQN, DDPG (RNN), TD3 (RNN), SAC and SAC (RNN)
- Model state dictionary initialization for composite Gymnasium spaces in JAX
- Add missing `reduction` parameter to Gaussian model instantiator

### Removed
- Remove OpenAI Gym (`gym`) from dependencies and source code. **skrl** continues to support gym environments,
Expand Down
6 changes: 6 additions & 0 deletions skrl/utils/model_instantiators/jax/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def gaussian_model(
clip_log_std: bool = True,
min_log_std: float = -20,
max_log_std: float = 2,
reduction: str = "sum",
initial_log_std: float = 0,
network: Sequence[Mapping[str, Any]] = [],
output: Union[str, Sequence[str]] = "",
Expand All @@ -47,6 +48,10 @@ def gaussian_model(
:type min_log_std: float, optional
:param max_log_std: Maximum value of the log standard deviation (default: 2)
:type max_log_std: float, optional
:param reduction: Reduction method for returning the log probability density function (default: ``"sum"``).
    Supported values are ``"mean"``, ``"sum"``, ``"prod"`` and ``"none"``. If ``"none"``, the log probability density
    function is returned as a tensor of shape ``(num_samples, num_actions)`` instead of ``(num_samples, 1)``
:type reduction: str, optional
:param initial_log_std: Initial value for the log standard deviation (default: 0)
:type initial_log_std: float, optional
:param network: Network definition (default: [])
Expand Down Expand Up @@ -117,4 +122,5 @@ def __call__(self, inputs, role):
clip_log_std=clip_log_std,
min_log_std=min_log_std,
max_log_std=max_log_std,
reduction=reduction,
)
6 changes: 6 additions & 0 deletions skrl/utils/model_instantiators/torch/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def gaussian_model(
clip_log_std: bool = True,
min_log_std: float = -20,
max_log_std: float = 2,
reduction: str = "sum",
initial_log_std: float = 0,
network: Sequence[Mapping[str, Any]] = [],
output: Union[str, Sequence[str]] = "",
Expand All @@ -46,6 +47,10 @@ def gaussian_model(
:type min_log_std: float, optional
:param max_log_std: Maximum value of the log standard deviation (default: 2)
:type max_log_std: float, optional
:param reduction: Reduction method for returning the log probability density function (default: ``"sum"``).
    Supported values are ``"mean"``, ``"sum"``, ``"prod"`` and ``"none"``. If ``"none"``, the log probability density
    function is returned as a tensor of shape ``(num_samples, num_actions)`` instead of ``(num_samples, 1)``
:type reduction: str, optional
:param initial_log_std: Initial value for the log standard deviation (default: 0)
:type initial_log_std: float, optional
:param network: Network definition (default: [])
Expand Down Expand Up @@ -115,4 +120,5 @@ def compute(self, inputs, role=""):
clip_log_std=clip_log_std,
min_log_std=min_log_std,
max_log_std=max_log_std,
reduction=reduction,
)
29 changes: 17 additions & 12 deletions skrl/utils/model_instantiators/torch/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,25 +51,28 @@ def shared_model(

def get_init(class_name, parameter, role):
if class_name.lower() == "categoricalmixin":
return f'CategoricalMixin.__init__(self, unnormalized_log_prob={parameter["unnormalized_log_prob"]}, role="{role}")'
return f'CategoricalMixin.__init__(self, unnormalized_log_prob={parameter.get("unnormalized_log_prob", True)}, role="{role}")'
elif class_name.lower() == "deterministicmixin":
return f'DeterministicMixin.__init__(self, clip_actions={parameter["clip_actions"]}, role="{role}")'
return (
f'DeterministicMixin.__init__(self, clip_actions={parameter.get("clip_actions", False)}, role="{role}")'
)
elif class_name.lower() == "gaussianmixin":
return f"""GaussianMixin.__init__(
self,
clip_actions={parameter["clip_actions"]},
clip_log_std={parameter["clip_log_std"]},
min_log_std={parameter["min_log_std"]},
max_log_std={parameter["max_log_std"]},
clip_actions={parameter.get("clip_actions", False)},
clip_log_std={parameter.get("clip_log_std", True)},
min_log_std={parameter.get("min_log_std", -20)},
max_log_std={parameter.get("max_log_std", 2)},
reduction="{parameter.get("reduction", "sum")}",
role="{role}",
)"""
elif class_name.lower() == "multivariategaussianmixin":
return f"""MultivariateGaussianMixin.__init__(
self,
clip_actions={parameter["clip_actions"]},
clip_log_std={parameter["clip_log_std"]},
min_log_std={parameter["min_log_std"]},
max_log_std={parameter["max_log_std"]},
clip_actions={parameter.get("clip_actions", False)},
clip_log_std={parameter.get("clip_log_std", True)},
min_log_std={parameter.get("min_log_std", -20)},
max_log_std={parameter.get("max_log_std", 2)},
role="{role}",
)"""
raise ValueError(f"Unknown class: {class_name}")
Expand All @@ -91,9 +94,11 @@ def get_extra(class_name, parameter, role, model):
elif class_name.lower() == "deterministicmixin":
return ""
elif class_name.lower() == "gaussianmixin":
return f'self.log_std_parameter = nn.Parameter(torch.full(size=({model["output"]["size"]},), fill_value={float(parameter["initial_log_std"])}))'
initial_log_std = float(parameter.get("initial_log_std", 0))
return f'self.log_std_parameter = nn.Parameter(torch.full(size=({model["output"]["size"]},), fill_value={initial_log_std}))'
elif class_name.lower() == "multivariategaussianmixin":
return f'self.log_std_parameter = nn.Parameter(torch.full(size=({model["output"]["size"]},), fill_value={float(parameter["initial_log_std"])}))'
initial_log_std = float(parameter.get("initial_log_std", 0))
return f'self.log_std_parameter = nn.Parameter(torch.full(size=({model["output"]["size"]},), fill_value={initial_log_std}))'
raise ValueError(f"Unknown class: {class_name}")

# compatibility with versions prior to 1.3.0
Expand Down

0 comments on commit 23b61dc

Please sign in to comment.