diff --git a/CHANGELOG.md b/CHANGELOG.md
index d21be90e..aa715359 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,8 +23,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - Format code using Black code formatter (it's ugly, yes, but it does its job)
 
 ### Fixed
-- Moved the batch sampling inside gradient step loop for DQN, DDQN, DDPG (RNN), TD3 (RNN), SAC and SAC (RNN)
+- Move the batch sampling inside gradient step loop for DQN, DDQN, DDPG (RNN), TD3 (RNN), SAC and SAC (RNN)
 - Model state dictionary initialization for composite Gymnasium spaces in JAX
+- Add missing `reduction` parameter to Gaussian model instantiator
 
 ### Removed
 - Remove OpenAI Gym (`gym`) from dependencies and source code. **skrl** continues to support gym environments,
diff --git a/skrl/utils/model_instantiators/jax/gaussian.py b/skrl/utils/model_instantiators/jax/gaussian.py
index 529be7fd..916dedf3 100644
--- a/skrl/utils/model_instantiators/jax/gaussian.py
+++ b/skrl/utils/model_instantiators/jax/gaussian.py
@@ -21,6 +21,7 @@ def gaussian_model(
     clip_log_std: bool = True,
     min_log_std: float = -20,
     max_log_std: float = 2,
+    reduction: str = "sum",
     initial_log_std: float = 0,
     network: Sequence[Mapping[str, Any]] = [],
     output: Union[str, Sequence[str]] = "",
@@ -47,6 +48,10 @@
     :type min_log_std: float, optional
     :param max_log_std: Maximum value of the log standard deviation (default: 2)
     :type max_log_std: float, optional
+    :param reduction: Reduction method for returning the log probability density function (default: ``"sum"``).
+                      Supported values are ``"mean"``, ``"sum"``, ``"prod"`` and ``"none"``. If ``"none"``, the log probability density
+                      function is returned as a tensor of shape ``(num_samples, num_actions)`` instead of ``(num_samples, 1)``
+    :type reduction: str, optional
     :param initial_log_std: Initial value for the log standard deviation (default: 0)
     :type initial_log_std: float, optional
     :param network: Network definition (default: [])
@@ -117,4 +122,5 @@ def __call__(self, inputs, role):
         clip_log_std=clip_log_std,
         min_log_std=min_log_std,
         max_log_std=max_log_std,
+        reduction=reduction,
     )
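The PyTorch counterpart of this change follows below. As a minimal usage sketch (not part of the patch; the observation/action spaces and network layout are illustrative assumptions), the JAX instantiator can now be called with the new parameter:

import gymnasium as gym

from skrl.utils.model_instantiators.jax import gaussian_model

# hypothetical continuous spaces for illustration
observation_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(4,))
action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,))

# with reduction="none" the log probability density is returned per action,
# i.e. with shape (num_samples, num_actions) instead of the reduced (num_samples, 1)
policy = gaussian_model(
    observation_space=observation_space,
    action_space=action_space,
    reduction="none",
    network=[{"name": "net", "input": "STATES", "layers": [64, 64], "activations": "elu"}],
    output="ACTIONS",
)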
If "``none"``, the log probability density + function is returned as a tensor of shape ``(num_samples, num_actions)`` instead of ``(num_samples, 1)`` + :type reduction: str, optional :param initial_log_std: Initial value for the log standard deviation (default: 0) :type initial_log_std: float, optional :param network: Network definition (default: []) @@ -115,4 +120,5 @@ def compute(self, inputs, role=""): clip_log_std=clip_log_std, min_log_std=min_log_std, max_log_std=max_log_std, + reduction=reduction, ) diff --git a/skrl/utils/model_instantiators/torch/shared.py b/skrl/utils/model_instantiators/torch/shared.py index 49649a2b..193c4b42 100644 --- a/skrl/utils/model_instantiators/torch/shared.py +++ b/skrl/utils/model_instantiators/torch/shared.py @@ -51,25 +51,28 @@ def shared_model( def get_init(class_name, parameter, role): if class_name.lower() == "categoricalmixin": - return f'CategoricalMixin.__init__(self, unnormalized_log_prob={parameter["unnormalized_log_prob"]}, role="{role}")' + return f'CategoricalMixin.__init__(self, unnormalized_log_prob={parameter.get("unnormalized_log_prob", True)}, role="{role}")' elif class_name.lower() == "deterministicmixin": - return f'DeterministicMixin.__init__(self, clip_actions={parameter["clip_actions"]}, role="{role}")' + return ( + f'DeterministicMixin.__init__(self, clip_actions={parameter.get("clip_actions", False)}, role="{role}")' + ) elif class_name.lower() == "gaussianmixin": return f"""GaussianMixin.__init__( self, - clip_actions={parameter["clip_actions"]}, - clip_log_std={parameter["clip_log_std"]}, - min_log_std={parameter["min_log_std"]}, - max_log_std={parameter["max_log_std"]}, + clip_actions={parameter.get("clip_actions", False)}, + clip_log_std={parameter.get("clip_log_std", True)}, + min_log_std={parameter.get("min_log_std", -20)}, + max_log_std={parameter.get("max_log_std", 2)}, + reduction="{parameter.get("reduction", "sum")}", role="{role}", )""" elif class_name.lower() == "multivariategaussianmixin": return f"""MultivariateGaussianMixin.__init__( self, - clip_actions={parameter["clip_actions"]}, - clip_log_std={parameter["clip_log_std"]}, - min_log_std={parameter["min_log_std"]}, - max_log_std={parameter["max_log_std"]}, + clip_actions={parameter.get("clip_actions", False)}, + clip_log_std={parameter.get("clip_log_std", True)}, + min_log_std={parameter.get("min_log_std", -20)}, + max_log_std={parameter.get("max_log_std", 2)}, role="{role}", )""" raise ValueError(f"Unknown class: {class_name}") @@ -91,9 +94,11 @@ def get_extra(class_name, parameter, role, model): elif class_name.lower() == "deterministicmixin": return "" elif class_name.lower() == "gaussianmixin": - return f'self.log_std_parameter = nn.Parameter(torch.full(size=({model["output"]["size"]},), fill_value={float(parameter["initial_log_std"])}))' + initial_log_std = float(parameter.get("initial_log_std", 0)) + return f'self.log_std_parameter = nn.Parameter(torch.full(size=({model["output"]["size"]},), fill_value={initial_log_std}))' elif class_name.lower() == "multivariategaussianmixin": - return f'self.log_std_parameter = nn.Parameter(torch.full(size=({model["output"]["size"]},), fill_value={float(parameter["initial_log_std"])}))' + initial_log_std = float(parameter.get("initial_log_std", 0)) + return f'self.log_std_parameter = nn.Parameter(torch.full(size=({model["output"]["size"]},), fill_value={initial_log_std}))' raise ValueError(f"Unknown class: {class_name}") # compatibility with versions prior to 1.3.0