Fix IPPO and MAPPO act method return values when JAX-NumPy backend is enabled (#193)
Toni-SM · Aug 29, 2024 · 53b0243 · 53b0243
1 parent 238641e
commit 53b0243
Show file tree

Hide file tree

Showing 3 changed files with 5 additions and 4 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -26,6 +26,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - Catch TensorBoard summary iterator exceptions in `TensorboardFileIterator` postprocessing utils
 - Fix automatic wrapper detection for Isaac Gym (previews), DeepMind and vectorized Gymnasium environments
 - Fix vectorized/parallel environments `reset` method return values when called more than once
+- Fix IPPO and MAPPO `act` method return values when JAX-NumPy backend is enabled
 
 ## [1.2.0] - 2024-06-23
 ### Added

diff --git a/skrl/multi_agents/jax/ippo/ippo.py b/skrl/multi_agents/jax/ippo/ippo.py
@@ -375,8 +375,8 @@ def act(self, states: Mapping[str, Union[np.ndarray, jax.Array]], timestep: int,
         outputs = {uid: d[2] for uid, d in zip(self.possible_agents, data)}
 
         if not self._jax:  # numpy backend
-            actions = {jax.device_get(_actions) for _actions in actions}
-            log_prob = {jax.device_get(_log_prob) for _log_prob in log_prob}
+            actions = {uid: jax.device_get(_actions) for uid, _actions in actions.items()}
+            log_prob = {uid: jax.device_get(_log_prob) for uid, _log_prob in log_prob.items()}
 
         self._current_log_prob = log_prob
 

diff --git a/skrl/multi_agents/jax/mappo/mappo.py b/skrl/multi_agents/jax/mappo/mappo.py
@@ -391,8 +391,8 @@ def act(self, states: Mapping[str, Union[np.ndarray, jax.Array]], timestep: int,
         outputs = {uid: d[2] for uid, d in zip(self.possible_agents, data)}
 
         if not self._jax:  # numpy backend
-            actions = {jax.device_get(_actions) for _actions in actions}
-            log_prob = {jax.device_get(_log_prob) for _log_prob in log_prob}
+            actions = {uid: jax.device_get(_actions) for uid, _actions in actions.items()}
+            log_prob = {uid: jax.device_get(_log_prob) for uid, _log_prob in log_prob.items()}
 
         self._current_log_prob = log_prob