pytorch · vmoens · Apr 8, 2024 · Mar 29, 2024 · Apr 2, 2024 · Apr 2, 2024
diff --git a/torchrl/envs/libs/pettingzoo.py b/torchrl/envs/libs/pettingzoo.py
@@ -90,12 +90,12 @@ class PettingZooWrapper(_EnvWrapper):
     If the number of agents during the task varies, please set ``use_mask=True``.
     ``"mask"`` will be provided
     as an output in each group and should be used to mask out dead agents.
-    The environment will be reset as soon as one agent is done.
+    The environment will be reset as soon as one agent is done (unless ``done_on_any`` is ``False``).
 
     In wrapped ``pettingzoo.AECEnv``, at each step only one agent will act.
     For this reason, it is compulsory to set ``use_mask=True`` for this type of environment.
     ``"mask"`` will be provided as an output for each group and can be used to mask out non-acting agents.
-    The environment will be reset only when all agents are done.
+    The environment will be reset only when all agents are done (unless ``done_on_any`` is ``True``).
 
     If there are any unavailable actions for an agent,
     the environment will also automatically update the mask of its ``action_spec`` and output an ``"action_mask"``
@@ -156,6 +156,9 @@ class PettingZooWrapper(_EnvWrapper):
         categorical_actions (bool, optional): if the enviornments actions are discrete, whether to transform
             them to categorical or one-hot.
         seed (int, optional): the seed. Defaults to ``None``.
+        done_on_any (bool, optional): whether the environment's done keys are set by aggregating the agent keys
+            using ``any()`` (when ``True``) or ``all()`` (when ``False``). Defaults to ``None``, which uses
+            ``any()`` for parallel environments and ``all()`` for AEC ones.
 
     Examples:
         >>> # Parallel env
@@ -204,6 +207,7 @@ def __init__(
         use_mask: bool = False,
         categorical_actions: bool = True,
         seed: int | None = None,
+        done_on_any: bool = None,
         **kwargs,
     ):
         if env is not None:
@@ -214,6 +218,7 @@ def __init__(
         self.seed = seed
         self.use_mask = use_mask
         self.categorical_actions = categorical_actions
+        self.done_on_any = done_on_any
 
         super().__init__(**kwargs, allow_done_after_reset=True)
 
@@ -283,6 +288,9 @@ def _make_specs(
             "pettingzoo.utils.env.AECEnv",  # noqa: F821
         ],
     ) -> None:
+        # Resolve the ``None`` default of done_on_any: any() for parallel envs, all() for AEC envs
+        if self.done_on_any is None:
+            self.done_on_any = self.parallel
 
         # Create and check group map
         if self.group_map is None:
@@ -582,7 +590,6 @@ def _step(
         self,
         tensordict: TensorDictBase,
     ) -> TensorDictBase:
-
         if self.parallel:
             (
                 observation_dict,
@@ -651,16 +658,33 @@ def _step(
                                 value, device=self.device
                             )
 
-                elif not self.use_action_mask:
+                elif self.use_mask:
+                    if agent in self.agents:
+                        raise ValueError(
+                            f"Dead agent {agent} not found in step observation but still available in {self.agents}"
+                        )
+                    # Dead agent: a done flag missing from the step dicts defaults to True
+                    terminated = (
+                        terminations_dict[agent] if agent in terminations_dict else True
+                    )
+                    truncated = (
+                        truncations_dict[agent] if agent in truncations_dict else True
+                    )
+                    done = terminated or truncated
+                    group_done[index] = done
+                    group_terminated[index] = terminated
+                    group_truncated[index] = truncated
+
+                else:
                     # Dead agent, if we are not masking it out, this is not allowed
                     raise ValueError(
                         "Dead agents found in the environment,"
-                        " you need to set use_action_mask=True to allow this."
+                        " you need to set use_mask=True to allow this."
                     )
 
         # set done values
         done, terminated, truncated = self._aggregate_done(
-            tensordict_out, use_any=self.parallel
+            tensordict_out, use_any=self.done_on_any
         )
 
         tensordict_out.set("done", done)
@@ -673,7 +697,7 @@ def _aggregate_done(self, tensordict_out, use_any):
         truncated = False if use_any else True
         terminated = False if use_any else True
         for key in self.done_keys:
-            if isinstance(key, tuple):
+            if isinstance(key, tuple):  # Only look at group keys
                 if use_any:
                     if key[-1] == "done":
                         done = done | tensordict_out.get(key).any()
@@ -719,7 +743,6 @@ def _step_aec(
         self,
         tensordict: TensorDictBase,
     ) -> Tuple[Dict, Dict, Dict, Dict, Dict]:
-
         for group, agents in self.group_map.items():
             if self.agent_selection in agents:
                 agent_index = agents.index(self._env.agent_selection)
@@ -747,7 +770,6 @@ def _step_aec(
         )
 
     def _update_action_mask(self, td, observation_dict, info_dict):
-
         # Since we remove the action_mask keys we need to copy the data
         observation_dict = copy.deepcopy(observation_dict)
         info_dict = copy.deepcopy(info_dict)
@@ -821,15 +843,15 @@ class PettingZooEnv(PettingZooWrapper):
     If the number of agents during the task varies, please set ``use_mask=True``.
     ``"mask"`` will be provided
     as an output in each group and should be used to mask out dead agents.
-    The environment will be reset as soon as one agent is done.
+    The environment will be reset as soon as one agent is done (unless ``done_on_any`` is ``False``).
 
     For wrapping ``pettingzoo.AECEnv`` provide the name of your petting zoo task (in the ``task`` argument)
     and specify ``parallel=False``. This will construct the ``pettingzoo.AECEnv`` version of that task
     and wrap it for torchrl.
     In wrapped ``pettingzoo.AECEnv``, at each step only one agent will act.
     For this reason, it is compulsory to set ``use_mask=True`` for this type of environment.
     ``"mask"`` will be provided as an output for each group and can be used to mask out non-acting agents.
-    The environment will be reset only when all agents are done.
+    The environment will be reset only when all agents are done (unless ``done_on_any`` is ``True``).
 
     If there are any unavailable actions for an agent,
     the environment will also automatically update the mask of its ``action_spec`` and output an ``"action_mask"``
@@ -892,6 +914,9 @@ class PettingZooEnv(PettingZooWrapper):
         categorical_actions (bool, optional): if the enviornments actions are discrete, whether to transform
             them to categorical or one-hot.
         seed (int, optional): the seed.  Defaults to ``None``.
+        done_on_any (bool, optional): whether the environment's done keys are set by aggregating the agent keys
+            using ``any()`` (when ``True``) or ``all()`` (when ``False``). Defaults to ``None``, which uses
+            ``any()`` for parallel environments and ``all()`` for AEC ones.
 
     Examples:
         >>> # Parallel env
@@ -930,6 +955,7 @@ def __init__(
         use_mask: bool = False,
         categorical_actions: bool = True,
         seed: int | None = None,
+        done_on_any: bool = None,
         **kwargs,
     ):
         if not _has_pettingzoo:
@@ -944,6 +970,7 @@ def __init__(
         kwargs["use_mask"] = use_mask
         kwargs["categorical_actions"] = categorical_actions
         kwargs["seed"] = seed
+        kwargs["done_on_any"] = done_on_any
 
         super().__init__(**kwargs)