[Feature] adding tensor classes annotation for loss functions #1905
base: main
Changes from 17 commits
torchrl/objectives/a2c.py
@@ -2,14 +2,16 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
 import contextlib
 import warnings
 from copy import deepcopy
 from dataclasses import dataclass
 from typing import Tuple

 import torch
-from tensordict import TensorDict, TensorDictBase
+from tensordict import tensorclass, TensorDict, TensorDictBase
 from tensordict.nn import dispatch, ProbabilisticTensorDictSequential, TensorDictModule
 from tensordict.utils import NestedKey
 from torch import distributions as d
@@ -33,6 +35,34 @@
 )


+class LossContainerBase:
+    """ContainerBase class loss tensorclass's."""

Reviewer: That isn't very explicit. We should say what this class is about. Also I think it should live in the

Reviewer: I'm also wondering if we should not just make the base a tensorclass and inherit from it, without creating new tensorclasses?

Author: If I try to make the base a tensorclass, I get the error below.

+
+    __getitem__ = TensorDictBase.__getitem__
+
+    def aggregate_loss(self):
+        result = 0.0
+        for key in self.__dataclass_attr__:
+            if key.startswith("loss_"):
+                result += getattr(self, key)
+        return result

Reviewer: Should be a property:
    result = torch.zeros((), device=self.device)
    ...
    return result

Reviewer: Missing docstring for this method.
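For concreteness, here is a sketch of the base's aggregate_loss with both review comments applied: a property, a tensor accumulator, and a docstring. This is a hypothetical revision, not code from the diff; it also keeps the PR's `__dataclass_attr__` loop, though the standard dataclass attribute is `__dataclass_fields__`.

    @property
    def aggregate_loss(self) -> torch.Tensor:
        """Return the sum of all fields whose name starts with ``loss_``."""
        # Accumulate into a scalar tensor on the container's device,
        # as suggested, instead of into a Python float.
        result = torch.zeros((), device=self.device)
        for key in self.__dataclass_attr__:
            if key.startswith("loss_"):
                result = result + getattr(self, key)
        return result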
+
+
+@tensorclass

Reviewer: Doesn't it work if we make the base class a tensorclass?

Author: Yes, it doesn't work.

+class A2CLosses(LossContainerBase):
+    """The tensorclass for The A2CLoss Loss class."""
+
+    loss_actor: torch.Tensor
+    loss_objective: torch.Tensor
+    loss_critic: torch.Tensor | None = None
+    loss_entropy: torch.Tensor | None = None
+    entropy: torch.Tensor | None = None
+
+    @property
+    def aggregate_loss(self):
+        return self.loss_critic + self.loss_objective + self.loss_entropy

Reviewer: No need to recode this.
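As a self-contained illustration of the pattern under discussion (a plain Python base class providing shared behavior, with tensorclass subclasses), here is a minimal sketch that runs with only torch and tensordict installed. ToyLosses and its fields are made up for the example, and the standard `__dataclass_fields__` attribute stands in for the PR's `__dataclass_attr__`:

    from typing import Optional

    import torch
    from tensordict import tensorclass


    class AggregateBase:
        """Plain base class: not itself a tensorclass, only shared behavior."""

        def aggregate_loss(self) -> torch.Tensor:
            # Sum every dataclass field whose name starts with "loss_".
            result = torch.zeros(())
            for key in self.__dataclass_fields__:
                if key.startswith("loss_"):
                    value = getattr(self, key)
                    if value is not None:
                        result = result + value
            return result


    @tensorclass
    class ToyLosses(AggregateBase):
        loss_objective: torch.Tensor
        loss_critic: Optional[torch.Tensor] = None


    losses = ToyLosses(
        loss_objective=torch.tensor(1.0),
        loss_critic=torch.tensor(0.5),
        batch_size=[],
    )
    print(losses.aggregate_loss())  # tensor(1.5000)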
 class A2CLoss(LossModule):
     """TorchRL implementation of the A2C loss.
@@ -129,6 +159,16 @@ class A2CLoss(LossModule):
         batch_size=torch.Size([]),
         device=None,
         is_shared=False)
+    >>> loss = A2CLoss(actor, value, loss_critic_type="l2", return_tensorclass=True)
+    >>> loss(data)
+    A2CLosses(
+        entropy=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+        loss_critic=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+        loss_entropy=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+        loss_objective=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+        batch_size=torch.Size([]),
+        device=None,
+        is_shared=False)

     This class is compatible with non-tensordict based modules too and can be
     used without recurring to any tensordict-related primitive. In this case,
@@ -174,7 +214,7 @@ class A2CLoss(LossModule):
     method.

     Examples:
-        >>> loss.select_out_keys('loss_objective', 'loss_critic')
+        >>> _ = loss.select_out_keys('loss_objective', 'loss_critic')
        >>> loss_obj, loss_critic = loss(
        ...     observation = torch.randn(*batch, n_obs),
        ...     action = spec.rand(batch),
@@ -240,6 +280,7 @@ def __init__(
         functional: bool = True,
         actor: ProbabilisticTensorDictSequential = None,
         critic: ProbabilisticTensorDictSequential = None,
+        return_tensorclass: bool = False,

Reviewer: Should be added to the docstrings.

Author: Working on it.

Author: @vmoens I added doctests for the tensorclass changes, but I see some doctest issues and blockers. Can you please help me resolve them?

         reduction: str = None,
     ):
         if actor is not None:
@@ -300,6 +341,7 @@ def __init__(
         if gamma is not None:
             raise TypeError(_GAMMA_LMBDA_DEPREC_ERROR)
         self.loss_critic_type = loss_critic_type
+        self.return_tensorclass = return_tensorclass

     @property
     def functional(self):
@@ -455,7 +497,7 @@ def _cached_detach_critic_network_params(self):
         return self.critic_network_params.detach()

     @dispatch()
-    def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
+    def forward(self, tensordict: TensorDictBase) -> A2CLosses:

Reviewer posted a suggested change on this line (the proposed replacement was not captured here).

         tensordict = tensordict.clone(False)
         advantage = tensordict.get(self.tensor_keys.advantage, None)
         if advantage is None:
@@ -474,6 +516,10 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
             td_out.set("entropy", entropy.detach().mean())  # for logging
             td_out.set("loss_entropy", -self.entropy_coef * entropy)
         if self.critic_coef:
-            loss_critic = self.loss_critic(tensordict)
-            td_out.set("loss_critic", loss_critic)
+            loss_critic = self.loss_critic(tensordict).mean()
+            td_out.set("loss_critic", loss_critic.mean())
+        if self.return_tensorclass:
+            return A2CLosses._from_tensordict(td_out)
         td_out = td_out.named_apply(
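Building on the docstring example above (with actor, value and data as defined there), the tensorclass return value could then be consumed along these lines; a hypothetical usage sketch, relying on the aggregate_loss property that A2CLosses defines:

    >>> loss = A2CLoss(actor, value, loss_critic_type="l2", return_tensorclass=True)
    >>> losses = loss(data)               # an A2CLosses instance, not a TensorDict
    >>> losses.aggregate_loss.backward()  # loss_critic + loss_objective + loss_entropy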
torchrl/objectives/cql.py
@@ -2,6 +2,8 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
 import math
 import warnings
 from copy import deepcopy

@@ -12,7 +14,7 @@
 import numpy as np
 import torch
 import torch.nn as nn
-from tensordict import TensorDict, TensorDictBase
+from tensordict import tensorclass, TensorDict, TensorDictBase
 from tensordict.nn import dispatch, TensorDictModule
 from tensordict.utils import NestedKey, unravel_key
 from torch import Tensor
@@ -36,6 +38,32 @@
 from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator


+class LossContainerBase:
+    """ContainerBase class loss tensorclass's."""
+
+    __getitem__ = TensorDictBase.__getitem__
+
+    def aggregate_loss(self):
+        result = 0.0
+        for key in self.__dataclass_attr__:
+            if key.startswith("loss_"):
+                result += getattr(self, key)
+        return result
+
+
+@tensorclass
+class CQLLosses(LossContainerBase):
+    """The tensorclass for The CQLLoss Loss class."""
+
+    alpha: torch.Tensor
+    loss_actor: torch.Tensor | None = None
+    loss_actor_bc: torch.Tensor | None = None
+    loss_qvalue: torch.Tensor | None = None
+    entropy: torch.Tensor | None = None
+    loss_alpha: torch.Tensor | None = None
+    loss_cql: torch.Tensor | None = None

Reviewer: Ditto (same comments as on the A2C file).


 class CQLLoss(LossModule):
     """TorchRL implementation of the continuous CQL loss.
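Note that LossContainerBase is duplicated verbatim from the first file. Following the earlier review comment that it should live in one place, a hypothetical refactor could host it in a shared module (the exact location is an assumption on my part, though torchrl/objectives/common.py already hosts LossModule):

    # Hypothetical: torchrl/objectives/common.py
    import torch
    from tensordict import TensorDictBase


    class LossContainerBase:
        """Base container for loss tensorclasses; sums all ``loss_*`` fields."""

        __getitem__ = TensorDictBase.__getitem__

        def aggregate_loss(self) -> torch.Tensor:
            result = torch.zeros(())
            for key in self.__dataclass_fields__:  # standard dataclass attribute
                if key.startswith("loss_"):
                    value = getattr(self, key)
                    if value is not None:
                        result = result + value
            return result

    # a2c.py and cql.py would then import it instead of redefining it:
    # from torchrl.objectives.common import LossContainerBase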
@@ -129,12 +157,27 @@ class CQLLoss(LossModule):
         entropy: Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
         loss_actor: Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
         loss_actor_bc: Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
         loss_alpha: Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
         loss_cql: Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
         loss_qvalue: Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False)},
         batch_size=torch.Size([]),
         device=None,
         is_shared=False)
+    >>> loss = CQLLoss(actor, qvalue, return_tensorclass=True)
+    >>> loss(data)
+    CQLLosses(
+        alpha=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+        entropy=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+        loss_actor=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+        loss_actor_bc=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+        loss_alpha=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+        loss_cql=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+        loss_qvalue=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+        batch_size=torch.Size([]),
+        device=None,
+        is_shared=False)

     This class is compatible with non-tensordict based modules too and can be
     used without recurring to any tensordict-related primitive. In this case,
@@ -174,20 +217,21 @@ class CQLLoss(LossModule):
     >>> loss = CQLLoss(actor, qvalue)
     >>> batch = [2, ]
     >>> action = spec.rand(batch)
-    >>> loss_actor, loss_actor_bc, loss_qvalue, loss_cql, *_ = loss(
+    >>> loss_actor, loss_qvalue, _, _, _, _ = loss(

Reviewer: Is this really the output now?

     ...     observation=torch.randn(*batch, n_obs),
     ...     action=action,
     ...     next_done=torch.zeros(*batch, 1, dtype=torch.bool),
     ...     next_terminated=torch.zeros(*batch, 1, dtype=torch.bool),
     ...     next_observation=torch.zeros(*batch, n_obs),
-    ...     next_reward=torch.randn(*batch, 1))
+    ...     next_reward=torch.randn(*batch, 1),
+    ... )
     >>> loss_actor.backward()

     The output keys can also be filtered using the :meth:`CQLLoss.select_out_keys`
     method.

     Examples:
-        >>> _ = loss.select_out_keys('loss_actor', 'loss_qvalue')
+        >>> loss.select_out_keys('loss_actor', 'loss_qvalue')
        >>> loss_actor, loss_qvalue = loss(
        ...     observation=torch.randn(*batch, n_obs),
        ...     action=action,
@@ -271,6 +315,7 @@ def __init__(
         num_random: int = 10,
         with_lagrange: bool = False,
         lagrange_thresh: float = 0.0,
+        return_tensorclass: bool = False,
     ) -> None:
         self._out_keys = None
         super().__init__()
@@ -356,6 +401,7 @@ def __init__(
         self._vmap_qvalue_network00 = _vmap_func(
             self.qvalue_network, randomness=self.vmap_randomness
         )
+        self.return_tensorclass = return_tensorclass

     @property
     def target_entropy(self):
@@ -524,7 +570,10 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
         }
         if self.with_lagrange:
             out["loss_alpha_prime"] = alpha_prime_loss.mean()
-        return TensorDict(out, [])
+        td_out = TensorDict(out, [])
+        if self.return_tensorclass:
+            return CQLLosses._from_tensordict(td_out)
+        return td_out

     @property
     @_cache_values
@@ -1007,6 +1056,7 @@ def __init__(
         delay_value: bool = True,
         gamma: float = None,
         action_space=None,
+        return_tensorclass: bool = False,
     ) -> None:
         super().__init__()
         self._in_keys = None

@@ -1047,6 +1097,7 @@ def __init__(

         if gamma is not None:
             raise TypeError(_GAMMA_LMBDA_DEPREC_ERROR)
+        self.return_tensorclass = return_tensorclass

     def _forward_value_estimator_keys(self, **kwargs) -> None:
         if self._value_estimator is not None:
@@ -1178,7 +1229,7 @@ def value_loss(
         return loss, metadata

     @dispatch
-    def forward(self, tensordict: TensorDictBase) -> TensorDict:
+    def forward(self, tensordict: TensorDictBase) -> CQLLosses:
         """Computes the (DQN) CQL loss given a tensordict sampled from the replay buffer.

         This function will also write a "td_error" key that can be used by prioritized replay buffers to assign

@@ -1203,6 +1254,8 @@ def forward(self, tensordict: TensorDictBase) -> TensorDict:
             source=source,
             batch_size=[],
         )
+        if self.return_tensorclass:
+            return CQLLosses._from_tensordict(td_out)

         return td_out
Reviewer: Not sure why we need these.
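To make the return_tensorclass branch concrete: CQLLosses._from_tensordict wraps an existing TensorDict in the tensorclass without copying its tensors. A minimal sketch, assuming tensordict is installed and CQLLosses is defined as in this diff; the field values are placeholders:

    import torch
    from tensordict import TensorDict

    # Mimic the `out` dictionary built at the end of CQLLoss.forward (dummy values).
    td_out = TensorDict(
        {
            "alpha": torch.tensor(0.2),
            "entropy": torch.tensor(0.9),
            "loss_qvalue": torch.tensor(1.3),
        },
        batch_size=[],
    )
    losses = CQLLosses._from_tensordict(td_out)  # a tensorclass view over td_out
    print(losses.loss_qvalue)  # tensor(1.3000)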