pytorch · SandishKumarHN · Feb 12, 2024 · Feb 17, 2024 · Feb 17, 2024 · Feb 17, 2024
diff --git a/test/test_cost.py b/test/test_cost.py
@@ -491,7 +491,11 @@ def test_dqn(self, delay_value, double_dqn, device, action_spec_type, td_est):
             action_spec_type=action_spec_type, device=device
         )
         loss_fn = DQNLoss(
-            actor, loss_function="l2", delay_value=delay_value, double_dqn=double_dqn
+            actor,
+            loss_function="l2",
+            delay_value=delay_value,
+            double_dqn=double_dqn,
+            return_tensorclass=False,
         )
         if td_est in (ValueEstimators.GAE, ValueEstimators.VTrace):
             with pytest.raises(NotImplementedError):
@@ -1490,6 +1494,7 @@ def test_ddpg(self, delay_actor, delay_value, device, td_est):
             loss_function="l2",
             delay_actor=delay_actor,
             delay_value=delay_value,
+            return_tensorclass=False,
         )
         if td_est in (ValueEstimators.GAE, ValueEstimators.VTrace):
             with pytest.raises(NotImplementedError):
@@ -2118,6 +2123,7 @@ def test_td3(
             noise_clip=noise_clip,
             delay_actor=delay_actor,
             delay_qvalue=delay_qvalue,
+            return_tensorclass=False,
         )
         if td_est in (ValueEstimators.GAE, ValueEstimators.VTrace):
             with pytest.raises(NotImplementedError):
@@ -4216,6 +4222,7 @@ def test_redq(self, delay_qvalue, num_qvalue, device, td_est):
             num_qvalue_nets=num_qvalue,
             loss_function="l2",
             delay_qvalue=delay_qvalue,
+            return_tensorclass=False,
         )
         if td_est in (ValueEstimators.GAE, ValueEstimators.VTrace):
             with pytest.raises(NotImplementedError):
@@ -5013,6 +5020,7 @@ def test_cql(
             with_lagrange=with_lagrange,
             delay_actor=delay_actor,
             delay_qvalue=delay_qvalue,
+            return_tensorclass=False,
         )
 
         if td_est in (ValueEstimators.GAE, ValueEstimators.VTrace):
@@ -6648,7 +6656,13 @@ def test_a2c(self, device, gradient_mode, advantage, td_est, functional):
         else:
             raise NotImplementedError
 
-        loss_fn = A2CLoss(actor, value, loss_critic_type="l2", functional=functional)
+        loss_fn = A2CLoss(
+            actor,
+            value,
+            loss_critic_type="l2",
+            functional=functional,
+            return_tensorclass=False,
+        )
 
         # Check error is raised when actions require grads
         td["action"].requires_grad = True
@@ -7113,6 +7127,7 @@ def test_reinforce_value_net(
             critic_network=value_net,
             delay_value=delay_value,
             functional=functional,
+            return_tensorclass=False,
         )
 
         td = TensorDict(
@@ -7705,6 +7720,7 @@ def test_dreamer_world_model(
             reco_loss=reco_loss,
             delayed_clamp=delayed_clamp,
             free_nats=free_nats,
+            return_tensorclass=False,
         )
         loss_td, _ = loss_module(tensordict)
         for loss_str, lmbda in zip(
@@ -8525,6 +8541,7 @@ def test_iql(
             temperature=temperature,
             expectile=expectile,
             loss_function="l2",
+            return_tensorclass=False,
         )
         if td_est in (ValueEstimators.GAE, ValueEstimators.VTrace):
             with pytest.raises(NotImplementedError):

diff --git a/torchrl/objectives/a2c.py b/torchrl/objectives/a2c.py
@@ -2,14 +2,16 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
 import contextlib
 import warnings
 from copy import deepcopy
 from dataclasses import dataclass
 from typing import Tuple
 
 import torch
-from tensordict import TensorDict, TensorDictBase
+from tensordict import tensorclass, TensorDict, TensorDictBase
 from tensordict.nn import dispatch, ProbabilisticTensorDictSequential, TensorDictModule
 from tensordict.utils import NestedKey
 from torch import distributions as d
@@ -31,6 +33,20 @@
 )
 
 
+@tensorclass
+class A2CLosses:
+    """The tensorclass for The A2CLoss Loss class."""
+
+    loss_objective: torch.Tensor
+    loss_critic: torch.Tensor | None = None
+    loss_entropy: torch.Tensor | None = None
+    entropy: torch.Tensor | None = None
+
+    @property
+    def aggregate_loss(self):
+        return self.loss_critic + self.loss_objective + self.loss_entropy
+
+
 class A2CLoss(LossModule):
     """TorchRL implementation of the A2C loss.
 
@@ -234,6 +250,7 @@ def __init__(
         functional: bool = True,
         actor: ProbabilisticTensorDictSequential = None,
         critic: ProbabilisticTensorDictSequential = None,
+        return_tensorclass: bool = False,
     ):
         if actor is not None:
             actor_network = actor
@@ -290,6 +307,7 @@ def __init__(
         if gamma is not None:
             raise TypeError(_GAMMA_LMBDA_DEPREC_ERROR)
         self.loss_critic_type = loss_critic_type
+        self.return_tensorclass = return_tensorclass
 
     @property
     def functional(self):
@@ -445,7 +463,7 @@ def _cached_detach_critic_network_params(self):
         return self.critic_network_params.detach()
 
     @dispatch()
-    def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
+    def forward(self, tensordict: TensorDictBase) -> A2CLosses:
-    def forward(self, tensordict: TensorDictBase) -> A2CLosses:
+    def forward(self, tensordict: TensorDictBase) -> A2CLosses | TensorDictBase:
-    def forward(self, tensordict: TensorDictBase) -> A2CLosses:
+    def forward(self, tensordict: TensorDictBase) -> A2CLosses | TensorDictBase:
         tensordict = tensordict.clone(False)
         advantage = tensordict.get(self.tensor_keys.advantage, None)
         if advantage is None:
@@ -466,6 +484,8 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
         if self.critic_coef:
             loss_critic = self.loss_critic(tensordict).mean()
             td_out.set("loss_critic", loss_critic.mean())
+        if self.return_tensorclass:
+            return A2CLosses._from_tensordict(td_out)
         return td_out
 
     def make_value_estimator(self, value_type: ValueEstimators = None, **hyperparams):

diff --git a/torchrl/objectives/cql.py b/torchrl/objectives/cql.py
@@ -2,6 +2,8 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
 import math
 import warnings
 from copy import deepcopy
@@ -12,7 +14,7 @@
 import numpy as np
 import torch
 import torch.nn as nn
-from tensordict import TensorDict, TensorDictBase
+from tensordict import tensorclass, TensorDict, TensorDictBase
 from tensordict.nn import dispatch, TensorDictModule
 from tensordict.utils import NestedKey, unravel_key
 from torch import Tensor
@@ -36,6 +38,20 @@
 from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator
 
 
+@tensorclass
+class CQLLosses:
+    """The tensorclass for The CQLLoss Loss class."""
+
+    loss_objective: torch.Tensor
+    loss_critic: torch.Tensor | None = None
+    loss_entropy: torch.Tensor | None = None
+    entropy: torch.Tensor | None = None
+
+    @property
+    def aggregate_loss(self):
+        return self.loss_critic + self.loss_objective + self.loss_entropy
+
+
 class CQLLoss(LossModule):
     """TorchRL implementation of the continuous CQL loss.
 
@@ -269,6 +285,7 @@ def __init__(
         num_random: int = 10,
         with_lagrange: bool = False,
         lagrange_thresh: float = 0.0,
+        return_tensorclass: bool = False,
     ) -> None:
         self._out_keys = None
         super().__init__()
@@ -354,6 +371,7 @@ def __init__(
         self._vmap_qvalue_network00 = _vmap_func(
             self.qvalue_network, randomness=self.vmap_randomness
         )
+        self.return_tensorclass = return_tensorclass
 
     @property
     def target_entropy(self):
@@ -521,7 +539,10 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
         }
         if self.with_lagrange:
             out["loss_alpha_prime"] = alpha_prime_loss.mean()
-        return TensorDict(out, [])
+        td_out = TensorDict(out, [])
+        if self.return_tensorclass:
+            return CQLLosses._from_tensordict(td_out)
+        return td_out
 
     @property
     @_cache_values
@@ -1000,6 +1021,7 @@ def __init__(
         delay_value: bool = True,
         gamma: float = None,
         action_space=None,
+        return_tensorclass: bool = False,
     ) -> None:
         super().__init__()
         self._in_keys = None
@@ -1040,6 +1062,7 @@ def __init__(
 
         if gamma is not None:
             raise TypeError(_GAMMA_LMBDA_DEPREC_ERROR)
+        self.return_tensorclass = return_tensorclass
 
     def _forward_value_estimator_keys(self, **kwargs) -> None:
         if self._value_estimator is not None:
@@ -1171,7 +1194,7 @@ def value_loss(
         return loss, metadata
 
     @dispatch
-    def forward(self, tensordict: TensorDictBase) -> TensorDict:
+    def forward(self, tensordict: TensorDictBase) -> CQLLosses:
         """Computes the (DQN) CQL loss given a tensordict sampled from the replay buffer.
 
         This function will also write a "td_error" key that can be used by prioritized replay buffers to assign
@@ -1196,6 +1219,8 @@ def forward(self, tensordict: TensorDictBase) -> TensorDict:
             source=source,
             batch_size=[],
         )
+        if self.return_tensorclass:
+            return CQLLosses._from_tensordict(td_out)
 
         return td_out
 

diff --git a/torchrl/objectives/ddpg.py b/torchrl/objectives/ddpg.py
@@ -10,7 +10,7 @@
 from typing import Tuple
 
 import torch
-from tensordict import TensorDict, TensorDictBase
+from tensordict import tensorclass, TensorDict, TensorDictBase
 from tensordict.nn import dispatch, TensorDictModule
 
 from tensordict.utils import NestedKey, unravel_key
@@ -26,6 +26,20 @@
 from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator
 
 
+@tensorclass
+class DDPGLosses:
+    """The tensorclass for The DDPGLoss class."""
+
+    loss_objective: torch.Tensor
+    loss_critic: torch.Tensor | None = None
+    loss_entropy: torch.Tensor | None = None
+    entropy: torch.Tensor | None = None
+
+    @property
+    def aggregate_loss(self):
+        return self.loss_critic + self.loss_objective + self.loss_entropy
+
+
 class DDPGLoss(LossModule):
     """The DDPG Loss class.
 
@@ -189,6 +203,7 @@ def __init__(
         delay_value: bool = True,
         gamma: float = None,
         separate_losses: bool = False,
+        return_tensorclass: bool = False,
     ) -> None:
         self._in_keys = None
         super().__init__()
@@ -229,6 +244,7 @@ def __init__(
         )
 
         self.loss_function = loss_function
+        self.return_tensorclass = return_tensorclass
 
         if gamma is not None:
             raise TypeError(_GAMMA_LMBDA_DEPREC_ERROR)
@@ -266,7 +282,7 @@ def in_keys(self, values):
         self._in_keys = values
 
     @dispatch
-    def forward(self, tensordict: TensorDictBase) -> TensorDict:
+    def forward(self, tensordict: TensorDictBase) -> DDPGLosses:
         """Computes the DDPG losses given a tensordict sampled from the replay buffer.
 
         This function will also write a "td_error" key that can be used by prioritized replay buffers to assign
@@ -283,10 +299,13 @@ def forward(self, tensordict: TensorDictBase) -> TensorDict:
         loss_value, metadata = self.loss_value(tensordict)
         loss_actor, metadata_actor = self.loss_actor(tensordict)
         metadata.update(metadata_actor)
-        return TensorDict(
+        td_out = TensorDict(
             source={"loss_actor": loss_actor, "loss_value": loss_value, **metadata},
             batch_size=[],
         )
+        if self.return_tensorclass:
+            return DDPGLosses._from_tensordict(td_out)
+        return td_out
 
     def loss_actor(
         self,