Support weighted loss in Pearl CB
Summary: Support weights in NN CB loss computation

Reviewed By: BerenLuthien, zxpmirror1994

Differential Revision: D53206841

fbshipit-source-id: 61ac050942b3546c9c94972dabc9997fc73c5d90
Alex Nikulkov authored and facebook-github-bot committed Jan 31, 2024
1 parent a55a3c8 commit 10a83b1
Showing 3 changed files with 17 additions and 4 deletions.
1 change: 0 additions & 1 deletion pearl/policy_learners/contextual_bandits/linear_bandit.py
@@ -70,7 +70,6 @@ def learn_batch(self, batch: TransitionBatch) -> Dict[str, Any]:
             else torch.ones_like(expected_values)
         )
         x = torch.cat([batch.state, batch.action], dim=1)
-        assert batch.weight is not None
         self.model.learn_batch(
             x=x,
             y=batch.reward,
10 changes: 9 additions & 1 deletion pearl/policy_learners/contextual_bandits/neural_bandit.py
@@ -103,7 +103,15 @@ def learn_batch(self, batch: TransitionBatch) -> Dict[str, Any]:
         predicted_values = self.model(input_features)
 
         criterion = LOSS_TYPES[self.loss_type]
-        loss = criterion(predicted_values.view(expected_values.shape), expected_values)
+
+        # don't reduce the loss, so that we can calculate weighted loss
+        loss = criterion(
+            predicted_values.view(expected_values.shape),
+            expected_values,
+            reduction="none",
+        )
+        assert loss.shape == batch_weight.shape
+        loss = (loss * batch_weight).sum() / batch_weight.sum()  # weighted average loss
 
         # Backward pass + optimizer step
         self.optimizer.zero_grad()
10 changes: 8 additions & 2 deletions pearl/policy_learners/contextual_bandits/neural_linear_bandit.py
@@ -128,8 +128,14 @@ def learn_batch(self, batch: TransitionBatch) -> Dict[str, Any]:
             assert torch.all(predicted_values >= 0) and torch.all(predicted_values <= 1)
             assert isinstance(self.model.output_activation, torch.nn.Sigmoid)
 
-        # TODO: handle weight in NN training by computing weighted loss
-        loss = criterion(predicted_values.view(expected_values.shape), expected_values)
+        # don't reduce the loss, so that we can calculate weighted loss
+        loss = criterion(
+            predicted_values.view(expected_values.shape),
+            expected_values,
+            reduction="none",
+        )
+        assert loss.shape == batch_weight.shape
+        loss = (loss * batch_weight).sum() / batch_weight.sum()  # weighted average loss
 
         # Optimize the NN via backpropagation
         self._optimizer.zero_grad()
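For illustration, here is a minimal standalone sketch of the weighted-average loss pattern this commit introduces in both neural bandit files (assuming PyTorch; the tensor values and the choice of mse_loss are hypothetical stand-ins for the model outputs and the configured criterion):

import torch
import torch.nn.functional as F

# Hypothetical per-sample predictions, targets, and sample weights.
predicted_values = torch.tensor([0.2, 0.7, 0.5])
expected_values = torch.tensor([0.0, 1.0, 1.0])
batch_weight = torch.tensor([1.0, 2.0, 0.5])

# reduction="none" keeps one loss value per sample instead of a scalar mean.
loss = F.mse_loss(predicted_values, expected_values, reduction="none")
assert loss.shape == batch_weight.shape

# Weighted average: higher-weight samples contribute proportionally more.
loss = (loss * batch_weight).sum() / batch_weight.sum()

Normalizing by batch_weight.sum() rather than the batch size keeps the loss on the same scale as an unweighted mean, so it reduces to the ordinary average when all weights equal 1.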
