CB bug fix

Summary: `get_scores` should be called with `batch.state` (rather than `input_features`) when computing the UCB scores used for logging.
Reviewed By: alexnikulkov
Differential Revision: D55813029
fbshipit-source-id: ef912c4f7c789944eb26476b8e24cc6a214bd57d
facebookresearch · Apr 9, 2024 · 9da5168 · 9da5168
1 parent 80651b0
commit 9da5168
Showing 1 changed file with 4 additions and 3 deletions.
diff --git a/pearl/policy_learners/contextual_bandits/neural_linear_bandit.py b/pearl/policy_learners/contextual_bandits/neural_linear_bandit.py
@@ -136,6 +136,10 @@ def optimizer(self) -> torch.optim.Optimizer:
         return self._optimizer
 
     def learn_batch(self, batch: TransitionBatch) -> Dict[str, Any]:
+
+        # get scores for logging purpose
+        ucb_scores = self.get_scores(batch.state).mean()
+
         if self._state_features_only:
             input_features = batch.state
         else:
@@ -151,9 +155,6 @@ def learn_batch(self, batch: TransitionBatch) -> Dict[str, Any]:
             else torch.ones_like(expected_values)
         )
 
-        # get scores for logging
-        ucb_scores = self.get_scores(input_features).mean()
-
         # criterion = mae, mse, Xentropy
         # Xentropy loss applies Sigmoid; MSE or MAE apply Identity
         criterion = LOSS_TYPES[self.loss_type]