Add mu, ucb score logging during ap_container training
Summary:
Adding logging of mu and UCB scores during training. Based on the scalar metric logging added to RecMetrics in D53499300 (example: D53499301).
`lifetime_scalar` shows the average values for each batch.
`window_scalar` shows the smoothed average values.
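
For intuition, a minimal sketch of the smoothing behind a windowed scalar metric. The `WindowedScalar` helper below is hypothetical; the actual aggregation is done by RecMetrics as referenced above.

```python
from collections import deque


class WindowedScalar:
    """Hypothetical moving-average helper: feed it one per-batch scalar
    (e.g. the batch mean of mu or of the UCB scores) per training step."""

    def __init__(self, window_size: int = 100) -> None:
        self._window = deque(maxlen=window_size)

    def update(self, batch_value: float) -> None:
        # record the latest per-batch scalar
        self._window.append(batch_value)

    @property
    def value(self) -> float:
        # smoothed average over the most recent `window_size` batches
        return sum(self._window) / max(len(self._window), 1)
```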

Reviewed By: zxpmirror1994

Differential Revision: D54961342

fbshipit-source-id: 6df4dfcdefd71f1adaa266e4087d42de53af6ba4
Alex Nikulkov authored and facebook-github-bot committed Mar 18, 2024
1 parent b1f3ca1 commit b335f84
Showing 2 changed files with 9 additions and 3 deletions.
1 change: 0 additions & 1 deletion pearl/policy_learners/contextual_bandits/linear_bandit.py
@@ -153,7 +153,6 @@ def get_scores(
UCB scores when exploration module is UCB
Shape is (batch)
"""
-assert isinstance(self._exploration_module, ScoreExplorationBase)
feature = concatenate_actions_to_state(
subjective_state=subjective_state,
action_space=action_space,
11 changes: 9 additions & 2 deletions pearl/policy_learners/contextual_bandits/neural_linear_bandit.py
@@ -29,7 +29,7 @@
DEFAULT_ACTION_SPACE,
)
from pearl.policy_learners.exploration_modules.contextual_bandits.ucb_exploration import (
-UCBExploration,
+ScoreExplorationBase,
)
from pearl.policy_learners.exploration_modules.exploration_module import (
ExplorationModule,
@@ -151,6 +151,9 @@ def learn_batch(self, batch: TransitionBatch) -> Dict[str, Any]:
else torch.ones_like(expected_values)
)

+# get scores for logging
+ucb_scores = self.get_scores(input_features).mean()

# criterion = mae, mse, Xentropy
# Xentropy loss applies Sigmoid; MSE or MAE apply Identity
criterion = LOSS_TYPES[self.loss_type]
@@ -179,11 +182,14 @@ def learn_batch(self, batch: TransitionBatch) -> Dict[str, Any]:
batch_weight,
)
self._maybe_apply_discounting()
+predicted_values = predicted_values.detach() # detach for logging
return {
"label": expected_values,
"prediction": predicted_values,
"weight": batch_weight,
"loss": loss.detach(),
"scores:ucb": ucb_scores,
"scores:mu": predicted_values.mean(),
}

def act(
@@ -219,6 +225,7 @@ def act(
representation=self.model._linear_regression_layer,
)

+@torch.no_grad() # the UCB scores don't need the gradients
def get_scores(
self,
subjective_state: SubjectiveState,
@@ -238,7 +245,7 @@ def get_scores(
# dim: [batch_size, num_arms, feature_dim]
model_ret = self.model.forward_with_intermediate_values(feature)
# dim: [batch_size * num_arms, 1]
-assert isinstance(self._exploration_module, UCBExploration)
+assert isinstance(self._exploration_module, ScoreExplorationBase)
scores = self._exploration_module.get_scores(
subjective_state=model_ret["nn_output"],
values=model_ret["pred_label"],
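
For reference, a minimal sketch of how a training loop could read the two new entries returned by `learn_batch`. The `policy_learner` and `batch` objects are assumed to be provided by the surrounding training code; the actual metric aggregation happens in RecMetrics as described in the summary.

```python
# Sketch only: consume the new logging entries added in this commit.
metrics = policy_learner.learn_batch(batch)

# "scores:ucb" is the batch mean of the UCB scores, "scores:mu" the batch
# mean of the predicted values; both are detached 0-dim tensors.
ucb_scalar = metrics["scores:ucb"].item()
mu_scalar = metrics["scores:mu"].item()
print(f"batch ucb: {ucb_scalar:.4f}, batch mu: {mu_scalar:.4f}")
```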
