From af3c7208c1b14ad98d70dee2baa209888025f808 Mon Sep 17 00:00:00 2001 From: shengguangming Date: Sat, 1 Feb 2025 12:45:34 +0800 Subject: [PATCH] [misc] fix: grpo kl loss should be added when doing minimization --- verl/workers/actor/dp_actor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verl/workers/actor/dp_actor.py b/verl/workers/actor/dp_actor.py index c8f75646..4db326a1 100644 --- a/verl/workers/actor/dp_actor.py +++ b/verl/workers/actor/dp_actor.py @@ -263,7 +263,7 @@ def update_policy(self, data: DataProto): kl_penalty=self.config.kl_loss_type) kl_loss = masked_mean(kld, response_mask) - policy_loss = policy_loss - kl_loss * self.config.kl_loss_coef + policy_loss = policy_loss + kl_loss * self.config.kl_loss_coef metrics['actor/kl_loss'] = kl_loss.detach().item() metrics['actor/kl_coef'] = self.config.kl_loss_coef