volcengine · eric-haibin-lin · Dec 18, 2024 · Dec 17, 2024 · Dec 18, 2024 · Dec 18, 2024
diff --git a/verl/utils/reward_score/gsm8k.py b/verl/utils/reward_score/gsm8k.py
@@ -42,6 +42,17 @@ def extract_solution(solution_str, method='strict'):
 
 
 def compute_score(solution_str, ground_truth, method='strict', format_score=0., score=1.):
+    """The scoring function for GSM8K.
+
+    Reference: Trung, Luong, et al. "ReFT: Reasoning with Reinforced Fine-Tuning." Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024.
+
+    Args:
+        solution_str: the solution text
+        ground_truth: the ground truth
+        method: the method used to extract the solution; choices are 'strict' and 'flexible'
+        format_score: the score for the format
+        score: the score for the correct answer
+    """
     answer = extract_solution(solution_str=solution_str, method=method)
     if answer is None:
         return 0

diff --git a/verl/workers/critic/dp_critic.py b/verl/workers/critic/dp_critic.py
@@ -29,8 +29,6 @@
 from verl.utils.py_functional import append_to_dict
 from verl.utils.torch_functional import masked_mean
 
-from flash_attn.bert_padding import pad_input, unpad_input
-
 __all__ = ['DataParallelPPOCritic']