Cast candidate response tensors to long and convert them to lists before decoding back to strings

lucidrains · Jan 25, 2024 · 67922a1
1 parent 5fbe322
commit 67922a1
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/self_rewarding_lm_pytorch/self_rewarding_lm_pytorch.py b/self_rewarding_lm_pytorch/self_rewarding_lm_pytorch.py
@@ -427,7 +427,7 @@ def forward(self) -> DPODataset:
                     )
                 )
 
-                candidate_responses: List[str] = [*map(self.tokenizer_decode, candidate_responses_tensor)]
+                candidate_responses: List[str] = [*map(self.tokenizer_decode, candidate_responses_tensor.long().tolist())]
 
                 # get rewards