Commit f66059f: clean

natolambert committed Jun 12, 2024
1 parent c9e0cc1 commit f66059f
Showing 2 changed files with 7 additions and 16 deletions.
15 changes: 1 addition & 14 deletions rewardbench/dpo.py
@@ -146,20 +146,7 @@ def tokenize_row(self, feature) -> Dict:
                 batch[f"{k}{type_key}"] = tokens
 
         else:
-            chosen_tokens = self.tokenizer(
-                chosen, truncation=True, max_length=self.max_target_length, add_special_tokens=True
-            )
-            rejected_tokens = self.tokenizer(
-                rejected, truncation=True, max_length=self.max_target_length, add_special_tokens=True
-            )
-            prompt_tokens = self.tokenizer(
-                prompt, truncation=True, max_length=self.max_prompt_length, add_special_tokens=True
-            )
-
-            batch["chosen_labels"] = chosen_tokens["input_ids"]
-            batch["rejected_labels"] = rejected_tokens["input_ids"]
-            batch["prompt_input_ids"] = prompt_tokens["input_ids"]
-            batch["prompt_attention_mask"] = prompt_tokens["attention_mask"]
+            raise ValueError("Encoder-decoder models are not supported yet.")
 
         return batch

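For context, a minimal sketch of the behavior this hunk introduces, assuming a trainer-style object dpo built for an encoder-decoder model; the feature values are illustrative, and only the ValueError message comes from the diff above:

feature = {"prompt": "What is 2 + 2?", "chosen": "4", "rejected": "5"}
try:
    batch = dpo.tokenize_row(feature)
except ValueError as err:
    # The encoder-decoder branch no longer tokenizes; it raises immediately.
    print(err)  # Encoder-decoder models are not supported yet.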
8 changes: 6 additions & 2 deletions scripts/run_dpo.py
@@ -181,9 +181,13 @@ def main():
     # tokenize dataset
     column_names = list(dataset.features)
 
-    import ipdb; ipdb.set_trace()
+    import ipdb
+
+    ipdb.set_trace()
     tokenized_dataset = dataset.map(dpo.tokenize_row, remove_columns=column_names)
-    import ipdb; ipdb.set_trace()
+    import ipdb
+
+    ipdb.set_trace()
     dataloader = torch.utils.data.DataLoader(
         tokenized_dataset,
         batch_size=BATCH_SIZE,
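For context, a minimal sketch of the map-then-load pattern this hunk steps through (the ipdb.set_trace() calls drop into a debugger on either side of the map). The toy dataset, the BATCH_SIZE value, and the stand-in tokenize function are assumptions, not the script's real configuration:

import torch
from datasets import Dataset

BATCH_SIZE = 2  # illustrative; run_dpo.py defines its own batch size

# Toy preference data with the prompt/chosen/rejected columns the diff references.
dataset = Dataset.from_dict(
    {"prompt": ["What is 2 + 2?"], "chosen": ["4"], "rejected": ["5"]}
)
column_names = list(dataset.features)

def tokenize_row(feature):
    # Stand-in for dpo.tokenize_row: emit fixed-length ids so default collation works.
    return {"prompt_input_ids": [0, 1], "chosen_labels": [2, 3], "rejected_labels": [4, 5]}

tokenized_dataset = dataset.map(tokenize_row, remove_columns=column_names)
dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size=BATCH_SIZE)
for batch in dataloader:
    print(batch)  # each field arrives as a list of per-position tensors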
