Commit

Merge branch 'main' into cgpo_mixture_of_judges
gaetanlop authored Oct 21, 2024
2 parents 567b798 + 84dab85 commit 1c33494
Showing 21 changed files with 702 additions and 908 deletions.
8 changes: 6 additions & 2 deletions README.md
@@ -66,13 +66,17 @@ You can use the TRL Command Line Interface (CLI) to quickly get started with Sup
**SFT:**

```bash
trl sft --model_name_or_path Qwen/Qwen2.5-0.5B --dataset_name trl-lib/Capybara --output_dir Qwen2.5-0.5B-SFT
trl sft --model_name_or_path Qwen/Qwen2.5-0.5B \
--dataset_name trl-lib/Capybara \
--output_dir Qwen2.5-0.5B-SFT
```
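For readers who prefer the Python API, here is a minimal sketch of roughly the same SFT run. It assumes the standard `SFTConfig`/`SFTTrainer` usage from the TRL quickstart and is illustrative only, not code from this diff:

```python
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

dataset = load_dataset("trl-lib/Capybara", split="train")

training_args = SFTConfig(output_dir="Qwen2.5-0.5B-SFT")
trainer = SFTTrainer(
    model="Qwen/Qwen2.5-0.5B",  # SFTTrainer also accepts an already-loaded model
    args=training_args,
    train_dataset=dataset,
)
trainer.train()
```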

**DPO:**

```bash
trl dpo --model_name_or_path Qwen/Qwen2.5-0.5B-Instruct --dataset_name argilla/Capybara-Preferences --output_dir Qwen2.5-0.5B-DPO
trl dpo --model_name_or_path Qwen/Qwen2.5-0.5B-Instruct \
--dataset_name argilla/Capybara-Preferences \
--output_dir Qwen2.5-0.5B-DPO
```
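Similarly, a rough Python-API sketch of the DPO command, assuming the usual `DPOConfig`/`DPOTrainer` pattern; the `processing_class` keyword is called `tokenizer` in older TRL releases, and again this is illustrative rather than code from the diff:

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
dataset = load_dataset("argilla/Capybara-Preferences", split="train")

training_args = DPOConfig(output_dir="Qwen2.5-0.5B-DPO")
trainer = DPOTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    processing_class=tokenizer,  # named `tokenizer` in older TRL releases
)
trainer.train()
```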

**Chat:**
2 changes: 1 addition & 1 deletion docs/source/dataset_formats.mdx
@@ -589,7 +589,7 @@ dataset = dataset.remove_columns(["chosen", "rejected"])

### From explicit to implicit prompt preference dataset

To convert a preference dataset with implicit prompt into a preference dataset with explicit prompt, concatenate the prompt to both chosen and rejected, and remove the prompt.
To convert a preference dataset with explicit prompt into a preference dataset with implicit prompt, concatenate the prompt to both chosen and rejected, and remove the prompt.

```python
from datasets import Dataset
# ...
```
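A minimal sketch of the conversion described above, using made-up rows; the `prompt`/`chosen`/`rejected` column names follow the explicit-prompt preference format, and the helper name is hypothetical:

```python
from datasets import Dataset

# Made-up explicit-prompt preference rows
dataset = Dataset.from_dict({
    "prompt": ["The sky is", "The capital of France is"],
    "chosen": [" blue.", " Paris."],
    "rejected": [" green.", " Berlin."],
})

def concat_prompt_to_completions(example):
    # Prepend the prompt to both completions; the prompt column is dropped by `map`
    return {
        "chosen": example["prompt"] + example["chosen"],
        "rejected": example["prompt"] + example["rejected"],
    }

dataset = dataset.map(concat_prompt_to_completions, remove_columns=["prompt"])
print(dataset[0])
# {'chosen': 'The sky is blue.', 'rejected': 'The sky is green.'}
```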
4 changes: 4 additions & 0 deletions docs/source/dpo_trainer.mdx
@@ -276,3 +276,7 @@ dpo_trainer = DPOTrainer(
## DPOConfig

[[autodoc]] DPOConfig

## PreferenceCollator

[[autodoc]] trainer.dpo_trainer.PreferenceCollator
12 changes: 0 additions & 12 deletions examples/scripts/dpo_vlm.py
@@ -27,7 +27,6 @@
"""

import torch
from accelerate import PartialState
from datasets import load_dataset
from transformers import AutoModelForVision2Seq, AutoProcessor

@@ -106,17 +105,6 @@
################
dataset = load_dataset(script_args.dataset_name)

def process(row):
row["prompt"] = processor.apply_chat_template(row["prompt"], tokenize=False)
row["chosen"] = processor.apply_chat_template(row["chosen"], tokenize=False)
row["rejected"] = processor.apply_chat_template(row["rejected"], tokenize=False)
return row

# Compute that only on the main process for faster data processing.
# see: https://github.com/huggingface/trl/pull/1255
with PartialState().local_main_process_first():
dataset = dataset.map(process, num_proc=training_args.dataset_num_proc)

################
# Training
################
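For context on the block removed above: `PartialState().local_main_process_first()` is the usual Accelerate pattern for running a `datasets` `map` once on the local main process so that the other ranks reuse the cached result. A generic sketch of that pattern, with a made-up mapping function and dataset, not tied to this script:

```python
from accelerate import PartialState
from datasets import load_dataset

dataset = load_dataset("imdb", split="train[:1%]")

def shout(example):
    return {"text": example["text"].upper()}

# The local main process runs the map first and writes the datasets cache;
# the other processes wait at the context manager, then reuse the cached result.
with PartialState().local_main_process_first():
    dataset = dataset.map(shout, num_proc=2)
```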
56 changes: 29 additions & 27 deletions examples/scripts/sft.py
@@ -12,38 +12,40 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
# regular:
# Full training
python examples/scripts/sft.py \
--dataset_name trl-lib/ultrafeedback_binarized \
--model_name_or_path="facebook/opt-350m" \
--report_to="wandb" \
--learning_rate=1.41e-5 \
--per_device_train_batch_size=64 \
--gradient_accumulation_steps=16 \
--output_dir="sft_openassistant-guanaco" \
--logging_steps=1 \
--num_train_epochs=3 \
--max_steps=-1 \
--push_to_hub \
--gradient_checkpointing
--model_name_or_path Qwen/Qwen2-0.5B \
--dataset_name trl-lib/Capybara \
--learning_rate 2.0e-5 \
--num_train_epochs 1 \
--packing \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 8 \
--gradient_checkpointing \
--logging_steps 25 \
--eval_strategy steps \
--eval_steps 100 \
--output_dir Qwen2-0.5B-SFT \
--push_to_hub
# peft:
# LoRA
python examples/scripts/sft.py \
--dataset_name trl-lib/ultrafeedback_binarized \
--model_name_or_path="facebook/opt-350m" \
--report_to="wandb" \
--learning_rate=1.41e-5 \
--per_device_train_batch_size=64 \
--gradient_accumulation_steps=16 \
--output_dir="sft_openassistant-guanaco" \
--logging_steps=1 \
--num_train_epochs=3 \
--max_steps=-1 \
--push_to_hub \
--model_name_or_path Qwen/Qwen2-0.5B \
--dataset_name trl-lib/Capybara \
--learning_rate 2.0e-4 \
--num_train_epochs 1 \
--packing \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 8 \
--gradient_checkpointing \
--logging_steps 25 \
--eval_strategy steps \
--eval_steps 100 \
--use_peft \
--lora_r=64 \
--lora_alpha=16
--lora_r 32 \
--lora_alpha 16 \
--output_dir Qwen2-0.5B-SFT \
--push_to_hub
"""

from datasets import load_dataset
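For context, the `--use_peft --lora_r 32 --lora_alpha 16` flags in the LoRA command above roughly correspond to a PEFT `LoraConfig` like the one below. The flag-to-field mapping is an assumption about how the script builds its adapter config, not code from this diff:

```python
from peft import LoraConfig

# Hypothetical config corresponding to --use_peft --lora_r 32 --lora_alpha 16
peft_config = LoraConfig(
    r=32,
    lora_alpha=16,
    task_type="CAUSAL_LM",
)
```

In the example script such a config would be handed to the trainer as `peft_config`; only the mapping shown here is assumed.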
47 changes: 29 additions & 18 deletions scripts/log_reports.py
@@ -13,6 +13,7 @@
# limitations under the License.
import argparse
import json
import logging
import os
from datetime import date
from pathlib import Path
@@ -25,6 +26,9 @@
parser = argparse.ArgumentParser()
parser.add_argument("--slack_channel_name", default="trl-push-ci")

# Set up logging
logging.basicConfig(level=logging.INFO)


def main(slack_channel_name=None):
failed = []
@@ -40,25 +44,31 @@ def main(slack_channel_name=None):
for log in Path().glob("*.log"):
section_num_failed = 0
i = 0
with open(log) as f:
for line in f:
line = json.loads(line)
i += 1
if line.get("nodeid", "") != "":
test = line["nodeid"]
if line.get("duration", None) is not None:
duration = f'{line["duration"]:.4f}'
if line.get("outcome", "") == "failed":
section_num_failed += 1
failed.append([test, duration, log.name.split("_")[0]])
total_num_failed += 1
else:
passed.append([test, duration, log.name.split("_")[0]])
try: # Added error handling for file operations
with open(log) as f:
for line in f:
line = json.loads(line)
i += 1
if line.get("nodeid", "") != "":
test = line["nodeid"]
if line.get("duration", None) is not None:
duration = f'{line["duration"]:.4f}'
if line.get("outcome", "") == "failed":
section_num_failed += 1
failed.append([test, duration, log.name.split("_")[0]])
total_num_failed += 1
else:
passed.append([test, duration, log.name.split("_")[0]])
empty_file = i == 0
group_info.append([str(log), section_num_failed, failed])
total_empty_files.append(empty_file)
os.remove(log)
failed = []
except Exception as e: # Catch any exceptions during file processing
logging.error(f"Error processing log file {log}: {e}")
else:
group_info.append([str(log), section_num_failed, failed])
total_empty_files.append(empty_file)
finally:
os.remove(log)
failed = []

no_error_payload = {
"type": "section",
"text": {
@@ -104,6 +114,7 @@ def main(slack_channel_name=None):

if total_empty_files[i]:
message += f"\n*{name}: Warning! Empty file - please check the GitHub action job *\n"
logging.info(f"Total failed tests: {total_num_failed}") # Log the total failed tests
print(f"### {message}")
else:
payload.append(no_error_payload)
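The rewritten loop above relies on Python's full `try`/`except`/`else`/`finally` form: `else` runs only when the `try` body raised nothing, and `finally` runs in every case. A generic sketch of that control flow, not code from the diff:

```python
import logging

logging.basicConfig(level=logging.INFO)

def summarize_log(path):
    try:
        with open(path) as f:
            lines = f.readlines()
    except Exception as e:
        # Log the failure instead of letting one bad file abort the whole report
        logging.error(f"Error processing log file {path}: {e}")
    else:
        # Reached only if reading succeeded
        logging.info(f"{path}: {len(lines)} lines")
    finally:
        # Always reached, whether or not an exception was raised
        logging.info(f"finished with {path}")
```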
2 changes: 1 addition & 1 deletion tests/test_cli.py
@@ -32,7 +32,7 @@ def test_sft_cli():
def test_dpo_cli():
try:
subprocess.run(
"trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name trl-lib/ultrafeedback_binarized --learning_rate 1e-4 --lr_scheduler_type cosine",
"trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name trl-internal-testing/tiny-ultrafeedback-binarized --learning_rate 1e-4 --lr_scheduler_type cosine",
shell=True,
check=True,
)