Commit

Merge branch 'main' into cgpo_mixture_of_judges
gaetanlop authored Oct 21, 2024
2 parents 567b798 + 84dab85 commit 1c33494
Showing 21 changed files with 702 additions and 908 deletions.
8 changes: 6 additions & 2 deletions README.md
@@ -66,13 +66,17 @@ You can use the TRL Command Line Interface (CLI) to quickly get started with Sup
**SFT:**

```bash
trl sft --model_name_or_path Qwen/Qwen2.5-0.5B --dataset_name trl-lib/Capybara --output_dir Qwen2.5-0.5B-SFT
trl sft --model_name_or_path Qwen/Qwen2.5-0.5B \
--dataset_name trl-lib/Capybara \
--output_dir Qwen2.5-0.5B-SFT
```
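For readers who prefer the Python API, here is a minimal sketch of roughly the same SFT run. It assumes the standard `SFTConfig`/`SFTTrainer` usage from the TRL quickstart and is illustrative only, not code from this diff:

```python
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

dataset = load_dataset("trl-lib/Capybara", split="train")

training_args = SFTConfig(output_dir="Qwen2.5-0.5B-SFT")
trainer = SFTTrainer(
    model="Qwen/Qwen2.5-0.5B",  # SFTTrainer also accepts an already-loaded model
    args=training_args,
    train_dataset=dataset,
)
trainer.train()
```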

**DPO:**

```bash
trl dpo --model_name_or_path Qwen/Qwen2.5-0.5B-Instruct --dataset_name argilla/Capybara-Preferences --output_dir Qwen2.5-0.5B-DPO
trl dpo --model_name_or_path Qwen/Qwen2.5-0.5B-Instruct \
--dataset_name argilla/Capybara-Preferences \
--output_dir Qwen2.5-0.5B-DPO
```
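Similarly, a rough Python-API sketch of the DPO command, assuming the usual `DPOConfig`/`DPOTrainer` pattern; the `processing_class` keyword is called `tokenizer` in older TRL releases, and again this is illustrative rather than code from the diff:

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
dataset = load_dataset("argilla/Capybara-Preferences", split="train")

training_args = DPOConfig(output_dir="Qwen2.5-0.5B-DPO")
trainer = DPOTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    processing_class=tokenizer,  # named `tokenizer` in older TRL releases
)
trainer.train()
```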

**Chat:**
2 changes: 1 addition & 1 deletion docs/source/dataset_formats.mdx
@@ -589,7 +589,7 @@ dataset = dataset.remove_columns(["chosen", "rejected"])

### From explicit to implicit prompt preference dataset

To convert a preference dataset with implicit prompt into a preference dataset with explicit prompt, concatenate the prompt to both chosen and rejected, and remove the prompt.
To convert a preference dataset with explicit prompt into a preference dataset with implicit prompt, concatenate the prompt to both chosen and rejected, and remove the prompt.

```python
from datasets import Dataset
# ...
```
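A minimal sketch of the conversion described above, using made-up rows; the `prompt`/`chosen`/`rejected` column names follow the explicit-prompt preference format, and the helper name is hypothetical:

```python
from datasets import Dataset

# Made-up explicit-prompt preference rows
dataset = Dataset.from_dict({
    "prompt": ["The sky is", "The capital of France is"],
    "chosen": [" blue.", " Paris."],
    "rejected": [" green.", " Berlin."],
})

def concat_prompt_to_completions(example):
    # Prepend the prompt to both completions; the prompt column is dropped by `map`
    return {
        "chosen": example["prompt"] + example["chosen"],
        "rejected": example["prompt"] + example["rejected"],
    }

dataset = dataset.map(concat_prompt_to_completions, remove_columns=["prompt"])
print(dataset[0])
# {'chosen': 'The sky is blue.', 'rejected': 'The sky is green.'}
```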
4 changes: 4 additions & 0 deletions docs/source/dpo_trainer.mdx
@@ -276,3 +276,7 @@ dpo_trainer = DPOTrainer(
## DPOConfig

[[autodoc]] DPOConfig

## PreferenceCollator

[[autodoc]] trainer.dpo_trainer.PreferenceCollator
12 changes: 0 additions & 12 deletions examples/scripts/dpo_vlm.py
@@ -27,7 +27,6 @@
"""

import torch
from accelerate import PartialState
from datasets import load_dataset
from transformers import AutoModelForVision2Seq, AutoProcessor

@@ -106,17 +105,6 @@
################
dataset = load_dataset(script_args.dataset_name)

def process(row):
row["prompt"] = processor.apply_chat_template(row["prompt"], tokenize=False)
row["chosen"] = processor.apply_chat_template(row["chosen"], tokenize=False)
row["rejected"] = processor.apply_chat_template(row["rejected"], tokenize=False)
return row

# Compute that only on the main process for faster data processing.
# see: https://github.com/huggingface/trl/pull/1255
with PartialState().local_main_process_first():
dataset = dataset.map(process, num_proc=training_args.dataset_num_proc)

################
# Training
################
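For context on the block removed above: `PartialState().local_main_process_first()` is the usual Accelerate pattern for running a `datasets` `map` once on the local main process so that the other ranks reuse the cached result. A generic sketch of that pattern, with a made-up mapping function and dataset, not tied to this script:

```python
from accelerate import PartialState
from datasets import load_dataset

dataset = load_dataset("imdb", split="train[:1%]")

def shout(example):
    return {"text": example["text"].upper()}

# The local main process runs the map first and writes the datasets cache;
# the other processes wait at the context manager, then reuse the cached result.
with PartialState().local_main_process_first():
    dataset = dataset.map(shout, num_proc=2)
```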
56 changes: 29 additions & 27 deletions examples/scripts/sft.py
@@ -12,38 +12,40 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
# regular:
# Full training
python examples/scripts/sft.py \
--dataset_name trl-lib/ultrafeedback_binarized \
--model_name_or_path="facebook/opt-350m" \
--report_to="wandb" \
--learning_rate=1.41e-5 \
--per_device_train_batch_size=64 \
--gradient_accumulation_steps=16 \
--output_dir="sft_openassistant-guanaco" \
--logging_steps=1 \
--num_train_epochs=3 \
--max_steps=-1 \
--push_to_hub \
--gradient_checkpointing
--model_name_or_path Qwen/Qwen2-0.5B \
--dataset_name trl-lib/Capybara \
--learning_rate 2.0e-5 \
--num_train_epochs 1 \
--packing \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 8 \
--gradient_checkpointing \
--logging_steps 25 \
--eval_strategy steps \
--eval_steps 100 \
--output_dir Qwen2-0.5B-SFT \
--push_to_hub
# peft:
# LoRA
python examples/scripts/sft.py \
--dataset_name trl-lib/ultrafeedback_binarized \
--model_name_or_path="facebook/opt-350m" \
--report_to="wandb" \
--learning_rate=1.41e-5 \
--per_device_train_batch_size=64 \
--gradient_accumulation_steps=16 \
--output_dir="sft_openassistant-guanaco" \
--logging_steps=1 \
--num_train_epochs=3 \
--max_steps=-1 \
--push_to_hub \
--model_name_or_path Qwen/Qwen2-0.5B \
--dataset_name trl-lib/Capybara \
--learning_rate 2.0e-4 \
--num_train_epochs 1 \
--packing \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 8 \
--gradient_checkpointing \
--logging_steps 25 \
--eval_strategy steps \
--eval_steps 100 \
--use_peft \
--lora_r=64 \
--lora_alpha=16
--lora_r 32 \
--lora_alpha 16 \
--output_dir Qwen2-0.5B-SFT \
--push_to_hub
"""

from datasets import load_dataset
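For context, the `--use_peft --lora_r 32 --lora_alpha 16` flags in the LoRA command above roughly correspond to a PEFT `LoraConfig` like the one below. The flag-to-field mapping is an assumption about how the script builds its adapter config, not code from this diff:

```python
from peft import LoraConfig

# Hypothetical config corresponding to --use_peft --lora_r 32 --lora_alpha 16
peft_config = LoraConfig(
    r=32,
    lora_alpha=16,
    task_type="CAUSAL_LM",
)
```

In the example script such a config would be handed to the trainer as `peft_config`; only the mapping shown here is assumed.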
47 changes: 29 additions & 18 deletions scripts/log_reports.py
@@ -13,6 +13,7 @@
# limitations under the License.
import argparse
import json
import logging
import os
from datetime import date
from pathlib import Path
@@ -25,6 +26,9 @@
parser = argparse.ArgumentParser()
parser.add_argument("--slack_channel_name", default="trl-push-ci")

# Set up logging
logging.basicConfig(level=logging.INFO)


def main(slack_channel_name=None):
failed = []
@@ -40,25 +44,31 @@ def main(slack_channel_name=None):
for log in Path().glob("*.log"):
section_num_failed = 0
i = 0
with open(log) as f:
for line in f:
line = json.loads(line)
i += 1
if line.get("nodeid", "") != "":
test = line["nodeid"]
if line.get("duration", None) is not None:
duration = f'{line["duration"]:.4f}'
if line.get("outcome", "") == "failed":
section_num_failed += 1
failed.append([test, duration, log.name.split("_")[0]])
total_num_failed += 1
else:
passed.append([test, duration, log.name.split("_")[0]])
try: # Added error handling for file operations
with open(log) as f:
for line in f:
line = json.loads(line)
i += 1
if line.get("nodeid", "") != "":
test = line["nodeid"]
if line.get("duration", None) is not None:
duration = f'{line["duration"]:.4f}'
if line.get("outcome", "") == "failed":
section_num_failed += 1
failed.append([test, duration, log.name.split("_")[0]])
total_num_failed += 1
else:
passed.append([test, duration, log.name.split("_")[0]])
empty_file = i == 0
group_info.append([str(log), section_num_failed, failed])
total_empty_files.append(empty_file)
os.remove(log)
failed = []
except Exception as e: # Catch any exceptions during file processing
logging.error(f"Error processing log file {log}: {e}")
else:
group_info.append([str(log), section_num_failed, failed])
total_empty_files.append(empty_file)
finally:
os.remove(log)
failed = []

no_error_payload = {
"type": "section",
"text": {
@@ -104,6 +114,7 @@ def main(slack_channel_name=None):

if total_empty_files[i]:
message += f"\n*{name}: Warning! Empty file - please check the GitHub action job *\n"
logging.info(f"Total failed tests: {total_num_failed}") # Log the total failed tests
print(f"### {message}")
else:
payload.append(no_error_payload)
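The rewritten loop above relies on Python's full `try`/`except`/`else`/`finally` form: `else` runs only when the `try` body raised nothing, and `finally` runs in every case. A generic sketch of that control flow, not code from the diff:

```python
import logging

logging.basicConfig(level=logging.INFO)

def summarize_log(path):
    try:
        with open(path) as f:
            lines = f.readlines()
    except Exception as e:
        # Log the failure instead of letting one bad file abort the whole report
        logging.error(f"Error processing log file {path}: {e}")
    else:
        # Reached only if reading succeeded
        logging.info(f"{path}: {len(lines)} lines")
    finally:
        # Always reached, whether or not an exception was raised
        logging.info(f"finished with {path}")
```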
2 changes: 1 addition & 1 deletion tests/test_cli.py
@@ -32,7 +32,7 @@ def test_sft_cli():
def test_dpo_cli():
try:
subprocess.run(
"trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name trl-lib/ultrafeedback_binarized --learning_rate 1e-4 --lr_scheduler_type cosine",
"trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name trl-internal-testing/tiny-ultrafeedback-binarized --learning_rate 1e-4 --lr_scheduler_type cosine",
shell=True,
check=True,
)