diff --git a/examples/datasets/hh-rlhf-helpful-base.py b/examples/datasets/hh-rlhf-helpful-base.py
index e089ed108e..4f665ead2e 100644
--- a/examples/datasets/hh-rlhf-helpful-base.py
+++ b/examples/datasets/hh-rlhf-helpful-base.py
@@ -17,6 +17,7 @@
 from typing import Optional
 
 from datasets import load_dataset
+from huggingface_hub import ModelCard
 from transformers import HfArgumentParser
 
 
@@ -85,6 +86,28 @@ def extract_dialogue(example: str) -> list[dict[str, str]]:
     return {"prompt": prompt, "chosen": chosen, "rejected": rejected}
 
 
+model_card = ModelCard("""
+---
+tags: [trl]
+---
+**HH-RLHF-Helpful-Base Dataset**
+
+**Summary**
+
+The HH-RLHF-Helpful-Base dataset is a processed version of [Anthropic's HH-RLHF](https://huggingface.co/datasets/Anthropic/hh-rlhf) dataset, specifically curated to train models using the TRL library for preference learning and alignment tasks. It contains pairs of text samples, each labeled as either "chosen" or "rejected," based on human preferences regarding the helpfulness of the responses. This dataset enables models to learn human preferences in generating helpful responses, enhancing their ability to assist users effectively.
+
+**Data Structure**
+
+The dataset is structured as follows:
+
+- **Format**: [Preference](https://huggingface.co/docs/trl/main/dataset_formats#preference)
+- **Prompt**: The original user query.
+- **Chosen**: A response deemed helpful by human evaluators.
+- **Rejected**: A response considered less helpful or unhelpful.
+
+This structure allows models to learn to prefer the "Chosen" response over the "Rejected" one, thereby aligning with human preferences in helpfulness.
+""")
+
 if __name__ == "__main__":
     parser = HfArgumentParser(ScriptArguments)
     script_args = parser.parse_args_into_dataclasses()[0]
@@ -94,3 +117,4 @@ def extract_dialogue(example: str) -> list[dict[str, str]]:
 
     if script_args.push_to_hub:
         dataset.push_to_hub(script_args.repo_id)
+        model_card.push_to_hub(script_args.repo_id, repo_type="dataset")
diff --git a/examples/datasets/lm-human-preferences-descriptiveness.py b/examples/datasets/lm-human-preferences-descriptiveness.py
index 621757770c..e1a4b1e096 100644
--- a/examples/datasets/lm-human-preferences-descriptiveness.py
+++ b/examples/datasets/lm-human-preferences-descriptiveness.py
@@ -16,6 +16,7 @@
 from typing import Optional
 
 from datasets import load_dataset
+from huggingface_hub import ModelCard
 from transformers import AutoTokenizer, HfArgumentParser
 
 
@@ -55,6 +56,26 @@ def to_prompt_completion(example, tokenizer):
     return {"prompt": prompt, "chosen": chosen, "rejected": rejected}
 
 
+model_card = ModelCard("""
+---
+tags: [trl]
+---
+**LM-Human-Preferences-Descriptiveness Dataset**
+
+**Summary**
+
+The LM-Human-Preferences-Descriptiveness dataset is a processed subset of [OpenAI's LM-Human-Preferences](https://github.com/openai/lm-human-preferences), focusing specifically on enhancing the descriptiveness of generated text. It contains pairs of text samples, each labeled as either "chosen" or "rejected," based on human preferences regarding the level of detail and vividness in the descriptions. This dataset enables models to learn human preferences in descriptive language, improving their ability to generate rich and engaging narratives.
+
+**Data Structure**
+
+- **Format**: [Preference](https://huggingface.co/docs/trl/main/dataset_formats#preference)
+- **Prompt**: The original text sample.
+- **Chosen**: A version of the text with enhanced descriptiveness.
+- **Rejected**: A version of the text with less descriptiveness.
+
+This structure allows models to learn to prefer the "Chosen" text over the "Rejected" one, thereby aligning with human preferences in descriptive language.
+""")
+
 if __name__ == "__main__":
     parser = HfArgumentParser(ScriptArguments)
     script_args = parser.parse_args_into_dataclasses()[0]
@@ -79,3 +100,4 @@ def to_prompt_completion(example, tokenizer):
 
     if script_args.push_to_hub:
         dataset.push_to_hub(script_args.repo_id)
+        model_card.push_to_hub(script_args.repo_id, repo_type="dataset")
diff --git a/examples/datasets/lm-human-preferences-sentiment.py b/examples/datasets/lm-human-preferences-sentiment.py
index a3eaa4d06e..1c0074db3d 100644
--- a/examples/datasets/lm-human-preferences-sentiment.py
+++ b/examples/datasets/lm-human-preferences-sentiment.py
@@ -16,6 +16,7 @@
 from typing import Optional
 
 from datasets import load_dataset
+from huggingface_hub import ModelCard
 from transformers import AutoTokenizer, HfArgumentParser
 
 
@@ -50,6 +51,26 @@ def to_prompt_completion(example, tokenizer):
     return {"prompt": prompt, "chosen": chosen, "rejected": rejected}
 
 
+model_card = ModelCard("""
+---
+tags: [trl]
+---
+**LM-Human-Preferences-Sentiment Dataset**
+
+**Summary**
+
+The LM-Human-Preferences-Sentiment dataset is a processed subset of [OpenAI's LM-Human-Preferences](https://github.com/openai/lm-human-preferences), focusing specifically on sentiment expression. It contains pairs of text samples, each labeled as either "chosen" or "rejected," based on human preferences regarding the sentiment conveyed in the text. This dataset enables models to learn human preferences in sentiment expression, enhancing their ability to generate and evaluate text with desired emotional tones.
+
+**Data Structure**
+
+- **Format**: [Preference](https://huggingface.co/docs/trl/main/dataset_formats#preference)
+- **Prompt**: The original text sample.
+- **Chosen**: A version of the text that conveys the desired sentiment.
+- **Rejected**: A version of the text that does not convey the desired sentiment.
+
+This structure allows models to learn to prefer the "Chosen" text over the "Rejected" one, thereby aligning with human preferences in sentiment expression.
+""")
+
 if __name__ == "__main__":
     parser = HfArgumentParser(ScriptArguments)
     script_args = parser.parse_args_into_dataclasses()[0]
@@ -72,3 +93,4 @@ def to_prompt_completion(example, tokenizer):
 
     if script_args.push_to_hub:
         dataset.push_to_hub(script_args.repo_id)
+        model_card.push_to_hub(script_args.repo_id, repo_type="dataset")
diff --git a/examples/datasets/math_shepherd.py b/examples/datasets/math_shepherd.py
index c09e745ad5..b503357eed 100644
--- a/examples/datasets/math_shepherd.py
+++ b/examples/datasets/math_shepherd.py
@@ -18,6 +18,7 @@
 from typing import Optional
 
 from datasets import load_dataset
+from huggingface_hub import ModelCard
 from transformers import HfArgumentParser
 
 
@@ -114,6 +115,26 @@ def process_example(example):
     return {"prompt": prompt, "completions": completions, "labels": labels}
 
 
+model_card = ModelCard("""
+---
+tags: [trl]
+---
+**Math-Shepherd**
+
+**Summary**
+
+The Math-Shepherd dataset is a processed version of the original [Math-Shepherd dataset](https://huggingface.co/datasets/peiyi9979/Math-Shepherd), designed to train models using the TRL library for stepwise supervision tasks. It provides step-by-step solutions to mathematical problems, enabling models to learn and verify each step of a solution, thereby enhancing their reasoning capabilities.
+
+**Data Structure**
+
+- **Format**: [Stepwise Supervision](https://huggingface.co/docs/trl/main/dataset_formats#stepwise-supervision)
+- **Prompt**: The original problem statement.
+- **Completions**: A list of reasoning steps generated to solve the problem.
+- **Labels**: A list of booleans or floats indicating the correctness of each corresponding reasoning step.
+
+This structure allows models to learn the correctness of each step in a solution, facilitating improved reasoning and problem-solving abilities.
+""")
+
 if __name__ == "__main__":
     parser = HfArgumentParser(ScriptArguments)
     script_args = parser.parse_args_into_dataclasses()[0]
@@ -129,3 +150,4 @@ def process_example(example):
 
     if script_args.push_to_hub:
         dataset.push_to_hub(script_args.repo_id)
+        model_card.push_to_hub(script_args.repo_id, repo_type="dataset")
diff --git a/examples/datasets/prm800k.py b/examples/datasets/prm800k.py
index b5f95742be..6deb41d0c7 100644
--- a/examples/datasets/prm800k.py
+++ b/examples/datasets/prm800k.py
@@ -16,6 +16,7 @@
 from typing import Optional
 
 from datasets import load_dataset
+from huggingface_hub import ModelCard
 from transformers import HfArgumentParser
 
 
@@ -88,6 +89,26 @@ def process_batch(examples):
     return outputs
 
 
+model_card = ModelCard("""
+---
+tags: [trl]
+---
+**PRM800K Dataset**
+
+**Summary**
+
+The PRM800K dataset is a processed version of [OpenAI's PRM800K](https://github.com/openai/prm800k), designed to train models using the TRL library for stepwise supervision tasks. It contains 800,000 step-level correctness labels for model-generated solutions to problems from the MATH dataset. This dataset enables models to learn and verify each step of a solution, enhancing their reasoning capabilities.
+
+**Data Structure**
+
+- **Format**: [Stepwise Supervision](https://huggingface.co/docs/trl/main/dataset_formats#stepwise-supervision)
+- **Prompt**: The original problem statement.
+- **Completions**: A list of reasoning steps generated to solve the problem.
+- **Labels**: A list of booleans or floats indicating the correctness of each corresponding reasoning step.
+
+This structure allows models to learn the correctness of each step in a solution, facilitating improved reasoning and problem-solving abilities.
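+
+**Example**
+
+A minimal, invented example row is sketched below (illustrative values only, not taken from the dataset); the second step contains a deliberate arithmetic error, so its label is `False`:
+
+```python
+example = {
+    "prompt": "Janet has 3 apples and buys 2 more. How many apples does she have?",
+    "completions": ["Step 1: Janet starts with 3 apples.", "Step 2: 3 + 2 = 6."],
+    "labels": [True, False],  # step 2 is wrong (3 + 2 = 5), hence False
+}
+```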
+""") + if __name__ == "__main__": parser = HfArgumentParser(ScriptArguments) script_args = parser.parse_args_into_dataclasses()[0] @@ -116,3 +137,4 @@ def process_batch(examples): if script_args.push_to_hub: dataset.push_to_hub(script_args.repo_id) + model_card.push_to_hub(script_args.repo_id, repo_type="dataset") diff --git a/examples/datasets/rlaif-v.py b/examples/datasets/rlaif-v.py index 84ae292f87..74b277b18a 100644 --- a/examples/datasets/rlaif-v.py +++ b/examples/datasets/rlaif-v.py @@ -16,6 +16,7 @@ from typing import Optional from datasets import features, load_dataset +from huggingface_hub import ModelCard from transformers import HfArgumentParser @@ -50,6 +51,26 @@ def to_conversational(example): return {"prompt": prompt, "images": [example["image"]], "chosen": chosen, "rejected": rejected} +model_card = ModelCard(""" +--- +tags: [trl] +--- +**RLAIF-V** + +**Summary** + +The RLAIF-V dataset is a processed version of the [openbmb/RLAIF-V-Dataset](https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset#dataset-card-for-rlaif-v-dataset), specifically curated to train vision-language models using the TRL library for preference learning tasks. It contains 83,132 high-quality comparison pairs, each comprising an image and two textual descriptions: one preferred and one rejected. This dataset enables models to learn human preferences in visual contexts, enhancing their ability to generate and evaluate image captions. + +**Data Structure** + +- **Format**: [Preference](https://huggingface.co/docs/trl/main/dataset_formats#preference) +- **Prompt**: The image to be described. +- **Chosen**: The preferred textual description of the image. +- **Rejected**: An alternative textual description that was not preferred. + +This structure allows models to learn to prefer the "Chosen" description over the "Rejected" one, thereby aligning with human preferences in image captioning tasks. +""") + if __name__ == "__main__": parser = HfArgumentParser(ScriptArguments) script_args = parser.parse_args_into_dataclasses()[0] @@ -71,3 +92,4 @@ def to_conversational(example): if script_args.push_to_hub: dataset.push_to_hub(script_args.repo_id) + model_card.push_to_hub(script_args.repo_id, repo_type="dataset") diff --git a/examples/datasets/tldr.py b/examples/datasets/tldr.py index 0ae29481e3..85ddf90499 100644 --- a/examples/datasets/tldr.py +++ b/examples/datasets/tldr.py @@ -16,6 +16,7 @@ from typing import Optional from datasets import load_dataset +from huggingface_hub import ModelCard from transformers import HfArgumentParser @@ -45,6 +46,25 @@ def to_prompt_completion(example): return {"prompt": prompt, "completion": completion} +model_card = ModelCard(""" +--- +tags: [trl] +--- +# TL;DR Dataset + +## Overview + +The TL;DR dataset is a processed version of Reddit posts, specifically curated to train models using the TRL library for summarization tasks. It leverages the common practice on Reddit where users append "TL;DR" (Too Long; Didn't Read) summaries to lengthy posts, providing a rich source of paired text data for training summarization models. + +## Data Structure + +- **Format**: [Prompt-Completion](https://huggingface.co/docs/trl/en/dataset_formats#prompt-completion) +- **Prompt**: The original, unabridged Reddit post. +- **Completion**: The concise "TL;DR" summary appended by the author. + +This structure enables models to learn the relationship between detailed content and its abbreviated form, enhancing their summarization capabilities. 
+""") + if __name__ == "__main__": parser = HfArgumentParser(ScriptArguments) script_args = parser.parse_args_into_dataclasses()[0] @@ -65,3 +85,4 @@ def to_prompt_completion(example): if script_args.push_to_hub: dataset.push_to_hub(script_args.repo_id) + model_card.push_to_hub(script_args.repo_id, repo_type="dataset") diff --git a/examples/datasets/tldr_preference.py b/examples/datasets/tldr_preference.py index 1c4ff5bcbd..410e5ebd3c 100644 --- a/examples/datasets/tldr_preference.py +++ b/examples/datasets/tldr_preference.py @@ -16,6 +16,7 @@ from typing import Optional from datasets import load_dataset +from huggingface_hub import ModelCard from transformers import HfArgumentParser @@ -56,6 +57,25 @@ def to_preference(example): return {"prompt": prompt, "chosen": chosen, "rejected": rejected} +model_card = ModelCard(""" +--- +tags: [trl] +--- +# TL;DR Dataset for Preference Learning +## Overview + +The TL;DR dataset is a processed version of Reddit posts, specifically curated to train models using the TRL library for preference learning and Reinforcement Learning from Human Feedback (RLHF) tasks. It leverages the common practice on Reddit where users append "TL;DR" (Too Long; Didn't Read) summaries to lengthy posts, providing a rich source of paired text data for training models to understand and generate concise summaries. + +## Data Structure + +- **Format**: [Preference](https://huggingface.co/docs/trl/main/dataset_formats#preference) +- **Prompt**: The original, unabridged Reddit post. +- **Chosen**: The concise "TL;DR" summary appended by the author. +- **Rejected**: An alternative summary or response that was not selected. + +This structure enables models to learn the relationship between detailed content and its abbreviated form, enhancing their summarization capabilities. +""") + if __name__ == "__main__": parser = HfArgumentParser(ScriptArguments) script_args = parser.parse_args_into_dataclasses()[0] @@ -70,3 +90,4 @@ def to_preference(example): if script_args.push_to_hub: dataset.push_to_hub(script_args.repo_id) + model_card.push_to_hub(script_args.repo_id, repo_type="dataset") diff --git a/examples/datasets/ultrafeedback-prompt.py b/examples/datasets/ultrafeedback-prompt.py index 3cb92467d5..8c05e97c22 100644 --- a/examples/datasets/ultrafeedback-prompt.py +++ b/examples/datasets/ultrafeedback-prompt.py @@ -16,6 +16,7 @@ from typing import Optional from datasets import load_dataset +from huggingface_hub import ModelCard from transformers import HfArgumentParser @@ -50,6 +51,26 @@ def drop_long_prompt(example): return True +model_card = ModelCard(""" +--- +tags: [trl] +--- +# UltraFeedback - Prompts + +## Summary + +A processed version of the [UltraFeedback](https://huggingface.co/datasets/openbmb/UltraFeedback) dataset for model evaluation on specific aspects like helpfulness, honesty, and instruction-following. + +## Description + +This dataset contains prompts from the original UltraFeedback dataset, designed to evaluate and fine-tune language models. It is preprocessed to target specific model behavior evaluation (e.g., helpfulness), with the option to push the dataset to the Hugging Face Hub. + +## Data Structure + +- **Format**: [prompt-only](https://huggingface.co/docs/trl/main/dataset_formats#prompt-completion) +- **Prompt**: The input question or instruction provided to the model. 
+""") + if __name__ == "__main__": parser = HfArgumentParser(ScriptArguments) script_args = parser.parse_args_into_dataclasses()[0] @@ -66,3 +87,4 @@ def drop_long_prompt(example): if script_args.push_to_hub: dataset.push_to_hub(script_args.repo_id) + model_card.push_to_hub(script_args.repo_id, repo_type="dataset") diff --git a/examples/datasets/ultrafeedback.py b/examples/datasets/ultrafeedback.py index cb6c556d0c..9953bda3c2 100644 --- a/examples/datasets/ultrafeedback.py +++ b/examples/datasets/ultrafeedback.py @@ -16,6 +16,7 @@ from typing import Optional from datasets import load_dataset +from huggingface_hub import ModelCard from transformers import HfArgumentParser @@ -79,6 +80,26 @@ def to_unpaired_preference(example, model_name, aspect): return {"prompt": prompt, "completion": completion, "label": label} +model_card = ModelCard(""" +--- +tags: [trl] +--- +# UltraFeedback GPT-3.5-Turbo Helpfulness + +## Summary +This dataset contains processed user-assistant interactions filtered for helpfulness, derived from the [openbmb/UltraFeedback](https://huggingface.co/datasets/openbmb/UltraFeedback) dataset. It is designed for fine-tuning and evaluating models in alignment tasks. + + +## Description +The dataset focuses on evaluating and improving the helpfulness of responses generated by the `gpt-3.5-turbo` model. It includes prompts, model completions, and binary labels indicating whether a response meets a helpfulness threshold. + +## Data Structure +[Prompt completion](https://huggingface.co/docs/trl/en/dataset_formats#prompt-completion) +- **prompt**: User input (e.g., a question or instruction). +- **completion**: Assistant's response to the prompt. +- **label**: Binary value (`True` or `False`) indicating if the response is sufficiently helpful. +""") + if __name__ == "__main__": parser = HfArgumentParser(ScriptArguments) script_args = parser.parse_args_into_dataclasses()[0] @@ -100,3 +121,4 @@ def to_unpaired_preference(example, model_name, aspect): if script_args.push_to_hub: dataset.push_to_hub(script_args.repo_id) + model_card.push_to_hub(script_args.repo_id, repo_type="dataset")