From 300a67d5af6451ed9760e55dcf83ba26c95a6a17 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome
Date: Mon, 8 Jan 2024 13:34:52 +0100
Subject: [PATCH] Documentation review (#223)

* Fix conflictive line-break within docstring example
* Add `PrometheusTask` docstrings
* Add missing `kwargs` type-hint to `Any`
* Fix line-break issues within docstrings
* Fix docstring formatting in `PrometheusTask`
* Fix referencing issues pointed out by `mkdocs`
* Add `UltraCMTask` docstrings
* Add references section within `{*}Task` docstrings
* Add disclaimer in `CritiqueTask` subclasses
* Exclude some modules from `gen_ref_pages.py`
---
 docs/index.md                                 |  2 +-
 docs/scripts/gen_ref_pages.py                 | 19 ++++-
 docs/technical-reference/pipeline.md          |  2 +-
 src/distilabel/pipeline.py                    |  2 +-
 src/distilabel/tasks/critique/prometheus.py   | 50 +++++++++++++++++++
 src/distilabel/tasks/critique/ultracm.py      | 39 +++++++++++++++
 src/distilabel/tasks/preference/judgelm.py    |  8 ++-
 .../tasks/preference/ultrafeedback.py         |  6 ++-
 src/distilabel/tasks/preference/ultrajudge.py |  4 ++
 src/distilabel/tasks/prompt.py                |  2 +-
 .../tasks/text_generation/principles.py       |  3 +-
 .../tasks/text_generation/self_instruct.py    |  8 +--
 12 files changed, 134 insertions(+), 11 deletions(-)

diff --git a/docs/index.md b/docs/index.md
index d8302c83c9..3100bbd404 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -54,7 +54,7 @@ For a more complete example, check out our awesome notebook on Google Colab:
 
     Understand the components and their interactions.
 
--   [**API Reference**](./reference/distilabel)
+-   [**API Reference**](./reference/distilabel/index.md)
 
 ---
diff --git a/docs/scripts/gen_ref_pages.py b/docs/scripts/gen_ref_pages.py
index f249af899d..efe51411a8 100644
--- a/docs/scripts/gen_ref_pages.py
+++ b/docs/scripts/gen_ref_pages.py
@@ -1,4 +1,18 @@
-# https://mkdocstrings.github.io/recipes/#automatic-code-reference-pages
+# Copyright 2023-present, Argilla, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ported from https://mkdocstrings.github.io/recipes/#automatic-code-reference-pages
 
 from pathlib import Path
 
@@ -7,8 +21,11 @@
 nav = mkdocs_gen_files.Nav()
 
 src = Path(__file__).parent.parent.parent / "src"
+excluded = ["distilabel/utils", "distilabel/logger.py", "distilabel/progress_bar.py"]
 
 for path in sorted(src.rglob("*.py")):
+    if any(exclude in str(path) for exclude in excluded):
+        continue
     module_path = path.relative_to(src).with_suffix("")
     doc_path = path.relative_to(src).with_suffix(".md")
     full_doc_path = Path("reference", doc_path)
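The exclusion entries above mix a directory prefix (`distilabel/utils`) with bare file names, so the membership test runs against the full path: `path.name` is only the file name and can never contain a `/`. A minimal sketch of the intended matching, with hypothetical paths for illustration:

```python
from pathlib import Path

excluded = ["distilabel/utils", "distilabel/logger.py", "distilabel/progress_bar.py"]

# Hypothetical module paths, mirroring the `src` tree the script walks.
paths = [
    Path("src/distilabel/pipeline.py"),
    Path("src/distilabel/logger.py"),
    Path("src/distilabel/utils/dicts.py"),
]

for path in paths:
    # `path.name` would be "dicts.py" for the last entry, which can never
    # contain "distilabel/utils"; matching on the full path catches both
    # the directory-level and the file-level exclusions.
    skipped = any(exclude in str(path) for exclude in excluded)
    print(path, "-> excluded" if skipped else "-> documented")
```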
diff --git a/docs/technical-reference/pipeline.md b/docs/technical-reference/pipeline.md
index 93406211fe..8e1886ac80 100644
--- a/docs/technical-reference/pipeline.md
+++ b/docs/technical-reference/pipeline.md
@@ -21,7 +21,7 @@ We will create a [`Pipeline`][distilabel.pipeline.Pipeline] that will use [Notus
 --8<-- "docs/snippets/technical-reference/pipeline/pipeline_generator_1.py"
 ```
 
-We've set up our pipeline using a specialized [`TextGenerationTask`](distilabel.tasks.text_generation.base.TextGenerationTask) (refer to the [tasks section](./tasks.md) for more task details), and an [InferenceEndpointsLLM][distilabel.llm.huggingface.inference_endpoints.InferenceEndpointsLLM] configured for [`notus-7b-v1`](https://huggingface.co/argilla/notus-7b-v1), although any of the available `LLMs` will work.
+We've set up our pipeline using a specialized [TextGenerationTask][distilabel.tasks.text_generation.base.TextGenerationTask] (refer to the [tasks section](./tasks.md) for more task details), and an [InferenceEndpointsLLM][distilabel.llm.huggingface.inference_endpoints.InferenceEndpointsLLM] configured for [`notus-7b-v1`](https://huggingface.co/argilla/notus-7b-v1), although any of the available `LLMs` will work.
 
 To use the [Pipeline][distilabel.pipeline.Pipeline] for dataset generation, we call the generate method. We provide it with the input dataset and specify the desired number of generations. In this example, we've prepared a `Dataset` with a single row to illustrate the process.
 This dataset contains one row, and we'll trigger 2 generations from it:
diff --git a/src/distilabel/pipeline.py b/src/distilabel/pipeline.py
index 9dffec9c3a..3096283a62 100644
--- a/src/distilabel/pipeline.py
+++ b/src/distilabel/pipeline.py
@@ -746,7 +746,7 @@ def pipeline(
     *,
     generator: Optional["LLM"] = None,
     labeller: Optional["LLM"] = None,
-    **kwargs,
+    **kwargs: Any,
 ) -> Pipeline:
     """Creates a `Pipeline` instance with the provided LLMs for a given task,
     which is useful whenever you want to use a pre-defined `Pipeline` for a given
     task, or if you want to
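The `--8<--` include in the `pipeline.md` hunk above pulls the pipeline-construction snippet from the docs tree rather than inlining it. As a rough sketch of the `generate` call the surrounding prose describes (the dataset contents and the keyword arguments are assumptions based on that prose, not a verbatim copy of the snippet):

```python
from datasets import Dataset

from distilabel.pipeline import Pipeline


def run_single_row_example(pipeline: Pipeline) -> Dataset:
    """Runs the documented single-row example against an already-built pipeline."""
    # A hypothetical single-row input dataset, as described in the docs above.
    dataset = Dataset.from_dict({"input": ["Write a short poem about synthetic data."]})
    # Request 2 generations for the single input row, per the prose above.
    return pipeline.generate(dataset=dataset, num_generations=2)
```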
diff --git a/src/distilabel/tasks/critique/prometheus.py b/src/distilabel/tasks/critique/prometheus.py
index 95a4910d22..5eb7908bfa 100644
--- a/src/distilabel/tasks/critique/prometheus.py
+++ b/src/distilabel/tasks/critique/prometheus.py
@@ -25,6 +25,28 @@
 
 @dataclass
 class PrometheusTask(CritiqueTask):
+    """A `CritiqueTask` following the prompt template used by Prometheus.
+
+    Args:
+        system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.
+        scoring_criteria (str): the scoring criteria to be used for the task, which defines
+            the scores described below via `score_descriptions`.
+        score_descriptions (Dict[int, str]): the descriptions of the scores, where
+            the key is the rating value (ideally these should be consecutive), and the
+            value is the description of each rating.
+
+    Disclaimer:
+        Since the Prometheus model has been trained on data generated via the OpenAI API, the
+        prompting strategy may only be consistent / compliant with either GPT-3.5 or GPT-4
+        from the OpenAI API, or with the Prometheus model itself. Any other model may fail to
+        generate a structured output, or may provide an incorrect / inaccurate critique.
+
+    References:
+        - [`Prometheus: Inducing Fine-grained Evaluation Capability in Language Models`](https://arxiv.org/abs/2310.08491)
+        - [`kaist-ai/prometheus-7b-v1.0`](https://huggingface.co/kaist-ai/prometheus-7b-v1.0)
+        - [`kaist-ai/prometheus-13b-v1.0`](https://huggingface.co/kaist-ai/prometheus-13b-v1.0)
+    """
+
     scoring_criteria: str
     score_descriptions: Dict[int, str]
 
@@ -39,6 +61,34 @@ def input_args_names(self) -> List[str]:
 
     def generate_prompt(
         self, input: str, generations: str, ref_completion: str, **_: Any
     ) -> Prompt:
+        """Generates a prompt following the Prometheus specification.
+
+        Args:
+            input (str): the input to be used for the prompt.
+            generations (str): the generation to be used for the prompt, in
+                this case, the one to be critiqued.
+            ref_completion (str): the reference completion to be used for the prompt,
+                assumed to be the one with the highest score.
+
+        Returns:
+            Prompt: the generated prompt.
+
+        Examples:
+            >>> from distilabel.tasks.critique import PrometheusTask
+            >>> task = PrometheusTask(
+            ...     scoring_criteria="Overall quality of the responses provided.",
+            ...     score_descriptions={0: "false", 1: "partially false", 2: "average", 3: "partially true", 4: "true"},
+            ... )
+            >>> task.generate_prompt(
+            ...     input="What are the first 5 Fibonacci numbers?",
+            ...     generations="0 1 1 2 3",
+            ...     ref_completion="0 1 1 2 3",
+            ... )
+            Prompt(
+                system_prompt="You are a fair evaluator language model.",
+                formatted_prompt="###Task Description:...",
+            )
+        """
         render_kwargs = {
             "instruction": input,
             "completion": generations,
diff --git a/src/distilabel/tasks/critique/ultracm.py b/src/distilabel/tasks/critique/ultracm.py
index a4fd751e14..d7dc08938b 100644
--- a/src/distilabel/tasks/critique/ultracm.py
+++ b/src/distilabel/tasks/critique/ultracm.py
@@ -28,6 +28,23 @@
 
 @dataclass
 class UltraCMTask(CritiqueTask):
+    """A `CritiqueTask` following the prompt template used by UltraCM (from UltraFeedback).
+
+    Args:
+        system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.
+
+    Disclaimer:
+        Since the UltraCM model has been trained on data generated via the OpenAI API, the
+        prompting strategy may only be consistent / compliant with either GPT-3.5 or GPT-4
+        from the OpenAI API, or with the UltraCM model itself. Any other model may fail to
+        generate a structured output, or may provide an incorrect / inaccurate critique.
+
+    References:
+        - [`UltraFeedback: Boosting Language Models with High-quality Feedback`](https://arxiv.org/abs/2310.01377)
+        - [`UltraFeedback - GitHub Repository`](https://github.com/OpenBMB/UltraFeedback)
+        - [`openbmb/UltraCM-13b`](https://huggingface.co/openbmb/UltraCM-13b)
+    """
+
     __jinja2_template__: ClassVar[str] = _ULTRACM_TEMPLATE
 
     system_prompt: str = (
@@ -37,6 +54,28 @@ class UltraCMTask(CritiqueTask):
     )
 
     def generate_prompt(self, input: str, generations: str, **_: Any) -> Prompt:
+        """Generates a prompt following the UltraCM specification.
+
+        Args:
+            input (str): the input to be used for the prompt.
+            generations (str): the generation to be used for the prompt, in
+                this case, the one to be critiqued.
+
+        Returns:
+            Prompt: the generated prompt.
+
+        Examples:
+            >>> from distilabel.tasks.critique import UltraCMTask
+            >>> task = UltraCMTask()
+            >>> task.generate_prompt(
+            ...     input="What are the first 5 Fibonacci numbers?",
+            ...     generations="0 1 1 2 3",
+            ... )
+            Prompt(
+                system_prompt="User: A one-turn chat between a curious user ...",
+                formatted_prompt="User: Given my answer to an instruction, your role ...",
+            )
+        """
         render_kwargs = {
             "instruction": input,
             "completion": generations,
...", ) """ render_kwargs = { diff --git a/src/distilabel/tasks/preference/ultrafeedback.py b/src/distilabel/tasks/preference/ultrafeedback.py index deec1d2249..7583d085a5 100644 --- a/src/distilabel/tasks/preference/ultrafeedback.py +++ b/src/distilabel/tasks/preference/ultrafeedback.py @@ -56,6 +56,10 @@ class UltraFeedbackTask(PreferenceTask): system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`. task_description (Union[str, None], optional): the description of the task. Defaults to `None`. ratings (Union[List[Rating], None], optional): the ratings to be used for the task. Defaults to `None`. + + References: + - [`UltraFeedback: Boosting Language Models with High-quality Feedback`](https://arxiv.org/abs/2310.01377) + - [`UltraFeedback - GitHub Repository`](https://github.com/OpenBMB/UltraFeedback) """ ratings: List[Rating] @@ -92,7 +96,7 @@ def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Promp >>> task.generate_prompt("What are the first 5 Fibonacci numbers?", ["0 1 1 2 3", "0 1 1 2 3"]) Prompt( system_prompt="Your role is to evaluate text quality based on given criteria.", - formatted_prompt="# General Text Quality Assessment\nEvaluate the model's ...", + formatted_prompt="# General Text Quality Assessment...", ) """ render_kwargs = { diff --git a/src/distilabel/tasks/preference/ultrajudge.py b/src/distilabel/tasks/preference/ultrajudge.py index 02d04e676f..c61a220e35 100644 --- a/src/distilabel/tasks/preference/ultrajudge.py +++ b/src/distilabel/tasks/preference/ultrajudge.py @@ -47,6 +47,10 @@ class UltraJudgeTask(PreferenceTask): task_description (Union[str, None], optional): the description of the task. Defaults to `None`. areas (List[str], optional): the areas to be used for the task. Defaults to a list of four areas: "Practical Accuracy", "Clarity & Transparency", "Authenticity & Reliability", and "Compliance with Intent". + + References: + - [`UltraFeedback: Boosting Language Models with High-quality Feedback`](https://arxiv.org/abs/2310.01377) + - [`Judging LLM-as-a-Judge with MT-Bench and Chatbot Arena`](https://arxiv.org/abs/2306.05685) """ system_prompt: str = ( diff --git a/src/distilabel/tasks/prompt.py b/src/distilabel/tasks/prompt.py index 010e38e9d0..5969cd66e7 100644 --- a/src/distilabel/tasks/prompt.py +++ b/src/distilabel/tasks/prompt.py @@ -66,7 +66,7 @@ def format_as(self, format: SupportedFormats) -> Union[str, List[ChatCompletion] ... formatted_prompt="What are the first 5 Fibonacci numbers?", ... ) >>> prompt.format_as("default") - 'You are a helpful assistant.\nWhat are the first 5 Fibonacci numbers?' + 'You are a helpful assistant. What are the first 5 Fibonacci numbers?' """ if format == "default": return f"{self.system_prompt}\n{self.formatted_prompt}" diff --git a/src/distilabel/tasks/text_generation/principles.py b/src/distilabel/tasks/text_generation/principles.py index 27866a7a9a..cfc9ff7252 100644 --- a/src/distilabel/tasks/text_generation/principles.py +++ b/src/distilabel/tasks/text_generation/principles.py @@ -18,7 +18,8 @@ class UltraFeedbackPrinciples: be injected into the system prompt given to the LLM. 
References: - - https://github.com/OpenBMB/UltraFeedback + - [`UltraFeedback: Boosting Language Models with High-quality Feedback`](https://arxiv.org/abs/2310.01377) + - [`UltraFeedback - GitHub Repository`](https://github.com/OpenBMB/UltraFeedback) """ helpfulness = [ diff --git a/src/distilabel/tasks/text_generation/self_instruct.py b/src/distilabel/tasks/text_generation/self_instruct.py index 7f20c191ac..a0cb9d74c0 100644 --- a/src/distilabel/tasks/text_generation/self_instruct.py +++ b/src/distilabel/tasks/text_generation/self_instruct.py @@ -40,8 +40,6 @@ class SelfInstructTask(TextGenerationTask): """A `TextGenerationTask` following the Self-Instruct specification for building the prompts. - Reference: https://github.com/yizhongw/self-instruct - Args: system_prompt (str, optional): the system prompt to be used. Defaults to `None`. principles (Dict[str, List[str]], optional): the principles to be used for the system prompt. @@ -52,6 +50,10 @@ class SelfInstructTask(TextGenerationTask): "AI assistant". num_instructions (int, optional): the number of instructions to be used for the prompt. Defaults to 5. + + References: + - [`Self-Instruct: Aligning Language Models with Self-Generated Instructions`](https://arxiv.org/abs/2212.10560) + - [`Self-Instruct - GitHub Repository`](https://github.com/yizhongw/self-instruct) """ system_prompt: str = ( @@ -79,7 +81,7 @@ def generate_prompt(self, input: str, **_: Any) -> Prompt: >>> task.generate_prompt("What are the first 5 Fibonacci numbers?") Prompt( system_prompt="You are a helpful assistant.", - formatted_prompt="# Task Description\nDevelop 2 user queries that ...", + formatted_prompt="# Task Description ...", ) """ render_kwargs = {