From 8cd0cf4c3e5c77752be859143cc657119a20e671 Mon Sep 17 00:00:00 2001
From: gabrielmbmb
AI Feedback (AIF) framework to build datasets with and for LLMs:
diff --git a/dev/reference/distilabel/llm/together/index.html b/dev/reference/distilabel/llm/together/index.html
index a93d2c8bba..e89cb20f0e 100644
--- a/dev/reference/distilabel/llm/together/index.html
+++ b/dev/reference/distilabel/llm/together/index.html
@@ -2138,7 +2138,21 @@ class TogetherInferenceLLM(LLM):
def __init__(
self,
task: "Task",
@@ -7299,59 +7313,73 @@
"""Returns the name of the Together Inference model."""
return self.model
- def _generate(
- self,
- inputs: List[Dict[str, Any]],
- num_generations: int = 1,
- ) -> List[List[LLMOutput]]:
- """Generates `num_generations` for each input in `inputs`.
-
- Args:
- inputs (List[Dict[str, Any]]): the inputs to be used for generation.
- num_generations (int, optional): the number of generations to be performed for each
- input. Defaults to 1.
-
- Returns:
- List[List[LLMOutput]]: the generated outputs.
- """
- prompts = self._generate_prompts(inputs, default_format=None)
- outputs = []
- for prompt in prompts:
- batch = []
- for _ in range(num_generations):
- output = together.Complete.create(
- prompt=prompt,
- model=self.model,
- max_tokens=self.max_new_tokens,
- stop=self.stop,
- temperature=self.temperature,
- top_k=self.top_k,
- top_p=self.top_p,
- repetition_penalty=self.repetition_penalty,
- logprobs=self.logprobs,
- )
- if output["output"]["choices"] is not None:
- for choice in output["output"]["choices"]:
- try:
- parsed_response = self.task.parse_output(
- choice["text"].strip()
- )
- except Exception as e:
- logger.error(
- f"Error parsing Together Inference response: {e}"
- )
- parsed_response = None
- batch.append(
- LLMOutput(
- model_name=self.model_name,
- prompt_used=prompt,
- raw_output=choice["text"],
- parsed_output=parsed_response,
- )
- )
- if len(batch) > 0:
- outputs.append(batch)
- return outputs
+ def _generate_single_output(self, prompt: str) -> LLMOutput:
+ """Runs the Together Inference text generation function over a single prompt
+ producing a single `LLMOutput`.
+
+ Args:
+ prompt (str): the formatted prompt to be provided to the Together Inference
+ endpoint.
+
+ Raises:
+ RuntimeError: raised if the Together Inference endpoint fails.
+ """
+ try:
+ output = together.Complete.create(
+ prompt=prompt,
+ model=self.model,
+ max_tokens=self.max_new_tokens,
+ stop=self.stop,
+ temperature=self.temperature,
+ top_k=self.top_k,
+ top_p=self.top_p,
+ repetition_penalty=self.repetition_penalty,
+ logprobs=self.logprobs,
+ )
+ except Exception as e:
+ raise RuntimeError(
+ f"Together Inference generation failed with exception: {e}"
+ ) from e
+
+ if output["output"]["choices"] is None or len(output["output"]["choices"]) < 1: # type: ignore
+ raise RuntimeError("Together Inference generation returned no generations.")
+
+ choice = output["output"]["choices"][0] # type: ignore
+ try:
+ parsed_response = self.task.parse_output(choice["text"].strip())
+ except Exception as e:
+ logger.error(f"Error parsing Together Inference response: {e}")
+ parsed_response = None
+
+ return LLMOutput(
+ model_name=self.model_name,
+ prompt_used=prompt,
+ raw_output=choice["text"] or None,
+ parsed_output=parsed_response,
+ )
+
+ def _generate(
+ self,
+ inputs: List[Dict[str, Any]],
+ num_generations: int = 1,
+ ) -> List[List[LLMOutput]]:
+ """Generates `num_generations` for each input in `inputs`.
+
+ Args:
+ inputs (List[Dict[str, Any]]): the inputs to be used for generation.
+ num_generations (int, optional): the number of generations to be performed for each
+ input. Defaults to 1.
+
+ Returns:
+ List[List[LLMOutput]]: the generated outputs.
+ """
+ prompts = self._generate_prompts(inputs, default_format=None)
+ outputs = []
+ for prompt in prompts:
+ outputs.append(
+ [self._generate_single_output(prompt) for _ in range(num_generations)]
+ )
+ return outputs
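The refactor above moves the per-prompt Together Inference call into _generate_single_output, so a failing request now raises a RuntimeError for that prompt instead of silently yielding an empty batch. For reference, a minimal usage sketch of the refactored path (the model identifier and constructor arguments are assumptions; check the signature of TogetherInferenceLLM in your installed version and make sure TOGETHER_API_KEY is set):

from distilabel.llm import TogetherInferenceLLM
from distilabel.tasks import TextGenerationTask

# Sketch only: the keyword arguments mirror the attributes referenced in
# `_generate_single_output` (model, max_new_tokens, temperature, ...), but the
# exact constructor signature may differ in your installed version.
llm = TogetherInferenceLLM(
    task=TextGenerationTask(),
    model="togethercomputer/llama-2-7b-chat",  # assumed model identifier
    max_new_tokens=256,
    temperature=0.7,
)

# `generate` formats the prompts via the task and delegates to `_generate`,
# which now calls `_generate_single_output` once per requested generation.
outputs = llm.generate(
    inputs=[{"input": "Write one sentence about synthetic preference data."}],
    num_generations=2,
)
print(outputs[0][0]["parsed_output"])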
diff --git a/dev/search/search_index.json b/dev/search/search_index.json
index 93e9436383..eeba939797 100644
--- a/dev/search/search_index.json
+++ b/dev/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"distilabel","text":"class TogetherInferenceLLM(LLM):
def __init__(
self,
task: "Task",
@@ -2266,59 +2280,73 @@
"""Returns the name of the Together Inference model."""
return self.model
- def _generate(
- self,
- inputs: List[Dict[str, Any]],
- num_generations: int = 1,
- ) -> List[List[LLMOutput]]:
- """Generates `num_generations` for each input in `inputs`.
-
- Args:
- inputs (List[Dict[str, Any]]): the inputs to be used for generation.
- num_generations (int, optional): the number of generations to be performed for each
- input. Defaults to 1.
-
- Returns:
- List[List[LLMOutput]]: the generated outputs.
- """
- prompts = self._generate_prompts(inputs, default_format=None)
- outputs = []
- for prompt in prompts:
- batch = []
- for _ in range(num_generations):
- output = together.Complete.create(
- prompt=prompt,
- model=self.model,
- max_tokens=self.max_new_tokens,
- stop=self.stop,
- temperature=self.temperature,
- top_k=self.top_k,
- top_p=self.top_p,
- repetition_penalty=self.repetition_penalty,
- logprobs=self.logprobs,
- )
- if output["output"]["choices"] is not None:
- for choice in output["output"]["choices"]:
- try:
- parsed_response = self.task.parse_output(
- choice["text"].strip()
- )
- except Exception as e:
- logger.error(
- f"Error parsing Together Inference response: {e}"
- )
- parsed_response = None
- batch.append(
- LLMOutput(
- model_name=self.model_name,
- prompt_used=prompt,
- raw_output=choice["text"],
- parsed_output=parsed_response,
- )
- )
- if len(batch) > 0:
- outputs.append(batch)
- return outputs
+ def _generate_single_output(self, prompt: str) -> LLMOutput:
+ """Runs the Together Inference text generation function over a single prompt
+ producing a single `LLMOutput`.
+
+ Args:
+ prompt (str): the formatted prompt to be provided to the Together Inference
+ endpoint.
+
+ Raises:
+ RuntimeError: raised if the Together Inference endpoint fails.
+ """
+ try:
+ output = together.Complete.create(
+ prompt=prompt,
+ model=self.model,
+ max_tokens=self.max_new_tokens,
+ stop=self.stop,
+ temperature=self.temperature,
+ top_k=self.top_k,
+ top_p=self.top_p,
+ repetition_penalty=self.repetition_penalty,
+ logprobs=self.logprobs,
+ )
+ except Exception as e:
+ raise RuntimeError(
+ f"Together Inference generation failed with exception: {e}"
+ ) from e
+
+ if output["output"]["choices"] is None or len(output["output"]["choices"]) < 1: # type: ignore
+ raise RuntimeError("Together Inference generation returned no generations.")
+
+ choice = output["output"]["choices"][0] # type: ignore
+ try:
+ parsed_response = self.task.parse_output(choice["text"].strip())
+ except Exception as e:
+ logger.error(f"Error parsing Together Inference response: {e}")
+ parsed_response = None
+
+ return LLMOutput(
+ model_name=self.model_name,
+ prompt_used=prompt,
+ raw_output=choice["text"] or None,
+ parsed_output=parsed_response,
+ )
+
+ def _generate(
+ self,
+ inputs: List[Dict[str, Any]],
+ num_generations: int = 1,
+ ) -> List[List[LLMOutput]]:
+ """Generates `num_generations` for each input in `inputs`.
+
+ Args:
+ inputs (List[Dict[str, Any]]): the inputs to be used for generation.
+ num_generations (int, optional): the number of generations to be performed for each
+ input. Defaults to 1.
+
+ Returns:
+ List[List[LLMOutput]]: the generated outputs.
+ """
+ prompts = self._generate_prompts(inputs, default_format=None)
+ outputs = []
+ for prompt in prompts:
+ outputs.append(
+ [self._generate_single_output(prompt) for _ in range(num_generations)]
+ )
+ return outputs
"},{"location":"#installation","title":"Installation","text":"
Requires Python 3.8+
pip install distilabel\n
In addition, the following extras are available:
hf-transformers: for using models available in transformers package via the TransformersLLM integration.
hf-inference-endpoints: for using the HuggingFace Inference Endpoints via the InferenceEndpointsLLM integration.
openai: for using OpenAI API models via the OpenAILLM integration.
vllm: for using vllm serving engine via the vLLM integration.
llama-cpp: for using llama-cpp-python as Python bindings for llama.cpp.
together: for using Together Inference via their Python client.
argilla
: for exporting the generated datasets to Argilla.from datasets import load_dataset\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.pipeline import pipeline\nfrom distilabel.tasks import TextGenerationTask\n\ndataset = (\n load_dataset(\"HuggingFaceH4/instruction-dataset\", split=\"test[:10]\")\n .remove_columns([\"completion\", \"meta\"])\n .rename_column(\"prompt\", \"input\")\n)\n\ntask = TextGenerationTask() # (1)\n\ngenerator = OpenAILLM(task=task, max_new_tokens=512) # (2)\n\npipeline = pipeline(\"preference\", \"instruction-following\", generator=generator) # (3)\n\ndataset = pipeline.generate(dataset)\n
1. A Task for generating text given an instruction.
2. An LLM for generating text using the Task created in the first step. As the LLM will generate text, it will be a generator.
3. A Pipeline built using the pipeline function and the generator created in step 2. The pipeline function will create a labeller LLM using OpenAILLM with the UltraFeedback task for instruction following assessment.
Note
To run the script successfully, ensure you have assigned your OpenAI API key to the OPENAI_API_KEY
environment variable.
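If you prefer to set the key from Python rather than the shell, a minimal equivalent (with a placeholder value) is:

import os

# Placeholder value: replace with your actual OpenAI API key, or export
# OPENAI_API_KEY in your shell before running the script.
os.environ["OPENAI_API_KEY"] = "sk-..."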
For a more complete example, check out our awesome notebook on Google Colab:
"},{"location":"#navigation","title":"Navigation","text":"Concept Guides
Understand the components and their interactions.
API Reference
Technical description of the classes and functions.
This page aims to get you familiarized with the basic concepts of the framework, describing the most important components or classes and how they work together. The following sections will guide you through the primary components of the framework: Pipeline
, LLM
(both generator and labeller), and the Task
.
distilabel flow diagram"},{"location":"concepts/#components","title":"Components","text":""},{"location":"concepts/#task","title":"Task","text":"
The Task
 class is the one in charge of defining the behaviour of the LLM
, and therefore it can define if an LLM is a generator
or a labeller
. To do so, the Task
class generates the prompt that will be sent to the LLM
 from a template. It also defines which input arguments are required to generate the prompt, and which output arguments will be extracted from the LLM
response. It's worth mentioning that the Task
class doesn't return a str
, but a Prompt
class which will generate the str
format depending on the LLM
that is going to be used (Zephyr, Llama, OpenAI, etc).
from distilabel.tasks import UltraJudgeTask\n\ntask = UltraJudgeTask()\n\ninput = (\n \"Can you provide a corrected version of the following sentence using proper \"\n 'English grammar? \"We going to the beach\" Additionally, could you please '\n \"provide your correction in an Excel table format with the following columns: \"\n \"| Incorrect Sentence | Corrected Sentence | |-------------------|--------------------|\"\n)\n\ngenerations = [\n (\n \"| Incorrect Sentence | Corrected Sentence |\\n|-------------------|-------------------\"\n '-----|\\n| \"We going to the beach\" | \"We are going to the beach\" |\\n\\nCorrectio'\n 'n: The verb in the second sentence (\"are\") changes to reflect the subject\\'s (\"w'\n 'e\") agreement with the verb \"be.\" This is called subject-verb agreement. In the '\n 'first sentence, the verb \"going\" infers that the action is ongoing or in a contin'\n \"uous state, which is not the case. Therefore, the second sentence is grammatically \"\n \"correct.\"\n ),\n (\n \"| Incorrect Sentence | Corrected Sentence |\\n|-------------------|-------------------\"\n \"-----|\\n| We going to the beach | We are going to the beach | \\n\\nHere's a breakdo\"\n 'wn of the correction:\\n\\n- \"We going to the beach\" is an example of a subject-ve'\n 'rb agreement error. The verb changing from the third person singular (\"is\") to t'\n 'he third person plural (\"are\") in this instance, as there are multiple people go'\n 'ing to the beach.\\n- The \"g\" in \"going\" changes to an \"e\" due to a hard \"g\"'\n ' sound being followed by an \"e,\" which is a common spelling rule in English.'\n ),\n]\n\n\nprompt = task.generate_prompt(input, generations)\nprint(prompt.format_as(\"default\")) # format as \"openai\", \"zephyr\", \"llama\", ...\n
"},{"location":"concepts/#llm","title":"LLM","text":"The LLM
class represents a language model and implements the way to interact with it. It also defines the generation parameters that can be passed to the model to tweak the generations. As mentioned above, the LLM
will have a Task
 associated that it will use to generate the prompt and extract the output from the generation.
from distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraJudgeTask\n\nlabeller = OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=UltraJudgeTask(),\n prompt_format=\"openai\",\n max_new_tokens=2048,\n temperature=0.0,\n)\n\noutputs = labeller.generate(\n inputs=[\n {\n \"input\": \"Here's a math problem that you need to resolve: 2 + 2 * 3. What's the result of this problem? Explain it\",\n \"generations\": [\n (\n \"The output of the math problem 2 + 2 * 3 is calculated by following \"\n \"the order of operations (PEMDAS). First, perform the multiplication: \"\n \"2 * 3 = 6. Then, perform the addition: 2 + 6 = 8. Therefore, the \"\n \"output of the problem is 8.\"\n ),\n (\n \"The correct solution to the math problem is 8. To get the correct \"\n \"answer, we follow the order of operations (PEMDAS) and perform \"\n \"multiplication before addition. So, first, we solve 2 * 3 = 6, \"\n \"then we add 2 to 6 to get 8.\"\n ),\n ],\n }\n ]\n)\n\nprint(outputs[0][0][\"parsed_output\"])\n
Note
To run the script successfully, ensure you have assigned your OpenAI API key to the OPENAI_API_KEY
environment variable.
The Pipeline
 class orchestrates the whole generation and labelling process, and it's in charge of the batching of the input dataset, as well as reporting the generation progress. It's worth mentioning that it is not mandatory to pass both a generator LLM
and a labeller LLM
to the Pipeline
class, as it can also be used only for generation or labelling.
Pipelines
Generator and labellerOnly generatorOnly labellerfrom datasets import load_dataset\nfrom distilabel.llm import LlamaCppLLM, OpenAILLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import TextGenerationTask, UltraJudgeTask\nfrom llama_cpp import Llama\n\ndataset = load_dataset(\"argilla/distilabel-docs\", split=\"train\")\ndataset = dataset.remove_columns(\n [\n column\n for column in dataset.column_names\n if column not in [\"input\", \"generations\"]\n ]\n)\n\npipeline = Pipeline(\n generator=LlamaCppLLM(\n model=Llama(\n model_path=\"./llama-2-7b-chat.Q4_0.gguf\",\n verbose=False,\n n_ctx=1024,\n ),\n task=TextGenerationTask(),\n max_new_tokens=512,\n prompt_format=\"llama2\",\n ),\n labeller=OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=UltraJudgeTask(),\n prompt_format=\"openai\",\n max_new_tokens=1024,\n num_threads=1,\n temperature=0.0,\n ),\n)\n\n\ndataset = pipeline.generate(dataset, num_generations=2, batch_size=5)\n
Note
To run the script successfully, ensure you have assigned your OpenAI API key to the OPENAI_API_KEY
environment variable and that you have downloaded the file llama-2-7b-chat.Q4_0.gguf into the same folder as the script.
from datasets import load_dataset\nfrom distilabel.llm import LlamaCppLLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import TextGenerationTask\nfrom llama_cpp import Llama\n\ndataset = load_dataset(\"argilla/distilabel-docs\", split=\"train\")\ndataset = dataset.remove_columns(\n [column for column in dataset.column_names if column not in [\"input\"]]\n)\n\npipeline = Pipeline(\n generator=LlamaCppLLM(\n model=Llama(\n model_path=\"./llama-2-7b-chat.Q4_0.gguf\",\n verbose=False,\n n_ctx=1024,\n ),\n task=TextGenerationTask(),\n max_new_tokens=512,\n prompt_format=\"llama2\",\n ),\n)\n\n\ndataset = pipeline.generate(dataset, num_generations=2, batch_size=5)\n
from datasets import load_dataset\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import UltraJudgeTask\n\ndataset = load_dataset(\"argilla/distilabel-docs\", split=\"train\")\ndataset = dataset.remove_columns(\n [\n column\n for column in dataset.column_names\n if column not in [\"input\", \"generations\"]\n ]\n)\n\npipeline = Pipeline(\n labeller=OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=UltraJudgeTask(),\n prompt_format=\"openai\",\n max_new_tokens=1024,\n num_threads=1,\n temperature=0.0,\n ),\n)\n\n\ndataset = pipeline.generate(dataset, num_generations=2, batch_size=5)\n
"},{"location":"reference/SUMMARY/","title":"SUMMARY","text":"CustomDataset
","text":" Bases: Dataset
A custom dataset class that extends from datasets.Dataset
and is used to generate an Argilla FeedbackDataset
instance from the pre-defined configuration within the task provided to Pipeline.generate
.
src/distilabel/dataset.py
class CustomDataset(Dataset):\n \"\"\"A custom dataset class that extends from `datasets.Dataset` and is used to generate\n an Argilla `FeedbackDataset` instance from the pre-defined configuration within the task\n provided to `Pipeline.generate`.\n \"\"\"\n\n task: Union[\"Task\", None] = None\n\n def to_argilla(self) -> \"FeedbackDataset\":\n \"\"\"Converts the dataset to an Argilla `FeedbackDataset` instance, based on the\n task defined in the dataset as part of `Pipeline.generate`.\n\n Raises:\n ImportError: if the argilla library is not installed.\n ValueError: if the task is not set.\n\n Returns:\n FeedbackDataset: the Argilla `FeedbackDataset` instance.\n \"\"\"\n if not _ARGILLA_AVAILABLE:\n raise ImportError(\n \"To use `to_argilla` method is required to have `argilla` installed. \"\n \"Please install it with `pip install argilla`.\"\n )\n\n if self.task is None:\n raise ValueError(\n \"The task is not set. Please set it with `dataset.task = <task>`.\"\n )\n\n try:\n rg_dataset = self.task.to_argilla_dataset(dataset_row=self[0]) # type: ignore\n except Exception as e:\n raise ValueError(\n f\"Error while converting the dataset to an Argilla `FeedbackDataset` instance: {e}\"\n ) from e\n\n # try:\n # rg_dataset = infer_model_metadata_properties(\n # hf_dataset=self, rg_dataset=rg_dataset\n # )\n # except Exception as e:\n # warnings.warn(\n # f\"Error while adding the model metadata properties: {e}\",\n # UserWarning,\n # stacklevel=2,\n # )\n\n for dataset_row in self:\n if any(\n dataset_row[input_arg_name] is None # type: ignore\n for input_arg_name in self.task.input_args_names\n ):\n continue\n try:\n rg_dataset.add_records(\n self.task._to_argilla_record(dataset_row=dataset_row) # type: ignore\n ) # type: ignore\n except Exception as e:\n warnings.warn(\n f\"Error while converting a row into an Argilla `FeedbackRecord` instance: {e}\",\n UserWarning,\n stacklevel=2,\n )\n return rg_dataset\n
"},{"location":"reference/distilabel/dataset/#distilabel.dataset.CustomDataset.to_argilla","title":"to_argilla()
","text":"Converts the dataset to an Argilla FeedbackDataset
instance, based on the task defined in the dataset as part of Pipeline.generate
.
Raises:
Type DescriptionImportError
if the argilla library is not installed.
ValueError
if the task is not set.
Returns:
Name Type DescriptionFeedbackDataset
FeedbackDataset
the Argilla FeedbackDataset
instance.
src/distilabel/dataset.py
def to_argilla(self) -> \"FeedbackDataset\":\n \"\"\"Converts the dataset to an Argilla `FeedbackDataset` instance, based on the\n task defined in the dataset as part of `Pipeline.generate`.\n\n Raises:\n ImportError: if the argilla library is not installed.\n ValueError: if the task is not set.\n\n Returns:\n FeedbackDataset: the Argilla `FeedbackDataset` instance.\n \"\"\"\n if not _ARGILLA_AVAILABLE:\n raise ImportError(\n \"To use `to_argilla` method is required to have `argilla` installed. \"\n \"Please install it with `pip install argilla`.\"\n )\n\n if self.task is None:\n raise ValueError(\n \"The task is not set. Please set it with `dataset.task = <task>`.\"\n )\n\n try:\n rg_dataset = self.task.to_argilla_dataset(dataset_row=self[0]) # type: ignore\n except Exception as e:\n raise ValueError(\n f\"Error while converting the dataset to an Argilla `FeedbackDataset` instance: {e}\"\n ) from e\n\n # try:\n # rg_dataset = infer_model_metadata_properties(\n # hf_dataset=self, rg_dataset=rg_dataset\n # )\n # except Exception as e:\n # warnings.warn(\n # f\"Error while adding the model metadata properties: {e}\",\n # UserWarning,\n # stacklevel=2,\n # )\n\n for dataset_row in self:\n if any(\n dataset_row[input_arg_name] is None # type: ignore\n for input_arg_name in self.task.input_args_names\n ):\n continue\n try:\n rg_dataset.add_records(\n self.task._to_argilla_record(dataset_row=dataset_row) # type: ignore\n ) # type: ignore\n except Exception as e:\n warnings.warn(\n f\"Error while converting a row into an Argilla `FeedbackRecord` instance: {e}\",\n UserWarning,\n stacklevel=2,\n )\n return rg_dataset\n
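A short usage sketch of the method above, assuming argilla is installed, an Argilla instance is reachable at the placeholder URL below, and dataset was returned by Pipeline.generate (so dataset.task is already set):

import argilla as rg

# Placeholder connection details for a locally running Argilla instance.
rg.init(api_url="http://localhost:6900", api_key="owner.apikey")

# Convert the `CustomDataset` into an Argilla `FeedbackDataset` and push it.
rg_dataset = dataset.to_argilla()
rg_dataset.push_to_argilla(name="distilabel-dataset", workspace="admin")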
"},{"location":"reference/distilabel/logger/","title":"logger","text":""},{"location":"reference/distilabel/pipeline/","title":"pipeline","text":""},{"location":"reference/distilabel/pipeline/#distilabel.pipeline.Pipeline","title":"Pipeline
","text":"Source code in src/distilabel/pipeline.py
class Pipeline:\n def __init__(\n self,\n generator: Union[\"LLM\", \"ProcessLLM\", \"LLMPool\", None] = None,\n labeller: Union[\"LLM\", \"ProcessLLM\", None] = None,\n ) -> None:\n \"\"\"Initializes the Pipeline class.\n\n Args:\n generator (Union[\"LLM\", None], optional): the LLM to be used for generation.\n Defaults to None.\n labeller (Union[\"LLM\", None], optional): the LLM to be used for labelling.\n Defaults to None.\n\n Raises:\n ValueError: if no LLM is provided.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.llm import OpenAILLM, TransformersLLM\n >>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n >>> from distilabel.pipeline import Pipeline\n >>> generator = TransformersLLM(\n ... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... task=TextGenerationTask(),\n ... prompt_format=\"llama2\",\n ... )\n >>> labeller = OpenAILLM(\n ... model=\"gpt-3.5-turbo\",\n ... task=UltraFeedbackTask.for_text_quality(),\n ... )\n >>> pipeline = Pipeline(generator=generator, labeller=labeller)\n >>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n \"\"\"\n if generator is not None and not isinstance(\n generator, (LLM, ProcessLLM, LLMPool)\n ):\n raise ValueError(\n \"`generator` must be an instance of `LLM`, `ProcessLLM` or `LLMPool`\"\n )\n\n if labeller is not None and not isinstance(labeller, (LLM, ProcessLLM)):\n raise ValueError(\"`labeller` must be an instance of `LLM` or `ProcessLLM`\")\n\n self.generator = generator\n self.labeller = labeller\n\n if self.generator is None and self.labeller is None:\n raise ValueError(\"Either `generator` or `labeller` must be provided.\")\n\n def __repr__(self) -> str:\n return (\n f\"Pipeline(\\n\\tgenerator={self.generator},\\n\\tlabeller={self.labeller}\\n)\"\n )\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield \"generator\", self.generator\n yield \"labeller\", self.labeller\n\n def _validate_dataset(self, dataset: Dataset) -> None:\n \"\"\"Validates that the provided dataset contains the columns needed by the LLMs, and\n warns the user if the columns to be generated already exist.\n\n Args:\n dataset (Dataset): the dataset to be validated.\n\n Raises:\n KeyError: if the dataset does not contain the columns needed by the LLMs.\n \"\"\"\n # Generation LLM has not been provided, so the columns needed by the Labelling\n # LLM must be in the provided dataset\n if self.labeller is not None:\n if self.generator is None:\n try:\n self.labeller.task.validate_dataset(dataset.column_names)\n except KeyError as err:\n raise KeyError(\n \"Labelling LLM expects a dataset with at least the following\"\n f\" columns: {self.labeller.task.input_args_names}, but the provided\"\n f\" dataset just contains: {dataset.column_names}\"\n ) from err\n else:\n expected_columns = (\n dataset.column_names + self.generator.task.output_args_names\n )\n try:\n self.labeller.task.validate_dataset(expected_columns)\n except KeyError as err:\n raise KeyError(\n \"Labelling LLM expects to receive the following columns after the\"\n f\" generation process: {self.labeller.task.input_args_names}, but the\"\n f\" provided dataset including the columns to generate just contains: {expected_columns}\"\n ) from err\n\n if self.generator is not None:\n try:\n self.generator.task.validate_dataset(dataset.column_names)\n except KeyError as err:\n raise 
KeyError(\n \"Generation LLM expects a dataset with the following columns:\"\n f\" {self.generator.task.input_args_names}, but the provided dataset\"\n f\" just contains: {dataset.column_names}\"\n ) from err\n\n # Additionally, we need to check that if the columns to be generated already exist,\n # then we should look for `None`/`null` values and just fulfill those, while skipping\n # the rest. This is useful to be able to continue a generation that broke or a process\n # that was interrupted\n generated_columns = []\n if self.generator is not None:\n generated_columns += self.generator.task.output_args_names\n if self.labeller is not None:\n generated_columns += self.labeller.task.output_args_names\n\n if set(generated_columns) == set(dataset.column_names).intersection(\n set(generated_columns)\n ):\n warnings.warn(\n \"The provided dataset already contains the columns to be generated:\"\n f\" {generated_columns}; which means that the generation process will\"\n \" be skipped for the rows with values for those columns. If you want\"\n \" to re-generate those columns, please remove them from the dataset.\",\n UserWarning,\n stacklevel=2,\n )\n\n def _get_batch_generations(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int,\n shuffle_before_labelling: bool = True,\n progress_callback_func: Union[Callable, None] = None,\n ) -> List[Dict[str, Any]]:\n \"\"\"Gets the batch generations for the given inputs, capturing the futures if the\n LLM returns them, and then processes the batch generations.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int): the number of generations to be performed for each\n input.\n shuffle_before_labelling (bool, optional): whether to shuffle the generations\n before labelling or not. This is useful to avoid the labelling LLM to be\n biased by the order of the generations. Defaults to `True`.\n progress_callback_func (Union[Callable, None], optional): the callback function\n to be called when the progress of the generation process changes. Defaults\n to None.\n\n Returns:\n List[Dict[str, Any]]: the processed batch generations.\n \"\"\"\n outputs = self.generator.generate( # type: ignore\n inputs=inputs,\n num_generations=num_generations,\n progress_callback_func=progress_callback_func,\n )\n batch_generations = []\n if isinstance(outputs, Future):\n batch_generations.extend(outputs.result())\n else:\n batch_generations = outputs\n return self._process_batch_generations(\n batch_generations=batch_generations,\n shuffle_before_labelling=shuffle_before_labelling,\n )\n\n def _get_batch_labels(\n self,\n inputs: List[Dict[str, Any]],\n progress_callback_func: Union[Callable, None] = None,\n ) -> Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]:\n \"\"\"Gets the batch labels for the given inputs.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for labelling. Each dict\n should contain a key with the text generations.\n progress_callback_func (Union[Callable, None], optional): the callback function\n to be called when the progress of the labelling process changes. 
Defaults\n to `None`.\n\n Returns:\n Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]: the batch\n labels.\n \"\"\"\n\n return self.labeller.generate( # type: ignore\n inputs=inputs,\n # `num_generations` is always 1 because labelling the same input multiple times\n # using the same LLM may not make sense\n num_generations=1,\n progress_callback_func=progress_callback_func,\n )\n\n def _process_batch_generations(\n self,\n batch_generations: List[List[\"LLMOutput\"]],\n shuffle_before_labelling: bool = True,\n ) -> List[Dict[str, Any]]:\n \"\"\"Processes the batch generations, combining the outputs of the LLMs into a single\n dictionary.\n\n Args:\n batch_generations (List[List[\"LLMOutput\"]]): the batch generations to be processed.\n shuffle_before_labelling (bool, optional): whether to shuffle the generations\n before labelling or not. This is useful to avoid the labelling LLM to be\n biased by the order of the generations. Defaults to `True`.\n\n Returns:\n List[Dict[str, Any]]: the processed batch generations.\n \"\"\"\n processed_generations = []\n for generations in batch_generations:\n processed_generation = {\n \"generation_model\": [],\n \"generation_prompt\": [],\n \"raw_generation_responses\": [],\n }\n if shuffle_before_labelling:\n random.shuffle(generations)\n for generation in generations:\n processed_generation[\"generation_model\"].append(\n generation[\"model_name\"]\n )\n processed_generation[\"generation_prompt\"].append(\n generation[\"prompt_used\"]\n )\n processed_generation[\"raw_generation_responses\"].append(\n generation[\"raw_output\"]\n )\n # Create `generations` column which is a list with N text generations\n try:\n processed_generation.update(\n **combine_dicts(\n *[\n generation[\"parsed_output\"]\n if generation[\"parsed_output\"] is not None\n else {}\n for generation in generations\n ]\n )\n )\n except Exception as e:\n warnings.warn(\n f\"Generation processing step failed when combining dicts: {e}\",\n UserWarning,\n stacklevel=2,\n )\n processed_generations.append(processed_generation)\n return processed_generations\n\n def _include_generator_outputs_as_inputs(\n self, inputs: List[Dict[str, Any]], outputs: List[Dict[str, Any]]\n ) -> List[Dict[str, Any]]:\n \"\"\"Includes the outputs of the generator as inputs for the labeller.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for labelling.\n outputs (List[Dict[str, Any]]): the outputs of the generator.\n\n Returns:\n List[Dict[str, Any]]: the inputs to be used for labelling.\n \"\"\"\n for input_, output in zip(inputs, outputs):\n # Skip the keys not required by the labelling LLM\n input_.update(\n {\n k: v\n for k, v in output.items()\n if self.labeller is not None\n and k in self.labeller.task.input_args_names\n }\n )\n return inputs\n\n def _process_batch_labels(\n self, batch_labels: List[List[\"LLMOutput\"]]\n ) -> List[Dict[str, Any]]:\n \"\"\"Processes the batch labels, combining the outputs of the LLMs into a single\n dictionary.\n\n Args:\n batch_labels (List[List[\"LLMOutput\"]]): the batch labels to be processed.\n\n Returns:\n List[Dict[str, Any]]: the processed batch labels.\n \"\"\"\n processed_labels = []\n for labels in batch_labels:\n for label in labels:\n if label[\"parsed_output\"] is not None and not isinstance(\n label[\"parsed_output\"], (list, dict)\n ):\n raise ValueError(\n f\"Unsupported type: {type(label['parsed_output'])}\"\n )\n\n processed_label = {\n # Since all the generations for the same `model_name` also share the same\n # 
`prompt_used`, then we just keep the first element in `generations`\n \"labelling_model\": label[\"model_name\"],\n \"labelling_prompt\": label[\"prompt_used\"],\n \"raw_labelling_response\": label[\"raw_output\"],\n }\n try:\n if isinstance(label[\"parsed_output\"], list):\n processed_label.update(**combine_dicts(*label[\"parsed_output\"]))\n elif isinstance(label[\"parsed_output\"], dict):\n processed_label.update(**label[\"parsed_output\"])\n except Exception as e:\n warnings.warn(\n f\"Label processing step failed when combining dicts: {e}\",\n UserWarning,\n stacklevel=2,\n )\n processed_labels.append(processed_label)\n return processed_labels\n\n def _transform_dataset_to_expected_format(\n self, rows: Dict[str, List[Any]]\n ) -> List[Dict[str, Any]]:\n \"\"\"Transforms the `datasets.Dataset` to the expected format required by the LLMs\n during the `generate` process.\n\n Args:\n rows (Dict[str, List[Any]]): the rows to be transformed.\n\n Returns:\n List[Dict[str, Any]]: the transformed rows.\n \"\"\"\n length = len(next(iter(rows.values())))\n\n generator_column_names = []\n if self.generator is not None:\n generator_column_names = self.generator.task.input_args_names\n labeller_column_names = []\n if self.labeller is not None:\n labeller_column_names = self.labeller.task.input_args_names\n column_names = generator_column_names + labeller_column_names\n\n inputs = []\n for i in range(length):\n input = {\n col: values[i] for col, values in rows.items() if col in column_names\n }\n inputs.append(input)\n\n return inputs\n\n def _build_dataset( # noqa: C901\n self,\n dataset: Dataset,\n generations: List[Dict[str, Any]],\n labels: Union[\n List[List[\"LLMOutput\"]],\n Future[List[List[\"LLMOutput\"]]],\n ],\n batch_size: int,\n ) -> CustomDataset:\n \"\"\"Builds the final dataset with either the generations, the labels, or both, depending\n on the LLMs provided to the `Pipeline`.\n\n Args:\n dataset (Dataset): the original dataset.\n generations (List[Dict[str, Any]]): the processed generations.\n labels (Union[List[List[LLMOutput]], Future[List[List[LLMOutput]]]]): the\n processed labels.\n\n Returns:\n CustomDataset: the final dataset.\n\n Raises:\n RuntimeError: if the `Pipeline` fails during the generation or labelling steps.\n \"\"\"\n if self.generator is None:\n generations = [{} for _ in range(len(dataset))]\n else:\n generator_column_names = [\n \"generation_model\",\n \"generation_prompt\",\n \"raw_generation_responses\",\n ] + self.generator.task.output_args_names\n\n if len(generations) < len(dataset):\n generations.extend(\n [\n {key: None for key in generator_column_names}\n for _ in range(len(dataset) - len(generations))\n ]\n )\n\n # Add missing keys/columns with a `None` value\n for generation in generations:\n for key in generator_column_names:\n if key not in generation:\n generation.update({key: None})\n\n if self.labeller is None:\n processed_labels = [{} for _ in range(len(dataset))] # type: ignore\n else:\n batch_labels = []\n if self.labeller.return_futures:\n for i, future in enumerate(labels, start=1): # type: ignore\n try:\n batch_labels.extend(future.result())\n except Exception as e:\n logger.error(\n f\"An error occurred when getting the result from the labeller: {e}\"\n )\n num_outputs = (\n batch_size\n if i * batch_size <= len(dataset)\n else len(dataset) % batch_size\n )\n batch_labels.append(\n [\n LLMOutput(\n model_name=self.labeller.model_name,\n prompt_used=None,\n raw_output=None,\n parsed_output=None,\n )\n for _ in range(num_outputs)\n ]\n 
)\n\n processed_labels = self._process_batch_labels(\n batch_labels=batch_labels or cast(List[List[\"LLMOutput\"]], labels)\n )\n\n labeller_column_names = [\n \"labelling_model\",\n \"labelling_prompt\",\n \"raw_labelling_response\",\n ] + self.labeller.task.output_args_names\n\n # Ensure the lengths of the labels and the dataset match (when pipeline\n # fails in an intermediate step, the labels may be shorter than the dataset)\n if len(processed_labels) < len(dataset):\n processed_labels.extend(\n [\n {key: None for key in labeller_column_names}\n for _ in range(len(dataset) - len(processed_labels))\n ]\n )\n\n # Add missing keys/columns with a `None` value\n for label in processed_labels:\n for key in labeller_column_names:\n if key not in label:\n label.update({key: None})\n\n _flattened_dataset = dataset.flatten_indices()\n _dataset = Dataset.from_dict({}, split=Split.TRAIN)\n for row, generation, processed_label in zip(\n _flattened_dataset, generations, processed_labels\n ):\n _dataset = _dataset.add_item({**row, **generation, **processed_label}) # type: ignore\n # Dynamically remaps the `datasets.Dataset` to be a `CustomDataset` instance\n _dataset.__class__ = CustomDataset\n if self.generator is not None and self.labeller is None:\n if self.generator.task.__type__ != \"generation\": # type: ignore\n self.generator.task.__type__ = \"generation\" # type: ignore\n _dataset.task = self.generator.task # type: ignore\n elif self.labeller is not None:\n if self.labeller.task.__type__ != \"labelling\": # type: ignore\n self.labeller.task.__type__ = \"labelling\" # type: ignore\n _dataset.task = self.labeller.task # type: ignore\n return _dataset # type: ignore\n\n def _teardown(self) -> None:\n if self.generator is not None and isinstance(\n self.generator, (ProcessLLM, LLMPool)\n ):\n self.generator.teardown()\n\n if self.labeller is not None and isinstance(self.labeller, ProcessLLM):\n self.labeller.teardown()\n\n def _generate( # noqa: C901\n self,\n dataset: Dataset,\n num_generations: int = 1,\n batch_size: int = 1,\n shuffle_before_labelling: bool = True,\n enable_checkpoints: bool = True,\n display_progress_bar: bool = False,\n ) -> CustomDataset:\n \"\"\"Generates the outputs for the given dataset using the LLMs provided to the\n `Pipeline`.\"\"\"\n\n if (\n self.labeller is not None\n and self.generator is not None\n and num_generations < 2\n ):\n warnings.warn(\n f\"Provided `num_generations={num_generations}` which implies that the \"\n \"`generator` LLM will just run once, while the `labelling` LLM expects \"\n \"to receive a list of N inputs to label, where N is > 1. 
If this is not \"\n \"intended, make sure to set `num_generations` to a value higher or \"\n \"equal to 2.\",\n UserWarning,\n stacklevel=2,\n )\n\n self._validate_dataset(dataset)\n\n generations: List[Dict[str, Any]] = []\n labels: Union[\n List[List[\"LLMOutput\"]],\n Future[List[List[\"LLMOutput\"]]],\n ] = []\n\n (\n generation_progress_func,\n labelling_progress_func,\n ) = get_progress_bars_for_pipeline(\n num_rows=len(dataset),\n num_generations=num_generations,\n display_progress_bar=display_progress_bar,\n )\n\n num_batches = math.ceil(len(dataset) / batch_size)\n\n for batch_i, rows in enumerate(dataset.iter(batch_size=batch_size), start=1):\n logger.info(f\"Processing batch {batch_i} of {num_batches}...\")\n inputs = self._transform_dataset_to_expected_format(rows) # type: ignore\n\n if self.generator is not None:\n logger.info(f\"Calling generator for batch {batch_i}...\")\n try:\n batch_generations = self._get_batch_generations(\n inputs=inputs,\n num_generations=num_generations,\n shuffle_before_labelling=shuffle_before_labelling,\n progress_callback_func=generation_progress_func,\n )\n generations.extend(batch_generations)\n except Exception as e:\n if not enable_checkpoints:\n raise RuntimeError(\n \"`Pipeline.generate` failed during generation step. Setting `enable_checkpoints=True` is recommended!\"\n ) from e\n logger.error(\n f\"`Pipeline.generate` failed during generation step with exception: {e}\"\n )\n return self._build_dataset(\n dataset,\n generations=generations,\n labels=labels,\n batch_size=batch_size,\n )\n\n inputs = self._include_generator_outputs_as_inputs(\n inputs=inputs, outputs=batch_generations\n )\n\n if self.labeller is not None:\n logger.info(f\"Calling labeller for batch {batch_i}...\")\n try:\n batch_labels = self._get_batch_labels(\n inputs=inputs, progress_callback_func=labelling_progress_func\n )\n\n if is_future(batch_labels):\n labels.append(batch_labels) # type: ignore\n else:\n labels.extend(batch_labels) # type: ignore\n except Exception as e:\n if not enable_checkpoints:\n raise RuntimeError(\n \"`Pipeline.generate` failed during labelling step. Setting `enable_checkpoints=True` is recommended!\"\n ) from e\n logger.error(\n f\"`Pipeline.generate` failed during labelling step with exception: {e}\"\n )\n return self._build_dataset(\n dataset,\n generations=generations,\n labels=labels,\n batch_size=batch_size,\n )\n\n _pipeline_progress.stop()\n\n return self._build_dataset(\n dataset, generations=generations, labels=labels, batch_size=batch_size\n )\n\n def dry_run(self, dataset: Dataset) -> CustomDataset:\n \"\"\"Performs a dry run over the provided dataset, which consists on generating the\n outputs for the first row of the dataset, to ensure that the `Pipeline` will be\n able to generate the outputs for the whole dataset.\n\n Args:\n dataset (Dataset): the dataset to be used for generation. 
Just the first row\n will be used for the dry run.\n\n Returns:\n CustomDataset: the dataset containing the outputs for the first row.\n \"\"\"\n try:\n # First we generate a `Dataset` only with the first row from the whole dataset\n subset = Dataset.from_dict(\n {key: [value] for key, value in dataset[0].items()}\n )\n # Then we call the `_generate` method with it\n return self._generate(\n dataset=subset,\n # Default kwargs to make the process as simple as possible\n num_generations=1,\n batch_size=1,\n enable_checkpoints=False,\n display_progress_bar=False,\n )\n except Exception as e:\n self._teardown()\n raise RuntimeError(\n f\"`Pipeline.generate` failed during the dry run over {dataset[0]} with exception: {e}\"\n ) from e\n\n def generate(\n self,\n dataset: Dataset,\n num_generations: int = 1,\n batch_size: int = 1,\n shuffle_before_labelling: bool = True,\n enable_checkpoints: bool = True,\n display_progress_bar: bool = False,\n skip_dry_run: bool = False,\n ) -> CustomDataset:\n \"\"\"Generates the outputs for the given dataset using the LLMs provided to the `Pipeline`.\n\n Args:\n dataset (Dataset): the dataset to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to `1`.\n batch_size (int, optional): the batch size to be used for generation. Defaults to `1`.\n shuffle_before_labelling: whether to shuffle the generations before labelling\n or not. This is useful to avoid the labelling LLM to be biased by the order\n of the generations. Defaults to `True`.\n enable_checkpoints (bool, optional): whether to enable checkpoints or not. Defaults to `True`.\n display_progress_bar (bool, optional): whether to display the progress bar or not. Defaults to `False`.\n skip_dry_run (bool, optional): whether to skip the dry run or not. Defaults to `False`.\n\n Returns:\n CustomDataset: the final dataset.\n\n Raises:\n RuntimeError: if the `Pipeline` fails during the generation or labelling steps.\n UserWarning: if the `Pipeline` fails during the generation or labelling steps and\n `enable_checkpoints` is set to `False`.\n\n Examples:\n >>> from transformers import AutoModelForCaualLM, AutoTokenizer\n >>> from distilabel.llm import OpenAILLM, TransformersLLM\n >>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n >>> from distilabel.pipeline import Pipeline\n >>> generator = TransformersLLM(\n ... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... task=TextGenerationTask(),\n ... prompt_format=\"llama2\",\n ... )\n >>> labeller = OpenAILLM(\n ... model=\"gpt-3.5-turbo\",\n ... task=UltraFeedbackTask.for_text_quality(),\n ... )\n >>> pipeline = Pipeline(generator=generator, labeller=labeller)\n >>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n \"\"\"\n if not skip_dry_run:\n logger.info(\"Executing dry-run...\")\n self.dry_run(dataset)\n logger.info(\n \"Dry-run executed with no issues. Starting the actual generation...\"\n )\n\n dataset = use_progress_bar(self._generate)(\n dataset=dataset,\n num_generations=num_generations,\n batch_size=batch_size,\n enable_checkpoints=enable_checkpoints,\n shuffle_before_labelling=shuffle_before_labelling,\n display_progress_bar=display_progress_bar,\n )\n\n self._teardown()\n\n return dataset\n
"},{"location":"reference/distilabel/pipeline/#distilabel.pipeline.Pipeline.__init__","title":"__init__(generator=None, labeller=None)
","text":"Initializes the Pipeline class.
Parameters:
Name Type Description Defaultgenerator
Union['LLM', None]
the LLM to be used for generation. Defaults to None.
None
labeller
Union['LLM', None]
the LLM to be used for labelling. Defaults to None.
None
Raises:
Type DescriptionValueError
if no LLM is provided.
Examples:
>>> from transformers import AutoModelForCausalLM, AutoTokenizer\n>>> from distilabel.llm import OpenAILLM, TransformersLLM\n>>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n>>> from distilabel.pipeline import Pipeline\n>>> generator = TransformersLLM(\n... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... task=TextGenerationTask(),\n... prompt_format=\"llama2\",\n... )\n>>> labeller = OpenAILLM(\n... model=\"gpt-3.5-turbo\",\n... task=UltraFeedbackTask.for_text_quality(),\n... )\n>>> pipeline = Pipeline(generator=generator, labeller=labeller)\n>>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n
Source code in src/distilabel/pipeline.py
def __init__(\n self,\n generator: Union[\"LLM\", \"ProcessLLM\", \"LLMPool\", None] = None,\n labeller: Union[\"LLM\", \"ProcessLLM\", None] = None,\n) -> None:\n \"\"\"Initializes the Pipeline class.\n\n Args:\n generator (Union[\"LLM\", None], optional): the LLM to be used for generation.\n Defaults to None.\n labeller (Union[\"LLM\", None], optional): the LLM to be used for labelling.\n Defaults to None.\n\n Raises:\n ValueError: if no LLM is provided.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.llm import OpenAILLM, TransformersLLM\n >>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n >>> from distilabel.pipeline import Pipeline\n >>> generator = TransformersLLM(\n ... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... task=TextGenerationTask(),\n ... prompt_format=\"llama2\",\n ... )\n >>> labeller = OpenAILLM(\n ... model=\"gpt-3.5-turbo\",\n ... task=UltraFeedbackTask.for_text_quality(),\n ... )\n >>> pipeline = Pipeline(generator=generator, labeller=labeller)\n >>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n \"\"\"\n if generator is not None and not isinstance(\n generator, (LLM, ProcessLLM, LLMPool)\n ):\n raise ValueError(\n \"`generator` must be an instance of `LLM`, `ProcessLLM` or `LLMPool`\"\n )\n\n if labeller is not None and not isinstance(labeller, (LLM, ProcessLLM)):\n raise ValueError(\"`labeller` must be an instance of `LLM` or `ProcessLLM`\")\n\n self.generator = generator\n self.labeller = labeller\n\n if self.generator is None and self.labeller is None:\n raise ValueError(\"Either `generator` or `labeller` must be provided.\")\n
"},{"location":"reference/distilabel/pipeline/#distilabel.pipeline.Pipeline.dry_run","title":"dry_run(dataset)
","text":"Performs a dry run over the provided dataset, which consists on generating the outputs for the first row of the dataset, to ensure that the Pipeline
will be able to generate the outputs for the whole dataset.
Parameters:
Name Type Description Defaultdataset
Dataset
the dataset to be used for generation. Just the first row will be used for the dry run.
requiredReturns:
Name Type DescriptionCustomDataset
CustomDataset
the dataset containing the outputs for the first row.
Source code insrc/distilabel/pipeline.py
def dry_run(self, dataset: Dataset) -> CustomDataset:\n \"\"\"Performs a dry run over the provided dataset, which consists on generating the\n outputs for the first row of the dataset, to ensure that the `Pipeline` will be\n able to generate the outputs for the whole dataset.\n\n Args:\n dataset (Dataset): the dataset to be used for generation. Just the first row\n will be used for the dry run.\n\n Returns:\n CustomDataset: the dataset containing the outputs for the first row.\n \"\"\"\n try:\n # First we generate a `Dataset` only with the first row from the whole dataset\n subset = Dataset.from_dict(\n {key: [value] for key, value in dataset[0].items()}\n )\n # Then we call the `_generate` method with it\n return self._generate(\n dataset=subset,\n # Default kwargs to make the process as simple as possible\n num_generations=1,\n batch_size=1,\n enable_checkpoints=False,\n display_progress_bar=False,\n )\n except Exception as e:\n self._teardown()\n raise RuntimeError(\n f\"`Pipeline.generate` failed during the dry run over {dataset[0]} with exception: {e}\"\n ) from e\n
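As a quick sanity check before a long run, the method can also be called directly on the pipeline and dataset from the earlier examples (a sketch; both names are assumed to already exist):

# Runs the full generation/labelling flow over just the first row, so
# configuration problems surface before the real `generate` call.
preview = pipeline.dry_run(dataset)
print(preview[0])

Note that Pipeline.generate already performs this dry run by default unless skip_dry_run=True is passed.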
"},{"location":"reference/distilabel/pipeline/#distilabel.pipeline.Pipeline.generate","title":"generate(dataset, num_generations=1, batch_size=1, shuffle_before_labelling=True, enable_checkpoints=True, display_progress_bar=False, skip_dry_run=False)
","text":"Generates the outputs for the given dataset using the LLMs provided to the Pipeline
.
Parameters:
Name Type Description Defaultdataset
Dataset
the dataset to be used for generation.
requirednum_generations
int
the number of generations to be performed for each input. Defaults to 1
.
1
batch_size
int
the batch size to be used for generation. Defaults to 1
.
1
shuffle_before_labelling
bool
whether to shuffle the generations before labelling or not. This is useful to avoid the labelling LLM to be biased by the order of the generations. Defaults to True
.
True
enable_checkpoints
bool
whether to enable checkpoints or not. Defaults to True
.
True
display_progress_bar
bool
whether to display the progress bar or not. Defaults to False
.
False
skip_dry_run
bool
whether to skip the dry run or not. Defaults to False
.
False
Returns:
Name Type DescriptionCustomDataset
CustomDataset
the final dataset.
Raises:
Type DescriptionRuntimeError
if the Pipeline
fails during the generation or labelling steps.
UserWarning
if the Pipeline
fails during the generation or labelling steps and enable_checkpoints
is set to False
.
Examples:
>>> from transformers import AutoModelForCaualLM, AutoTokenizer\n>>> from distilabel.llm import OpenAILLM, TransformersLLM\n>>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n>>> from distilabel.pipeline import Pipeline\n>>> generator = TransformersLLM(\n... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... task=TextGenerationTask(),\n... prompt_format=\"llama2\",\n... )\n>>> labeller = OpenAILLM(\n... model=\"gpt-3.5-turbo\",\n... task=UltraFeedbackTask.for_text_quality(),\n... )\n>>> pipeline = Pipeline(generator=generator, labeller=labeller)\n>>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n
Source code in src/distilabel/pipeline.py
def generate(\n self,\n dataset: Dataset,\n num_generations: int = 1,\n batch_size: int = 1,\n shuffle_before_labelling: bool = True,\n enable_checkpoints: bool = True,\n display_progress_bar: bool = False,\n skip_dry_run: bool = False,\n) -> CustomDataset:\n \"\"\"Generates the outputs for the given dataset using the LLMs provided to the `Pipeline`.\n\n Args:\n dataset (Dataset): the dataset to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to `1`.\n batch_size (int, optional): the batch size to be used for generation. Defaults to `1`.\n shuffle_before_labelling: whether to shuffle the generations before labelling\n or not. This is useful to avoid the labelling LLM to be biased by the order\n of the generations. Defaults to `True`.\n enable_checkpoints (bool, optional): whether to enable checkpoints or not. Defaults to `True`.\n display_progress_bar (bool, optional): whether to display the progress bar or not. Defaults to `False`.\n skip_dry_run (bool, optional): whether to skip the dry run or not. Defaults to `False`.\n\n Returns:\n CustomDataset: the final dataset.\n\n Raises:\n RuntimeError: if the `Pipeline` fails during the generation or labelling steps.\n UserWarning: if the `Pipeline` fails during the generation or labelling steps and\n `enable_checkpoints` is set to `False`.\n\n Examples:\n >>> from transformers import AutoModelForCaualLM, AutoTokenizer\n >>> from distilabel.llm import OpenAILLM, TransformersLLM\n >>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n >>> from distilabel.pipeline import Pipeline\n >>> generator = TransformersLLM(\n ... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... task=TextGenerationTask(),\n ... prompt_format=\"llama2\",\n ... )\n >>> labeller = OpenAILLM(\n ... model=\"gpt-3.5-turbo\",\n ... task=UltraFeedbackTask.for_text_quality(),\n ... )\n >>> pipeline = Pipeline(generator=generator, labeller=labeller)\n >>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n \"\"\"\n if not skip_dry_run:\n logger.info(\"Executing dry-run...\")\n self.dry_run(dataset)\n logger.info(\n \"Dry-run executed with no issues. Starting the actual generation...\"\n )\n\n dataset = use_progress_bar(self._generate)(\n dataset=dataset,\n num_generations=num_generations,\n batch_size=batch_size,\n enable_checkpoints=enable_checkpoints,\n shuffle_before_labelling=shuffle_before_labelling,\n display_progress_bar=display_progress_bar,\n )\n\n self._teardown()\n\n return dataset\n
"},{"location":"reference/distilabel/pipeline/#distilabel.pipeline.pipeline","title":"pipeline(task, subtask=None, *, generator=None, labeller=None, **kwargs)
","text":"Creates a Pipeline
instance with the provided LLMs for a given task, which is useful whenever you want to use a pre-defined Pipeline
for a given task, or if you want to create a custom Pipeline
for a given task. Ideally one using this function over the Pipeline
class, don't want to worry about the details of the labeller
, since it will come with a default configuration based on the task
, by default the LLM used for labelling
will always be gpt-3.5-turbo
from OpenAI, as it's the one that provides the most consistent and fast results.
Parameters:
Name Type Description Defaulttask
Literal['preference', 'critique']
the task to be performed by the Pipeline
.
subtask
Optional[str]
the subtask to be performed by the Pipeline
. Defaults to None.
None
generator
Optional['LLM']
the LLM to be used for generation. Defaults to None.
None
labeller
Optional['LLM']
the LLM to be used for labelling. Defaults to None.
None
**kwargs
the keyword arguments to be passed to the task
and subtask
classes.
{}
Raises:
Type DescriptionValueError
if an invalid task is provided.
Returns:
Name Type DescriptionPipeline
Pipeline
the Pipeline
instance.
Examples:
>>> from transformers import AutoModelForCausalLM, AutoTokenizer\n>>> from distilabel.llm import TransformersLLM\n>>> from distilabel.tasks import TextGenerationTask\n>>> from distilabel.pipeline import pipeline\n>>> generator = TransformersLLM(\n... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... task=TextGenerationTask(),\n... prompt_format=\"llama2\",\n... )\n>>> pipeline = pipeline(\n... task=\"preference\",\n... subtask=\"text-quality\",\n... generator=generator,\n... )\n>>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n
Source code in src/distilabel/pipeline.py
def pipeline(\n task: Literal[\"preference\"],\n subtask: Optional[str] = None,\n *,\n generator: Optional[\"LLM\"] = None,\n labeller: Optional[\"LLM\"] = None,\n **kwargs,\n) -> Pipeline:\n \"\"\"Creates a `Pipeline` instance with the provided LLMs for a given task, which is useful\n whenever you want to use a pre-defined `Pipeline` for a given task, or if you want to\n create a custom `Pipeline` for a given task. Ideally one using this function over the `Pipeline`\n class, don't want to worry about the details of the `labeller`, since it will come with a default\n configuration based on the `task`, by default the LLM used for `labelling` will always be `gpt-3.5-turbo`\n from OpenAI, as it's the one that provides the most consistent and fast results.\n\n Args:\n task (Literal[\"preference\", \"critique\"]): the task to be performed by the `Pipeline`.\n subtask (Optional[str], optional): the subtask to be performed by the `Pipeline`.\n Defaults to None.\n generator (Optional[\"LLM\"], optional): the LLM to be used for generation. Defaults to None.\n labeller (Optional[\"LLM\"], optional): the LLM to be used for labelling. Defaults to None.\n **kwargs: the keyword arguments to be passed to the `task` and `subtask` classes.\n\n Raises:\n ValueError: if an invalid task is provided.\n\n Returns:\n Pipeline: the `Pipeline` instance.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.llm import TransformersLLM\n >>> from distilabel.tasks import TextGenerationTask\n >>> from distilabel.pipeline import pipeline\n >>> generator = TransformersLLM(\n ... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... task=TextGenerationTask(),\n ... prompt_format=\"llama2\",\n ... )\n >>> pipeline = pipeline(\n ... task=\"preference\",\n ... subtask=\"text-quality\",\n ... generator=generator,\n ... 
)\n >>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n \"\"\"\n if task == \"preference\":\n if labeller is None:\n from dataclasses import fields\n\n from distilabel.llm.openai import OpenAILLM\n from distilabel.tasks.preference.ultrafeedback import UltraFeedbackTask\n\n task_cls = UltraFeedbackTask\n task_kwargs = {\n key: kwargs.get(key.name)\n for key in fields(task_cls)\n if key.name in kwargs and not key.name.startswith(\"__\")\n }\n\n # Dynamically call the appropriate classmethod using getattr\n if subtask is not None:\n if subtask not in task_cls.__subtasks__:\n raise ValueError(\n f\"Invalid subtask: {subtask}, available subtasks are {task_cls.__subtasks__}\"\n )\n classmethod_name = f\"for_{subtask.lower().replace('-', '_')}\"\n if hasattr(task_cls, classmethod_name):\n task_cls = getattr(task_cls, classmethod_name)\n\n logger.info(\n \"Since no `labeller` was provided, `OpenAILLM` will be used as the default labeller with `UltraFeedback`.\"\n )\n\n labeller = OpenAILLM(\n model=kwargs.get(\"openai_model\") or \"gpt-3.5-turbo\",\n task=task_cls(**task_kwargs), # type: ignore\n max_new_tokens=kwargs.get(\"max_new_tokens\") or 256,\n num_threads=kwargs.get(\"num_threads\") or 4,\n openai_api_key=kwargs.get(\"openai_api_key\")\n or os.getenv(\"OPENAI_API_KEY\"),\n temperature=kwargs.get(\"temperature\") or 0.0,\n )\n else:\n from distilabel.tasks.preference.judgelm import JudgeLMTask\n from distilabel.tasks.preference.ultrafeedback import UltraFeedbackTask\n from distilabel.tasks.preference.ultrajudge import UltraJudgeTask\n\n if not isinstance(\n labeller.task, (UltraFeedbackTask, JudgeLMTask, UltraJudgeTask)\n ):\n warnings.warn(\n \"The `labeller` task for `preference` must be an instance of `UltraFeedbackTask`,\"\n f\" `JudgeLMTask` or `UltraJudge`, got {labeller.task.__class__.__name__}.\"\n \"If you are planning to use a custom `labeller` for a `preference` \"\n \"task, use it at your own risk.\",\n UserWarning,\n stacklevel=2,\n )\n\n if generator is not None:\n assert (\n generator.task.input_args_names + generator.task.output_args_names\n == labeller.task.input_args_names\n ), (\n f\"`generator` outputs do not match `labeller` inputs: \"\n f\"{generator.task.input_args_names + generator.task.output_args_names} != {labeller.task.input_args_names}\"\n )\n else:\n raise ValueError(f\"Invalid task: {task}, available tasks are: `preference`.\")\n\n return Pipeline(generator=generator, labeller=labeller)\n
"},{"location":"reference/distilabel/progress_bar/","title":"progress_bar","text":""},{"location":"reference/distilabel/llm/","title":"llm","text":""},{"location":"reference/distilabel/llm/#distilabel.llm.InferenceEndpointsLLM","title":"InferenceEndpointsLLM
","text":" Bases: LLM
src/distilabel/llm/huggingface/inference_endpoints.py
class InferenceEndpointsLLM(LLM):\n def __init__(\n self,\n endpoint_name: str,\n task: \"Task\",\n endpoint_namespace: Union[str, None] = None,\n token: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: Union[float, None] = None,\n seed: Union[int, None] = None,\n do_sample: bool = False,\n temperature: Union[float, None] = None,\n top_k: Union[int, None] = None,\n top_p: Union[float, None] = None,\n typical_p: Union[float, None] = None,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the InferenceEndpointsLLM class.\n\n Args:\n endpoint_name (str): The name of the endpoint.\n task (Task): The task to be performed by the LLM.\n endpoint_namespace (Union[str, None]): The namespace of the endpoint. Defaults to None.\n token (Union[str, None]): The token for the endpoint. Defaults to None.\n max_new_tokens (int): The maximum number of tokens to be generated. Defaults to 128.\n repetition_penalty (Union[float, None]): The repetition penalty to be used for generation. Defaults to None.\n seed (Union[int, None]): The seed for generation. Defaults to None.\n do_sample (bool): Whether to do sampling. Defaults to False.\n temperature (Union[float, None]): The temperature for generation. Defaults to None.\n top_k (Union[int, None]): The top_k for generation. Defaults to None.\n top_p (Union[float, None]): The top_p for generation. Defaults to None.\n typical_p (Union[float, None]): The typical_p for generation. Defaults to None.\n num_threads (Union[int, None]): The number of threads. Defaults to None.\n prompt_format (Union[\"SupportedFormats\", None]): The format of the prompt. Defaults to None.\n prompt_formatting_fn (Union[Callable[..., str], None]): The function for formatting the prompt. Defaults to None.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import InferenceEndpointsLLM\n >>> task = Task()\n >>> llm = InferenceEndpointsLLM(\n ... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n ... task=task,\n ... 
)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _HUGGINGFACE_HUB_AVAILABLE:\n raise ImportError(\n \"`InferenceEndpointsLLM` cannot be used as `huggingface-hub` is not \"\n \"installed, please install it with `pip install huggingface-hub`.\"\n )\n\n self.do_sample = do_sample\n self.max_new_tokens = max_new_tokens\n self.repetition_penalty = repetition_penalty\n self.seed = seed\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.inference_endpoint = get_inference_endpoint(\n name=endpoint_name, namespace=endpoint_namespace, token=token\n )\n self.inference_endpoint.wait(timeout=30)\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"do_sample\": self.do_sample,\n \"max_new_tokens\": self.max_new_tokens,\n \"repetition_penalty\": self.repetition_penalty,\n \"seed\": self.seed,\n \"temperature\": self.temperature,\n \"top_k\": self.top_k,\n \"top_p\": self.top_p,\n \"typical_p\": self.typical_p,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the model name of the endpoint.\"\"\"\n return self.inference_endpoint.repository\n\n @retry(\n retry=retry_if_exception_type(_INFERENCE_ENDPOINTS_API_RETRY_ON_EXCEPTIONS),\n stop=stop_after_attempt(_INFERENCE_ENDPOINTS_API_STOP_AFTER_ATTEMPT),\n wait=wait_random_exponential(\n multiplier=_INFERENCE_ENDPOINTS_API_WAIT_RANDOM_EXPONENTIAL_MULTIPLIER,\n max=_INFERENCE_ENDPOINTS_API_WAIT_RANDOM_EXPONENTIAL_MAX,\n ),\n before_sleep=before_sleep_log(logger, logging.INFO),\n after=after_log(logger, logging.INFO),\n )\n def _text_generation_with_backoff(self, **kwargs: Any) -> Any:\n \"\"\"Performs text generation with backoff in case of an error.\"\"\"\n return self.inference_endpoint.client.text_generation(**kwargs) # type: ignore\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n raw_responses = [\n self._text_generation_with_backoff(\n prompt=prompt,\n do_sample=self.do_sample,\n max_new_tokens=self.max_new_tokens,\n repetition_penalty=self.repetition_penalty,\n seed=self.seed,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n typical_p=self.typical_p,\n )\n for _ in range(num_generations)\n ]\n output = []\n for raw_response in raw_responses:\n try:\n parsed_response = self.task.parse_output(raw_response)\n except Exception as e:\n logger.error(f\"Error parsing Inference Endpoints output: {e}\")\n parsed_response = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_response,\n parsed_output=parsed_response,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.InferenceEndpointsLLM.model_name","title":"model_name: str
property
","text":"Returns the model name of the endpoint.
"},{"location":"reference/distilabel/llm/#distilabel.llm.InferenceEndpointsLLM.__init__","title":"__init__(endpoint_name, task, endpoint_namespace=None, token=None, max_new_tokens=128, repetition_penalty=None, seed=None, do_sample=False, temperature=None, top_k=None, top_p=None, typical_p=None, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the InferenceEndpointsLLM class.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `endpoint_name` | `str` | The name of the endpoint. | required |
| `task` | `Task` | The task to be performed by the LLM. | required |
| `endpoint_namespace` | `Union[str, None]` | The namespace of the endpoint. Defaults to None. | `None` |
| `token` | `Union[str, None]` | The token for the endpoint. Defaults to None. | `None` |
| `max_new_tokens` | `int` | The maximum number of tokens to be generated. Defaults to 128. | `128` |
| `repetition_penalty` | `Union[float, None]` | The repetition penalty to be used for generation. Defaults to None. | `None` |
| `seed` | `Union[int, None]` | The seed for generation. Defaults to None. | `None` |
| `do_sample` | `bool` | Whether to do sampling. Defaults to False. | `False` |
| `temperature` | `Union[float, None]` | The temperature for generation. Defaults to None. | `None` |
| `top_k` | `Union[int, None]` | The top_k for generation. Defaults to None. | `None` |
| `top_p` | `Union[float, None]` | The top_p for generation. Defaults to None. | `None` |
| `typical_p` | `Union[float, None]` | The typical_p for generation. Defaults to None. | `None` |
| `num_threads` | `Union[int, None]` | The number of threads. Defaults to None. | `None` |
| `prompt_format` | `Union[SupportedFormats, None]` | The format of the prompt. Defaults to None. | `None` |
| `prompt_formatting_fn` | `Union[Callable[..., str], None]` | The function for formatting the prompt. Defaults to None. | `None` |
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import InferenceEndpointsLLM\n>>> task = Task()\n>>> llm = InferenceEndpointsLLM(\n... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n... task=task,\n... )\n
Source code in src/distilabel/llm/huggingface/inference_endpoints.py
def __init__(\n self,\n endpoint_name: str,\n task: \"Task\",\n endpoint_namespace: Union[str, None] = None,\n token: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: Union[float, None] = None,\n seed: Union[int, None] = None,\n do_sample: bool = False,\n temperature: Union[float, None] = None,\n top_k: Union[int, None] = None,\n top_p: Union[float, None] = None,\n typical_p: Union[float, None] = None,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the InferenceEndpointsLLM class.\n\n Args:\n endpoint_name (str): The name of the endpoint.\n task (Task): The task to be performed by the LLM.\n endpoint_namespace (Union[str, None]): The namespace of the endpoint. Defaults to None.\n token (Union[str, None]): The token for the endpoint. Defaults to None.\n max_new_tokens (int): The maximum number of tokens to be generated. Defaults to 128.\n repetition_penalty (Union[float, None]): The repetition penalty to be used for generation. Defaults to None.\n seed (Union[int, None]): The seed for generation. Defaults to None.\n do_sample (bool): Whether to do sampling. Defaults to False.\n temperature (Union[float, None]): The temperature for generation. Defaults to None.\n top_k (Union[int, None]): The top_k for generation. Defaults to None.\n top_p (Union[float, None]): The top_p for generation. Defaults to None.\n typical_p (Union[float, None]): The typical_p for generation. Defaults to None.\n num_threads (Union[int, None]): The number of threads. Defaults to None.\n prompt_format (Union[\"SupportedFormats\", None]): The format of the prompt. Defaults to None.\n prompt_formatting_fn (Union[Callable[..., str], None]): The function for formatting the prompt. Defaults to None.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import InferenceEndpointsLLM\n >>> task = Task()\n >>> llm = InferenceEndpointsLLM(\n ... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n ... task=task,\n ... )\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _HUGGINGFACE_HUB_AVAILABLE:\n raise ImportError(\n \"`InferenceEndpointsLLM` cannot be used as `huggingface-hub` is not \"\n \"installed, please install it with `pip install huggingface-hub`.\"\n )\n\n self.do_sample = do_sample\n self.max_new_tokens = max_new_tokens\n self.repetition_penalty = repetition_penalty\n self.seed = seed\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.inference_endpoint = get_inference_endpoint(\n name=endpoint_name, namespace=endpoint_namespace, token=token\n )\n self.inference_endpoint.wait(timeout=30)\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLM","title":"LLM
","text":" Bases: ABC
src/distilabel/llm/base.py
class LLM(ABC):\n def __init__(\n self,\n task: Task,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the LLM base class.\n\n Note:\n This class is intended to be used internally, but you anyone can still create\n a subclass, implement the `abstractmethod`s and use it.\n\n Args:\n task (Task): the task to be performed by the LLM.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[\"SupportedFormats\", None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n self.task = task\n\n self.thread_pool_executor = (\n ThreadPoolExecutor(max_workers=num_threads)\n if num_threads is not None\n else None\n )\n\n self.prompt_format = prompt_format\n self.prompt_formatting_fn = prompt_formatting_fn\n\n def __del__(self) -> None:\n \"\"\"Shuts down the thread pool executor if it is not `None`.\"\"\"\n if self.thread_pool_executor is not None:\n self.thread_pool_executor.shutdown()\n\n @property\n def num_threads(self) -> Union[int, None]:\n if self.thread_pool_executor:\n return self.thread_pool_executor._max_workers\n\n def __repr__(self) -> str:\n return f\"{self.__class__.__name__}(task={self.task.__class__.__name__}, num_threads={self.num_threads}, promp_format='{self.prompt_format}', model='{self.model_name}')\"\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield \"task\", self.task\n yield \"num_threads\", self.num_threads\n yield \"prompt_format\", self.prompt_format\n if self.prompt_formatting_fn is not None:\n args = f\"({', '.join(self.prompt_formatting_fn.__code__.co_varnames)})\"\n representation = self.prompt_formatting_fn.__name__ + args\n yield \"prompt_formatting_fn\", representation\n yield \"model\", self.model_name\n\n @property\n @abstractmethod\n def model_name(self) -> str:\n pass\n\n def _generate_prompts(\n self,\n inputs: List[Dict[str, Any]],\n default_format: Union[\"SupportedFormats\", None] = None,\n ) -> List[Any]:\n \"\"\"Generates the prompts to be used for generation.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n default_format (Union[\"SupportedFormats\", None], optional): the default format to be used\n for the prompt if no `prompt_format` is specified. 
Defaults to `None`.\n\n Returns:\n List[Any]: the generated prompts.\n\n Raises:\n ValueError: if the generated prompt is not of the expected type.\n \"\"\"\n prompts = []\n for input in inputs:\n prompt = self.task.generate_prompt(**input)\n if not isinstance(prompt, Prompt) and self.prompt_formatting_fn is not None:\n warnings.warn(\n \"The method `generate_prompt` is not returning a `Prompt` class but a prompt\"\n f\" of `type={type(prompt)}`, meaning that a pre-formatting has already been\"\n \" applied in the `task.generate_prompt` method, so the usage of a `prompt_formatting_fn`\"\n \" is discouraged.\",\n UserWarning,\n stacklevel=2,\n )\n prompt = self.prompt_formatting_fn(prompt)\n elif isinstance(prompt, Prompt) and self.prompt_formatting_fn is None:\n if self.prompt_format is not None or default_format is not None:\n prompt = prompt.format_as(\n format=self.prompt_format or default_format # type: ignore\n )\n else:\n warnings.warn(\n \"No `prompt_format` has been specified and no `default_format` is set, so\"\n \" the prompt will be concatenated with a line-break and no specific formatting\"\n \" by default.\",\n UserWarning,\n stacklevel=2,\n )\n prompt = prompt.format_as(format=\"default\")\n prompts.append(prompt)\n return prompts\n\n @abstractmethod\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n pass\n\n def _get_valid_inputs(\n self, inputs: List[Dict[str, Any]]\n ) -> Tuple[List[Dict[str, Any]], List[int]]:\n \"\"\"Returns the valid inputs and the indices of the invalid inputs.\n\n A valid input is an input that contains all the arguments required by the task.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n\n Returns:\n Tuple[List[Dict[str, Any]], List[int]]: a tuple containing the valid inputs and\n the indices of the invalid inputs.\n \"\"\"\n\n valid_inputs = []\n not_valid_inputs_indices = []\n for i, input in enumerate(inputs):\n if not all(input_arg in input for input_arg in self.task.input_args_names):\n logger.warn(\n f\"Missing {self.task.__class__.__name__} input argument in batch element {i}\"\n )\n not_valid_inputs_indices.append(i)\n continue\n\n valid_inputs.append(input)\n\n return valid_inputs, not_valid_inputs_indices\n\n def _fill_missing_inputs(\n self,\n generations: List[List[LLMOutput]],\n invalid_inputs_indices: List[int],\n num_generations: int,\n ) -> List[List[LLMOutput]]:\n \"\"\"Fills the `generations` list with empty `LLMOutput`s for the inputs that were\n not valid for the associated task of this `LLM`.\n\n Args:\n generations (List[List[LLMOutput]]): the generations to be filled.\n invalid_inputs_indices (List[int]): the indices of the inputs that were not\n valid for the associated task of this `LLM`.\n num_generations (int): the number of generations to be performed for each input.\n\n Returns:\n List[List[LLMOutput]]: the filled generations.\n \"\"\"\n\n filled_generations = generations.copy()\n for idx in invalid_inputs_indices:\n filled_generations.insert(\n idx,\n [\n LLMOutput(\n model_name=self.model_name,\n prompt_used=None,\n raw_output=None,\n parsed_output=None,\n )\n for _ in range(num_generations)\n ],\n )\n return filled_generations\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]:\n \"\"\"Generates the outputs for the given inputs using the LLM.\n\n Args:\n 
inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Union[List[Future[List[\"LLMOutput\"]]], List[List[\"LLMOutput\"]]]: the generated outputs.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n valid_inputs, invalid_inputs_indices = self._get_valid_inputs(inputs)\n\n if self.thread_pool_executor is not None:\n futures = []\n for input in valid_inputs:\n future = self.thread_pool_executor.submit(\n self._generate, [input], num_generations\n )\n futures.append(future)\n future = when_all_complete(\n futures=futures,\n callback=lambda generations: self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n ),\n )\n future.add_done_callback(lambda _: _progress())\n return future\n\n generations = self._generate(valid_inputs, num_generations)\n\n generations = self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n )\n\n _progress()\n return generations\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return self.thread_pool_executor is not None\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLM.return_futures","title":"return_futures: bool
property
","text":"Whether the LLM
returns futures
__del__()
","text":"Shuts down the thread pool executor if it is not None
.
src/distilabel/llm/base.py
def __del__(self) -> None:\n \"\"\"Shuts down the thread pool executor if it is not `None`.\"\"\"\n if self.thread_pool_executor is not None:\n self.thread_pool_executor.shutdown()\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLM.__init__","title":"__init__(task, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the LLM base class.
Note: This class is intended to be used internally, but anyone can still create a subclass, implement the `abstractmethod`s, and use it.
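As a rough illustration of that note, here is a minimal sketch of a custom subclass. `EchoLLM` is a hypothetical name and the import path for `LLMOutput` is an assumption, so adjust it to wherever it lives in your installed version.

from typing import Any, Dict, List

from distilabel.llm import LLM
from distilabel.llm.utils import LLMOutput  # assumed import path for LLMOutput

class EchoLLM(LLM):
    """Toy LLM that simply echoes the formatted prompt back."""

    @property
    def model_name(self) -> str:
        return "echo"

    def _generate(
        self, inputs: List[Dict[str, Any]], num_generations: int = 1
    ) -> List[List[LLMOutput]]:
        # Reuse the base-class helper to build the prompts, then wrap them as outputs.
        prompts = self._generate_prompts(inputs, default_format=None)
        return [
            [
                LLMOutput(
                    model_name=self.model_name,
                    prompt_used=prompt,
                    raw_output=prompt,
                    parsed_output=None,
                )
                for _ in range(num_generations)
            ]
            for prompt in prompts
        ]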
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `task` | `Task` | the task to be performed by the LLM. | required |
| `num_threads` | `Union[int, None]` | the number of threads to be used for parallel generation. If `None`, no parallel generation will be performed. Defaults to `None`. | `None` |
| `prompt_format` | `Union['SupportedFormats', None]` | the format to be used for the prompt. If `None`, the default format of the task will be used; available formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`, but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break) will be used if no `prompt_formatting_fn` is provided. | `None` |
| `prompt_formatting_fn` | `Union[Callable[..., str], None]` | a function to be applied to the prompt before generation. If `None`, no formatting will be applied. Defaults to `None`. | `None` |
Source code in src/distilabel/llm/base.py
def __init__(\n self,\n task: Task,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the LLM base class.\n\n Note:\n This class is intended to be used internally, but you anyone can still create\n a subclass, implement the `abstractmethod`s and use it.\n\n Args:\n task (Task): the task to be performed by the LLM.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[\"SupportedFormats\", None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n self.task = task\n\n self.thread_pool_executor = (\n ThreadPoolExecutor(max_workers=num_threads)\n if num_threads is not None\n else None\n )\n\n self.prompt_format = prompt_format\n self.prompt_formatting_fn = prompt_formatting_fn\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLM.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the LLM.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `inputs` | `List[Dict[str, Any]]` | the inputs to be used for generation. | required |
| `num_generations` | `int` | the number of generations to be performed for each input. Defaults to `1`. | `1` |
| `progress_callback_func` | `Union[Callable, None]` | a function to be called at each generation step. Defaults to `None`. | `None` |
Returns:

| Type | Description |
| --- | --- |
| `Union[List[List['LLMOutput']], Future[List[List['LLMOutput']]]]` | the generated outputs. |
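Since the return type depends on whether a thread pool was configured, a hedged usage sketch could look like the following; a valid `OPENAI_API_KEY` is assumed, and the `input` key is assumed to match the task's `input_args_names`.

from distilabel.tasks.text_generation import TextGenerationTask as Task
from distilabel.llm import OpenAILLM

# With num_threads set, generate() runs in a thread pool and returns a Future.
llm = OpenAILLM(task=Task(), num_threads=4)

maybe_future = llm.generate(
    inputs=[{"input": "Write a haiku about synthetic data."}],
    num_generations=2,
)
# Resolve the Future only when the LLM actually returns one.
generations = maybe_future.result() if llm.return_futures else maybe_future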
Source code in src/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]:\n \"\"\"Generates the outputs for the given inputs using the LLM.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Union[List[Future[List[\"LLMOutput\"]]], List[List[\"LLMOutput\"]]]: the generated outputs.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n valid_inputs, invalid_inputs_indices = self._get_valid_inputs(inputs)\n\n if self.thread_pool_executor is not None:\n futures = []\n for input in valid_inputs:\n future = self.thread_pool_executor.submit(\n self._generate, [input], num_generations\n )\n futures.append(future)\n future = when_all_complete(\n futures=futures,\n callback=lambda generations: self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n ),\n )\n future.add_done_callback(lambda _: _progress())\n return future\n\n generations = self._generate(valid_inputs, num_generations)\n\n generations = self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n )\n\n _progress()\n return generations\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLMPool","title":"LLMPool
","text":"LLMPool is a class that wraps multiple ProcessLLM
s and performs generation in parallel using them. Depending on the number of LLM
s and the parameter num_generations
, the LLMPool
will decide how many generations to perform for each LLM
:
If num_generations
is less than the number of LLM
s, then num_generations
LLMs will be chosen randomly and each of them will perform 1 generation.
If num_generations
is equal to the number of LLM
s, then each LLM
will perform 1 generation.
If num_generations
is greater than the number of LLM
s, then each LLM
will perform num_generations // num_llms
generations, and the remaining num_generations % num_llms
generations will be performed by num_generations % num_llms
randomly chosen LLM
s.
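The split described above can be reproduced with a standalone sketch that mirrors the pool's internal `_get_num_generations_per_llm` helper (shown in the source further below); the numbers are only illustrative.

import random

def generations_per_llm(num_generations: int, num_llms: int) -> dict:
    # Every LLM gets the integer share of the requested generations...
    per_llm = {i: num_generations // num_llms for i in range(num_llms)}
    # ...and the remainder is assigned to randomly chosen LLMs, one extra generation each.
    for i in random.sample(range(num_llms), k=num_generations % num_llms):
        per_llm[i] += 1
    return per_llm

print(generations_per_llm(num_generations=5, num_llms=3))  # e.g. {0: 2, 1: 2, 2: 1}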
Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `llms` | `List[ProcessLLM]` | the `ProcessLLM`s to be used for generation. |
src/distilabel/llm/base.py
class LLMPool:\n \"\"\"LLMPool is a class that wraps multiple `ProcessLLM`s and performs generation in\n parallel using them. Depending on the number of `LLM`s and the parameter `num_generations`,\n the `LLMPool` will decide how many generations to perform for each `LLM`:\n\n - If `num_generations` is less than the number of `LLM`s, then `num_generations` LLMs\n will be chosen randomly and each of them will perform 1 generation.\n\n\n - If `num_generations` is equal to the number of `LLM`s, then each `LLM` will perform\n 1 generation.\n\n - If `num_generations` is greater than the number of `LLM`s, then each `LLM` will\n perform `num_generations // num_llms` generations, and the remaining `num_generations % num_llms`\n generations will be performed by `num_generations % num_llms` randomly chosen `LLM`s.\n\n Attributes:\n llms (List[ProcessLLM]): the `ProcessLLM`s to be used for generation.\n \"\"\"\n\n def __init__(self, llms: List[ProcessLLM]) -> None:\n \"\"\"Initializes the `LLMPool` class.\n\n Args:\n llms: the `ProcessLLM`s to be used for generation. The list must contain at\n least 2 `ProcessLLM`s.\n\n Raises:\n ValueError: if the `llms` argument contains less than 2 `ProcessLLM`s, the\n `llms` argument contains `ProcessLLM`s that are not `ProcessLLM`s, or\n if the `llms` argument contains `ProcessLLM`s with different tasks.\n \"\"\"\n if len(llms) < 2:\n raise ValueError(\n \"The `llms` argument must contain at least 2 `ProcessLLM`s. If you want\"\n \" to use a single `ProcessLLM`, use the `ProcessLLM` directly instead.\"\n )\n\n if not all(isinstance(llm, ProcessLLM) for llm in llms):\n raise ValueError(\"The `llms` argument must contain only `ProcessLLM`s.\")\n\n # Note: The following piece of code is used to check that all the `ProcessLLM`s\n # have the same task or a subclass of it.\n mros = [(type(llm.task), len(type(llm.task).mro())) for llm in llms]\n min_common_class = min(mros, key=lambda x: x[1])[0]\n if not all(isinstance(llm.task, min_common_class) for llm in llms):\n raise ValueError(\n \"All the `ProcessLLM` in `llms` must share the same task (either as the instance or the parent class).\"\n )\n\n self.llms = llms\n self.num_llms = len(llms)\n\n def _get_num_generations_per_llm(self, num_generations: int) -> Dict[int, int]:\n \"\"\"Returns the number of generations to be performed by each `LLM`.\n\n Args:\n num_generations: the number of generations to be performed.\n\n Returns:\n Dict[int, int]: a dictionary where the keys are the ids of the `LLM`s and the\n values are the number of generations to be performed by each `LLM`.\n \"\"\"\n llms_ids = list(range(self.num_llms))\n generations_per_llm = {i: num_generations // self.num_llms for i in llms_ids}\n\n for i in random.sample(llms_ids, k=num_generations % self.num_llms):\n generations_per_llm[i] += 1\n\n return generations_per_llm\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generates the outputs for the given inputs using the pool of `ProcessLLM`s.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. 
Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n num_generations_per_llm = self._get_num_generations_per_llm(num_generations)\n\n futures = [\n llm.generate(\n inputs,\n num_generations=num_generations_per_llm[i],\n progress_callback_func=progress_callback_func,\n )\n for i, llm in enumerate(self.llms)\n if num_generations_per_llm[i] > 0\n ]\n llms_generations = [future.result() for future in futures]\n\n generations = []\n for llms_row_generations in zip(*llms_generations):\n row_generations = []\n for llm_row_generations in llms_row_generations:\n for generation in llm_row_generations:\n row_generations.append(generation)\n generations.append(row_generations)\n\n return generations\n\n def teardown(self) -> None:\n \"\"\"Stops the `ProcessLLM`s.\"\"\"\n for llm in self.llms:\n llm.teardown()\n\n @property\n def task(self) -> \"Task\":\n \"\"\"Returns the task that will be used by the `ProcessLLM`s of this pool.\n\n Returns:\n Task: the task that will be used by the `ProcessLLM`s of this pool.\n \"\"\"\n return self.llms[0].task\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return False\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLMPool.return_futures","title":"return_futures: bool
property
","text":"Whether the LLM
returns futures
task: 'Task'
property
","text":"Returns the task that will be used by the ProcessLLM
s of this pool.
Returns:

| Name | Type | Description |
| --- | --- | --- |
| `Task` | `'Task'` | the task that will be used by the `ProcessLLM`s of this pool. |
__init__(llms)
","text":"Initializes the LLMPool
class.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `llms` | `List[ProcessLLM]` | the `ProcessLLM`s to be used for generation. The list must contain at least 2 `ProcessLLM`s. | required |

Raises:

| Type | Description |
| --- | --- |
| `ValueError` | if the `llms` argument contains less than 2 `ProcessLLM`s, if it contains elements that are not `ProcessLLM`s, or if it contains `ProcessLLM`s with different tasks. |
src/distilabel/llm/base.py
def __init__(self, llms: List[ProcessLLM]) -> None:\n \"\"\"Initializes the `LLMPool` class.\n\n Args:\n llms: the `ProcessLLM`s to be used for generation. The list must contain at\n least 2 `ProcessLLM`s.\n\n Raises:\n ValueError: if the `llms` argument contains less than 2 `ProcessLLM`s, the\n `llms` argument contains `ProcessLLM`s that are not `ProcessLLM`s, or\n if the `llms` argument contains `ProcessLLM`s with different tasks.\n \"\"\"\n if len(llms) < 2:\n raise ValueError(\n \"The `llms` argument must contain at least 2 `ProcessLLM`s. If you want\"\n \" to use a single `ProcessLLM`, use the `ProcessLLM` directly instead.\"\n )\n\n if not all(isinstance(llm, ProcessLLM) for llm in llms):\n raise ValueError(\"The `llms` argument must contain only `ProcessLLM`s.\")\n\n # Note: The following piece of code is used to check that all the `ProcessLLM`s\n # have the same task or a subclass of it.\n mros = [(type(llm.task), len(type(llm.task).mro())) for llm in llms]\n min_common_class = min(mros, key=lambda x: x[1])[0]\n if not all(isinstance(llm.task, min_common_class) for llm in llms):\n raise ValueError(\n \"All the `ProcessLLM` in `llms` must share the same task (either as the instance or the parent class).\"\n )\n\n self.llms = llms\n self.num_llms = len(llms)\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLMPool.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the pool of ProcessLLM
s.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `inputs` | `List[Dict[str, Any]]` | the inputs to be used for generation. | required |
| `num_generations` | `int` | the number of generations to be performed for each input. Defaults to `1`. | `1` |
| `progress_callback_func` | `Union[Callable, None]` | a function to be called at each generation step. Defaults to `None`. | `None` |
Returns:

| Type | Description |
| --- | --- |
| `List[List['LLMOutput']]` | the generated outputs. |
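For context, here is a hedged end-to-end sketch of using a pool; the second model name and the `input` key are placeholders, and an `OPENAI_API_KEY` is assumed.

from distilabel.tasks.text_generation import TextGenerationTask as Task
from distilabel.llm import LLMPool, ProcessLLM, OpenAILLM

task = Task()

def load_gpt_3_5(task):
    return OpenAILLM(task=task, model="gpt-3.5-turbo")

def load_gpt_4(task):
    return OpenAILLM(task=task, model="gpt-4")  # placeholder second model

# The pool requires at least two ProcessLLMs that share the same task.
pool = LLMPool(
    llms=[
        ProcessLLM(task=task, load_llm_fn=load_gpt_3_5),
        ProcessLLM(task=task, load_llm_fn=load_gpt_4),
    ]
)
generations = pool.generate(
    inputs=[{"input": "Summarize what an LLMPool does."}],
    num_generations=2,
)
pool.teardown()  # stop the child processes once done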
src/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generates the outputs for the given inputs using the pool of `ProcessLLM`s.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n num_generations_per_llm = self._get_num_generations_per_llm(num_generations)\n\n futures = [\n llm.generate(\n inputs,\n num_generations=num_generations_per_llm[i],\n progress_callback_func=progress_callback_func,\n )\n for i, llm in enumerate(self.llms)\n if num_generations_per_llm[i] > 0\n ]\n llms_generations = [future.result() for future in futures]\n\n generations = []\n for llms_row_generations in zip(*llms_generations):\n row_generations = []\n for llm_row_generations in llms_row_generations:\n for generation in llm_row_generations:\n row_generations.append(generation)\n generations.append(row_generations)\n\n return generations\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLMPool.teardown","title":"teardown()
","text":"Stops the ProcessLLM
s.
src/distilabel/llm/base.py
def teardown(self) -> None:\n \"\"\"Stops the `ProcessLLM`s.\"\"\"\n for llm in self.llms:\n llm.teardown()\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LlamaCppLLM","title":"LlamaCppLLM
","text":" Bases: LLM
src/distilabel/llm/llama_cpp.py
class LlamaCppLLM(LLM):\n def __init__(\n self,\n model: \"Llama\",\n task: \"Task\",\n max_new_tokens: int = 128,\n temperature: float = 0.8,\n top_p: float = 0.95,\n top_k: int = 40,\n repeat_penalty: float = 1.1,\n seed: int = 1337,\n prompt_format: Union[SupportedFormats, None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the LlamaCppLLM class.\n\n Args:\n model (Llama): the llama-cpp model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 0.8.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 0.95.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n repeat_penalty (float, optional): the repeat penalty to be used for generation.\n Defaults to 1.1.\n seed (int, optional): the seed to be used for generation, setting it to -1 implies\n that a different response will be generated on each generation, similarly to\n HuggingFace's `do_sample` arg. Defaults to 1337.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Examples:\n >>> from llama_cpp import Llama\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import LlamaCppLLM\n >>> model = Llama(model_path=\"path/to/model\")\n >>> task = Task()\n >>> llm = LlamaCppLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _LLAMA_CPP_AVAILABLE:\n raise ImportError(\n \"`LlamaCppLLM` cannot be used as `llama_cpp` is not installed, please \"\n \" install it with `pip install llama-cpp-python`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repeat_penalty = repeat_penalty\n self.seed = seed\n\n self.model = model\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_tokens,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n \"repeat_penalty\": self.repeat_penalty,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the llama-cpp model, which is the same as the model path.\"\"\"\n return self.model.model_path\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. 
Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n output = []\n for _ in range(num_generations):\n raw_output = self.model.create_completion(\n prompt,\n max_tokens=self.max_tokens,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n repeat_penalty=self.repeat_penalty,\n )\n try:\n parsed_output = self.task.parse_output(\n raw_output[\"choices\"][0][\"text\"].strip()\n )\n except Exception as e:\n logger.error(f\"Error parsing llama-cpp output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LlamaCppLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the llama-cpp model, which is the same as the model path.
"},{"location":"reference/distilabel/llm/#distilabel.llm.LlamaCppLLM.__init__","title":"__init__(model, task, max_new_tokens=128, temperature=0.8, top_p=0.95, top_k=40, repeat_penalty=1.1, seed=1337, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the LlamaCppLLM class.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model` | `Llama` | the llama-cpp model to be used. | required |
| `task` | `Task` | the task to be performed by the LLM. | required |
| `max_new_tokens` | `int` | the maximum number of tokens to be generated. Defaults to 128. | `128` |
| `temperature` | `float` | the temperature to be used for generation. Defaults to 0.8. | `0.8` |
| `top_p` | `float` | the top-p value to be used for generation. Defaults to 0.95. | `0.95` |
| `top_k` | `int` | the top-k value to be used for generation. Defaults to 40. | `40` |
| `repeat_penalty` | `float` | the repeat penalty to be used for generation. Defaults to 1.1. | `1.1` |
| `seed` | `int` | the seed to be used for generation; setting it to -1 implies that a different response will be generated on each generation, similarly to HuggingFace's `do_sample` arg. Defaults to 1337. | `1337` |
| `prompt_format` | `Union[SupportedFormats, None]` | the format to be used for the prompt. If `None`, the default format of the task will be used; available formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`, but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break) will be used if no `prompt_formatting_fn` is provided. | `None` |
| `prompt_formatting_fn` | `Union[Callable[..., str], None]` | a function to be applied to the prompt before generation. If `None`, no formatting will be applied. Defaults to `None`. | `None` |
Examples:
>>> from llama_cpp import Llama\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import LlamaCppLLM\n>>> model = Llama(model_path=\"path/to/model\")\n>>> task = Task()\n>>> llm = LlamaCppLLM(model=model, task=task)\n
Source code in src/distilabel/llm/llama_cpp.py
def __init__(\n self,\n model: \"Llama\",\n task: \"Task\",\n max_new_tokens: int = 128,\n temperature: float = 0.8,\n top_p: float = 0.95,\n top_k: int = 40,\n repeat_penalty: float = 1.1,\n seed: int = 1337,\n prompt_format: Union[SupportedFormats, None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the LlamaCppLLM class.\n\n Args:\n model (Llama): the llama-cpp model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 0.8.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 0.95.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n repeat_penalty (float, optional): the repeat penalty to be used for generation.\n Defaults to 1.1.\n seed (int, optional): the seed to be used for generation, setting it to -1 implies\n that a different response will be generated on each generation, similarly to\n HuggingFace's `do_sample` arg. Defaults to 1337.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Examples:\n >>> from llama_cpp import Llama\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import LlamaCppLLM\n >>> model = Llama(model_path=\"path/to/model\")\n >>> task = Task()\n >>> llm = LlamaCppLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _LLAMA_CPP_AVAILABLE:\n raise ImportError(\n \"`LlamaCppLLM` cannot be used as `llama_cpp` is not installed, please \"\n \" install it with `pip install llama-cpp-python`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repeat_penalty = repeat_penalty\n self.seed = seed\n\n self.model = model\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.OpenAILLM","title":"OpenAILLM
","text":" Bases: LLM
src/distilabel/llm/openai.py
class OpenAILLM(LLM):\n def __init__(\n self,\n task: \"Task\",\n model: str = \"gpt-3.5-turbo\",\n client: Union[\"OpenAI\", None] = None,\n openai_api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n frequency_penalty: float = 0.0,\n presence_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gpt-3.5-turbo\".\n client (Union[OpenAI, None], optional): an OpenAI client to be used for generation.\n If `None`, a new client will be created. Defaults to `None`.\n openai_api_key (Union[str, None], optional): the OpenAI API key to be used for generation.\n If `None`, the `OPENAI_API_KEY` environment variable will be used. Defaults to `None`.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in your OpenAI account.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import OpenAILLM\n >>> task = Task()\n >>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _OPENAI_AVAILABLE:\n raise ImportError(\n \"`OpenAILLM` cannot be used as `openai` is not installed, please \"\n \" install it with `pip install openai`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.frequency_penalty = frequency_penalty\n self.presence_penalty = presence_penalty\n self.temperature = temperature\n self.top_p = top_p\n\n self.client = client or OpenAI(api_key=openai_api_key, max_retries=6)\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in your OpenAI account, available models are {self.available_models}\"\n self.model = model\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_tokens\": self.max_tokens,\n \"frequency_penalty\": self.frequency_penalty,\n \"presence_penalty\": self.presence_penalty,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n },\n )\n\n @cached_property\n def available_models(self) -> List[str]:\n \"\"\"Returns the list of available models in your OpenAI account.\"\"\"\n return [model.id for model in self.client.models.list().data]\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the OpenAI model.\"\"\"\n return self.model\n\n def _generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"openai\")\n outputs = []\n for prompt in prompts:\n chat_completions = self.client.chat.completions.create(\n messages=prompt,\n model=self.model,\n n=num_generations,\n max_tokens=self.max_tokens,\n frequency_penalty=self.frequency_penalty,\n presence_penalty=self.presence_penalty,\n temperature=self.temperature,\n top_p=self.top_p,\n timeout=50,\n )\n\n output = []\n for chat_completion in chat_completions.choices:\n try:\n parsed_response = self.task.parse_output(\n chat_completion.message.content.strip()\n )\n except Exception as e:\n logger.error(f\"Error parsing OpenAI response: {e}\")\n parsed_response = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=chat_completion.message.content,\n parsed_output=parsed_response,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.OpenAILLM.available_models","title":"available_models: List[str]
cached
property
","text":"Returns the list of available models in your OpenAI account.
"},{"location":"reference/distilabel/llm/#distilabel.llm.OpenAILLM.model_name","title":"model_name: str
property
","text":"Returns the name of the OpenAI model.
"},{"location":"reference/distilabel/llm/#distilabel.llm.OpenAILLM.__init__","title":"__init__(task, model='gpt-3.5-turbo', client=None, openai_api_key=None, max_new_tokens=128, frequency_penalty=0.0, presence_penalty=0.0, temperature=1.0, top_p=1.0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the OpenAILLM class.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `task` | `Task` | the task to be performed by the LLM. | required |
| `model` | `str` | the model to be used for generation. Defaults to "gpt-3.5-turbo". | `'gpt-3.5-turbo'` |
| `client` | `Union[OpenAI, None]` | an OpenAI client to be used for generation. If `None`, a new client will be created. Defaults to `None`. | `None` |
| `openai_api_key` | `Union[str, None]` | the OpenAI API key to be used for generation. If `None`, the `OPENAI_API_KEY` environment variable will be used. Defaults to `None`. | `None` |
| `max_new_tokens` | `int` | the maximum number of tokens to be generated. Defaults to 128. | `128` |
| `frequency_penalty` | `float` | the frequency penalty to be used for generation. Defaults to 0.0. | `0.0` |
| `presence_penalty` | `float` | the presence penalty to be used for generation. Defaults to 0.0. | `0.0` |
| `temperature` | `float` | the temperature to be used for generation. Defaults to 1.0. | `1.0` |
| `top_p` | `float` | the top-p value to be used for generation. Defaults to 1.0. | `1.0` |
| `num_threads` | `Union[int, None]` | the number of threads to be used for parallel generation. If `None`, no parallel generation will be performed. Defaults to `None`. | `None` |
| `prompt_format` | `Union[SupportedFormats, None]` | the format to be used for the prompt. If `None`, the default format of the task will be used; available formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`, but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break) will be used if no `prompt_formatting_fn` is provided. | `None` |
| `prompt_formatting_fn` | `Union[Callable[..., str], None]` | a function to be applied to the prompt before generation. If `None`, no formatting will be applied. Defaults to `None`. | `None` |

Raises:

| Type | Description |
| --- | --- |
| `AssertionError` | if the provided `model` is not available in your OpenAI account. |
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import OpenAILLM\n>>> task = Task()\n>>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n
Source code in src/distilabel/llm/openai.py
def __init__(\n self,\n task: \"Task\",\n model: str = \"gpt-3.5-turbo\",\n client: Union[\"OpenAI\", None] = None,\n openai_api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n frequency_penalty: float = 0.0,\n presence_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gpt-3.5-turbo\".\n client (Union[OpenAI, None], optional): an OpenAI client to be used for generation.\n If `None`, a new client will be created. Defaults to `None`.\n openai_api_key (Union[str, None], optional): the OpenAI API key to be used for generation.\n If `None`, the `OPENAI_API_KEY` environment variable will be used. Defaults to `None`.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in your OpenAI account.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import OpenAILLM\n >>> task = Task()\n >>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _OPENAI_AVAILABLE:\n raise ImportError(\n \"`OpenAILLM` cannot be used as `openai` is not installed, please \"\n \" install it with `pip install openai`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.frequency_penalty = frequency_penalty\n self.presence_penalty = presence_penalty\n self.temperature = temperature\n self.top_p = top_p\n\n self.client = client or OpenAI(api_key=openai_api_key, max_retries=6)\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in your OpenAI account, available models are {self.available_models}\"\n self.model = model\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.ProcessLLM","title":"ProcessLLM
","text":"A class that wraps an LLM
and performs generation in a separate process. The result is a Future
that will be set when the generation is completed.
This class creates a new child process that will load the LLM
and perform the text generation. In order to communicate with this child process, a bridge thread is created in the main process. The bridge thread will send and receive the results from the child process using multiprocessing.Queue
s. The communication between the bridge thread and the main process is done using Future
s. This architecture was inspired by the ProcessPoolExecutor
from the concurrent.futures
module and it's a simplified version of it.
src/distilabel/llm/base.py
class ProcessLLM:\n \"\"\"A class that wraps an `LLM` and performs generation in a separate process. The\n result is a `Future` that will be set when the generation is completed.\n\n This class creates a new child process that will load the `LLM` and perform the\n text generation. In order to communicate with this child process, a bridge thread\n is created in the main process. The bridge thread will send and receive the results\n from the child process using `multiprocessing.Queue`s. The communication between the\n bridge thread and the main process is done using `Future`s. This architecture was\n inspired by the `ProcessPoolExecutor` from the `concurrent.futures` module and it's\n a simplified version of it.\n \"\"\"\n\n def __init__(self, task: Task, load_llm_fn: Callable[[Task], LLM]) -> None:\n \"\"\"Initializes the `ProcessLLM` class.\n\n Args:\n task: the task to be performed by the `LLM`. This task will be used by the\n child process when calling the `load_llm_fn`.\n load_llm_fn (Callable[[Task], LLM]): a function that will be executed in the\n child process to load the `LLM`. It must return an `LLM` instance.\n \"\"\"\n self.task = task\n\n self._load_llm_fn = load_llm_fn\n\n # The bridge thread will act as a bridge between the main process and the child\n # process for communication. It will send the generation requests to the child\n # process and receive the results from the child process.\n self._bridge_thread = None\n\n # The child process which will load the `LLM` and perform the generation.\n self._generation_process = None\n\n # The `Semaphore` that will be used to synchronize the loading of the `LLM`.\n # `_BridgeThread` will be blocked until `_GenerationProcess` has called the\n # `load_llm_fn` and the `LLM` has been loaded.\n self._load_llm_sem = mp.Semaphore(0)\n\n # This thread will create text generation requests\n self.pending_text_generation_request: Dict[int, _TextGenerationRequest] = {}\n self.text_generation_request_count = 0\n self.text_generation_request_ids_queue: queue.Queue[int] = queue.Queue()\n\n # Queues for the communication between the `_BridgeThread` and the `_GenerationProcess`\n self._call_queue = mp.Queue()\n self._result_queue = mp.Queue()\n\n # Shared memory object for transfering the `model_name` to the main process\n # once the `LLM` is loaded\n self._model_name = mp.Array(c_char, MAX_MODEL_NAME_LENGTH)\n\n def _start_bridge_thread(self) -> None:\n \"\"\"Starts the bridge thread and the generation process.\"\"\"\n if self._bridge_thread is None:\n self._generation_process = _GenerationProcess(self)\n self._generation_process.start()\n pid = self._generation_process.pid\n logger.debug(f\"Generation process with PID {pid} started!\")\n\n self._bridge_thread = _BridgeThread(self)\n self._bridge_thread.start()\n logger.debug(\"Bridge thread for process with PID {pid} started!\")\n\n def _add_text_generation_request(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Creates and send a new text generation request to the bridge thread. This thread\n and the bridge thread shares a dictionary used to store the text generation requests.\n This thread will add the text generation requests to the dictionary and the bridge\n thread will only read from it. In order for the bridge thread to know that a new\n text generation request has been added to the dictionary, this thread will put the\n id of the request in a queue. 
The bridge thread will read from this queue and get\n the text generation request from the dictionary.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n text_generation_request = _TextGenerationRequest(\n inputs=inputs, num_generations=num_generations\n )\n # Put the request information in the dictionary associated to the request id\n self.pending_text_generation_request[\n self.text_generation_request_count\n ] = text_generation_request\n # Put the request id in the queue (for the `_BridgeThread` to consume it)\n self.text_generation_request_ids_queue.put(self.text_generation_request_count)\n self.text_generation_request_count += 1\n text_generation_request.future.add_done_callback(lambda _: _progress())\n return text_generation_request.future\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Generates the outputs for the given inputs using the `ProcessLLM` and its loaded\n `LLM`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n self._start_bridge_thread()\n return self._add_text_generation_request(\n inputs, num_generations, progress_callback_func\n )\n\n def teardown(self) -> None:\n \"\"\"Stops the bridge thread and the generation process.\"\"\"\n if self._generation_process is not None:\n self._generation_process.stop()\n self._generation_process.join()\n\n if self._bridge_thread is not None:\n self._bridge_thread.stop()\n self._bridge_thread.join()\n\n @cached_property\n def model_name(self) -> str:\n \"\"\"Returns the model name of the `LLM` once it has been loaded.\"\"\"\n with self._model_name:\n return \"\".join([c.decode() for c in self._model_name if c != b\"\\0\"])\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return True\n
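For orientation, the snippet below is a minimal usage sketch of ProcessLLM. It assumes an OpenAI API key is configured in the environment and that the task's input dicts use an \"input\" field; both are assumptions, not guarantees made by this reference.
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import OpenAILLM, ProcessLLM
>>> def load_openai_llm(task: Task) -> OpenAILLM:
...     # executed inside the child process; it must return an `LLM` instance
...     return OpenAILLM(model=\"gpt-3.5-turbo\", task=task)
>>> llm = ProcessLLM(task=Task(), load_llm_fn=load_openai_llm)
>>> future = llm.generate(inputs=[{\"input\": \"Write a haiku about queues.\"}], num_generations=1)
>>> outputs = future.result()  # blocks until the child process finishes generating
>>> llm.teardown()  # stops the bridge thread and the generation process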
"},{"location":"reference/distilabel/llm/#distilabel.llm.ProcessLLM.model_name","title":"model_name: str
cached
property
","text":"Returns the model name of the LLM
once it has been loaded.
return_futures: bool
property
","text":"Whether the LLM
returns futures
__init__(task, load_llm_fn)
","text":"Initializes the ProcessLLM
class.
Parameters:
task (Task): the task to be performed by the LLM. This task will be used by the child process when calling the load_llm_fn. Required.
load_llm_fn (Callable[[Task], LLM]): a function that will be executed in the child process to load the LLM. It must return an LLM instance. Required.
src/distilabel/llm/base.py
def __init__(self, task: Task, load_llm_fn: Callable[[Task], LLM]) -> None:\n \"\"\"Initializes the `ProcessLLM` class.\n\n Args:\n task: the task to be performed by the `LLM`. This task will be used by the\n child process when calling the `load_llm_fn`.\n load_llm_fn (Callable[[Task], LLM]): a function that will be executed in the\n child process to load the `LLM`. It must return an `LLM` instance.\n \"\"\"\n self.task = task\n\n self._load_llm_fn = load_llm_fn\n\n # The bridge thread will act as a bridge between the main process and the child\n # process for communication. It will send the generation requests to the child\n # process and receive the results from the child process.\n self._bridge_thread = None\n\n # The child process which will load the `LLM` and perform the generation.\n self._generation_process = None\n\n # The `Semaphore` that will be used to synchronize the loading of the `LLM`.\n # `_BridgeThread` will be blocked until `_GenerationProcess` has called the\n # `load_llm_fn` and the `LLM` has been loaded.\n self._load_llm_sem = mp.Semaphore(0)\n\n # This thread will create text generation requests\n self.pending_text_generation_request: Dict[int, _TextGenerationRequest] = {}\n self.text_generation_request_count = 0\n self.text_generation_request_ids_queue: queue.Queue[int] = queue.Queue()\n\n # Queues for the communication between the `_BridgeThread` and the `_GenerationProcess`\n self._call_queue = mp.Queue()\n self._result_queue = mp.Queue()\n\n # Shared memory object for transfering the `model_name` to the main process\n # once the `LLM` is loaded\n self._model_name = mp.Array(c_char, MAX_MODEL_NAME_LENGTH)\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.ProcessLLM.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the ProcessLLM
and its loaded LLM
.
Parameters:
inputs (List[Dict[str, Any]]): the inputs to be used for generation. Required.
num_generations (int): the number of generations to be performed for each input. Defaults to 1.
progress_callback_func (Union[Callable, None]): a function to be called at each generation step. Defaults to None.
Returns:
Future[List[List[\"LLMOutput\"]]]: the generated outputs as a Future.
src/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Generates the outputs for the given inputs using the `ProcessLLM` and its loaded\n `LLM`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n self._start_bridge_thread()\n return self._add_text_generation_request(\n inputs, num_generations, progress_callback_func\n )\n
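Continuing the sketch above (reusing the `llm` ProcessLLM instance), the optional progress_callback_func is invoked when a request's Future completes, with an advance keyword argument equal to num_generations * len(inputs), as shown in the source above.
>>> def on_progress(advance: int) -> None:
...     # called once per completed request with `advance=num_generations * len(inputs)`
...     print(f\"{advance} generations completed\")
>>> future = llm.generate(
...     inputs=[{\"input\": \"Summarize how ProcessLLM communicates with its child process.\"}],
...     num_generations=2,
...     progress_callback_func=on_progress,
... )
>>> results = future.result()  # List[List[LLMOutput]]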
"},{"location":"reference/distilabel/llm/#distilabel.llm.ProcessLLM.teardown","title":"teardown()
","text":"Stops the bridge thread and the generation process.
Source code in src/distilabel/llm/base.py
def teardown(self) -> None:\n \"\"\"Stops the bridge thread and the generation process.\"\"\"\n if self._generation_process is not None:\n self._generation_process.stop()\n self._generation_process.join()\n\n if self._bridge_thread is not None:\n self._bridge_thread.stop()\n self._bridge_thread.join()\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.TogetherInferenceLLM","title":"TogetherInferenceLLM
","text":" Bases: LLM
src/distilabel/llm/together.py
class TogetherInferenceLLM(LLM):\n def __init__(\n self,\n task: \"Task\",\n model: str,\n api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: float = 1.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = 1,\n stop: Union[List[str], None] = None,\n logprobs: int = 0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str): the model to be used for generation.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation. From the Together\n Inference docs: \"A decimal number that determines the degree of randomness in the response.\n A value of 0 will always yield the same output. A temperature much less than 1 favors more\n correctness and is appropriate for question answering or summarization. A value approaching\n 1 introduces more randomness in the output.\". Defaults to 1.0.\n repetition_penalty (float, optional): the repetition penalty to be used for generation. From the\n Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood\n of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation. From the Together\n Inference docs: \"used to dynamically adjust the number of choices for each predicted\n token based on the cumulative probabilities. It specifies a probability threshold,\n below which all less likely tokens are filtered out. This technique helps to maintain\n diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation. From the Together Inference\n docs: \"used to limit the number of choices for the next predicted word or token. It specifies\n the maximum number of tokens to consider at each step, based on their probability of occurrence.\n This technique helps to speed up the generation process and can improve the quality of the\n generated text by focusing on the most likely options.\". Defaults to 1.\n stop (List[str], optional): strings to delimitate the generation process, so that when the\n model generates any of the provided characters, the generation process is considered completed.\n Defaults to None.\n logprobs (int, optional): the number of logprobs to be returned for each token. From the\n Together Inference docs: \"An integer that specifies how many top token log probabilities\n are included in the response for each token generation step.\". Defaults to None.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in Together Inference.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TogetherInferenceLLM\n >>> task = Task()\n >>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n \"\"\"\n if not _TOGETHER_AVAILABLE:\n raise ImportError(\n \"`TogetherInferenceLLM` cannot be used as `together` is not installed, please \"\n \" install it with `pip install together`.\"\n )\n\n together.api_key = api_key or os.getenv(\"TOGETHER_API_KEY\", None)\n if together.api_key is None:\n raise ValueError(\n \"No `api_key` provided, please provide one or set the `TOGETHER_API_KEY` \"\n \"environment variable.\"\n )\n\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in Together Inference, available models are {self.available_models}\"\n self.model = model\n\n self.max_new_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repetition_penalty = repetition_penalty\n self.stop = stop\n self.logprobs = logprobs\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_new_tokens,\n \"temperature\": self.temperature,\n \"repetition_penalty\": self.repetition_penalty,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n \"stop\": self.stop,\n \"logprobs\": self.logprobs,\n },\n )\n\n @cached_property\n def available_models(self) -> List[str]:\n \"\"\"Returns the list of available models in Together Inference.\"\"\"\n # TODO: exclude the image models\n return [model[\"name\"] for model in together.Models.list()]\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the Together Inference model.\"\"\"\n return self.model\n\n def _generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n batch = []\n for _ in range(num_generations):\n output = together.Complete.create(\n prompt=prompt,\n model=self.model,\n max_tokens=self.max_new_tokens,\n stop=self.stop,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n repetition_penalty=self.repetition_penalty,\n logprobs=self.logprobs,\n )\n if output[\"output\"][\"choices\"] is not None:\n for choice in output[\"output\"][\"choices\"]:\n try:\n parsed_response = self.task.parse_output(\n choice[\"text\"].strip()\n )\n except Exception as e:\n logger.error(\n f\"Error parsing Together Inference response: {e}\"\n )\n parsed_response = None\n batch.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=choice[\"text\"],\n parsed_output=parsed_response,\n )\n )\n if len(batch) > 0:\n outputs.append(batch)\n return outputs\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.TogetherInferenceLLM.available_models","title":"available_models: List[str]
cached
property
","text":"Returns the list of available models in Together Inference.
"},{"location":"reference/distilabel/llm/#distilabel.llm.TogetherInferenceLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the Together Inference model.
"},{"location":"reference/distilabel/llm/#distilabel.llm.TogetherInferenceLLM.__init__","title":"__init__(task, model, api_key=None, max_new_tokens=128, repetition_penalty=1.0, temperature=1.0, top_p=1.0, top_k=1, stop=None, logprobs=0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the OpenAILLM class.
Parameters:
task (Task): the task to be performed by the LLM. Required.
model (str): the model to be used for generation. Required.
max_new_tokens (int): the maximum number of tokens to be generated. Defaults to 128.
temperature (float): the temperature to be used for generation. From the Together Inference docs: \"A decimal number that determines the degree of randomness in the response. A value of 0 will always yield the same output. A temperature much less than 1 favors more correctness and is appropriate for question answering or summarization. A value approaching 1 introduces more randomness in the output.\". Defaults to 1.0.
repetition_penalty (float): the repetition penalty to be used for generation. From the Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.
top_p (float): the top-p value to be used for generation. From the Together Inference docs: \"used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold, below which all less likely tokens are filtered out. This technique helps to maintain diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.
top_k (int): the top-k value to be used for generation. From the Together Inference docs: \"used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options.\". Defaults to 1.
stop (List[str]): strings to delimit the generation process, so that when the model generates any of the provided strings, the generation process is considered completed. Defaults to None.
logprobs (int): the number of logprobs to be returned for each token. From the Together Inference docs: \"An integer that specifies how many top token log probabilities are included in the response for each token generation step.\". Defaults to 0.
num_threads (Union[int, None]): the number of threads to be used for parallel generation. If None, no parallel generation will be performed. Defaults to None.
prompt_format (Union[SupportedFormats, None]): the format to be used for the prompt. If None, the default format of the task will be used, available formats are openai, chatml, llama2, zephyr, and default. Defaults to None, but default (concatenation of system_prompt and formatted_prompt with a line-break) will be used if no prompt_formatting_fn is provided.
prompt_formatting_fn (Union[Callable[..., str], None]): a function to be applied to the prompt before generation. If None, no formatting will be applied. Defaults to None.
Raises:
AssertionError: if the provided model is not available in Together Inference.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import TogetherInferenceLLM\n>>> task = Task()\n>>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n
Source code in src/distilabel/llm/together.py
def __init__(\n self,\n task: \"Task\",\n model: str,\n api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: float = 1.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = 1,\n stop: Union[List[str], None] = None,\n logprobs: int = 0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str): the model to be used for generation.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation. From the Together\n Inference docs: \"A decimal number that determines the degree of randomness in the response.\n A value of 0 will always yield the same output. A temperature much less than 1 favors more\n correctness and is appropriate for question answering or summarization. A value approaching\n 1 introduces more randomness in the output.\". Defaults to 1.0.\n repetition_penalty (float, optional): the repetition penalty to be used for generation. From the\n Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood\n of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation. From the Together\n Inference docs: \"used to dynamically adjust the number of choices for each predicted\n token based on the cumulative probabilities. It specifies a probability threshold,\n below which all less likely tokens are filtered out. This technique helps to maintain\n diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation. From the Together Inference\n docs: \"used to limit the number of choices for the next predicted word or token. It specifies\n the maximum number of tokens to consider at each step, based on their probability of occurrence.\n This technique helps to speed up the generation process and can improve the quality of the\n generated text by focusing on the most likely options.\". Defaults to 1.\n stop (List[str], optional): strings to delimitate the generation process, so that when the\n model generates any of the provided characters, the generation process is considered completed.\n Defaults to None.\n logprobs (int, optional): the number of logprobs to be returned for each token. From the\n Together Inference docs: \"An integer that specifies how many top token log probabilities\n are included in the response for each token generation step.\". Defaults to None.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in Together Inference.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TogetherInferenceLLM\n >>> task = Task()\n >>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n \"\"\"\n if not _TOGETHER_AVAILABLE:\n raise ImportError(\n \"`TogetherInferenceLLM` cannot be used as `together` is not installed, please \"\n \" install it with `pip install together`.\"\n )\n\n together.api_key = api_key or os.getenv(\"TOGETHER_API_KEY\", None)\n if together.api_key is None:\n raise ValueError(\n \"No `api_key` provided, please provide one or set the `TOGETHER_API_KEY` \"\n \"environment variable.\"\n )\n\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in Together Inference, available models are {self.available_models}\"\n self.model = model\n\n self.max_new_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repetition_penalty = repetition_penalty\n self.stop = stop\n self.logprobs = logprobs\n
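As a hedged end-to-end sketch: it assumes the TOGETHER_API_KEY environment variable is set, that the chosen model is listed by together.Models.list() for your account, that the task's input dicts use an \"input\" field, and that with num_threads=None the base LLM.generate returns a plain nested list rather than futures.
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import TogetherInferenceLLM
>>> task = Task()
>>> llm = TogetherInferenceLLM(
...     model=\"togethercomputer/llama-2-7b\",
...     task=task,
...     prompt_format=\"llama2\",
...     max_new_tokens=64,
...     temperature=0.7,
... )
>>> outputs = llm.generate(inputs=[{\"input\": \"What is AI feedback?\"}], num_generations=1)
>>> outputs[0][0][\"parsed_output\"]  # each item is an `LLMOutput` with `raw_output` and `prompt_used` as well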
"},{"location":"reference/distilabel/llm/#distilabel.llm.TransformersLLM","title":"TransformersLLM
","text":" Bases: LLM
src/distilabel/llm/huggingface/transformers.py
class TransformersLLM(LLM):\n def __init__(\n self,\n model: \"PreTrainedModel\",\n tokenizer: \"PreTrainedTokenizer\",\n task: \"Task\",\n max_new_tokens: int = 128,\n do_sample: bool = False,\n temperature: float = 1.0,\n top_k: int = 50,\n top_p: float = 1.0,\n typical_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the TransformersLLM class.\n\n Args:\n model (PreTrainedModel): the model to be used for generation.\n tokenizer (PreTrainedTokenizer): the tokenizer to be used for generation.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n do_sample (bool, optional): whether to sample from the model or not.\n Defaults to False.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 50.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n typical_p (float, optional): the typical-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used for generation.\n If `None`, the number of threads will be set to the number of available CPUs.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n Defaults to `None`.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): the function to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TransformersLLM\n >>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n >>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n >>> task = Task()\n >>> llm = TransformersLLM(\n ... model=model,\n ... tokenizer=tokenizer,\n ... task=task,\n ... 
)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n self.max_new_tokens = max_new_tokens\n self.do_sample = do_sample\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.model = model\n self.tokenizer = tokenizer\n\n if self.tokenizer.pad_token is None:\n self.tokenizer.pad_token = self.tokenizer.eos_token\n if (\n hasattr(self.tokenizer, \"use_default_system_prompt\")\n and self.tokenizer.use_default_system_prompt # type: ignore\n ):\n # The `tokenizer` also has a method named `apply_chat_template` that expects a `Conversation` as OpenAI does with the ChatML format\n warnings.warn(\n \"The provided `tokenizer` has `use_default_system_prompt=True` which means that the default system prompt will be used, which may collide with the `task` provided as an arg to this class.\",\n UserWarning,\n stacklevel=2,\n )\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_new_tokens,\n \"do_sample\": self.do_sample,\n \"temperature\": self.temperature,\n \"top_k\": self.top_k,\n \"top_p\": self.top_p,\n \"typical_p\": self.typical_p,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the Transformers model.\"\"\"\n return self.model.config.name_or_path\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n encodings = self.tokenizer(prompts, padding=True, return_tensors=\"pt\")\n encodings = encodings.to(self.model.device)\n with torch.inference_mode():\n generated_ids = self.model.generate(\n **encodings, # type: ignore\n pad_token_id=self.tokenizer.eos_token_id,\n generation_config=GenerationConfig(\n do_sample=self.do_sample,\n temperature=self.temperature,\n max_new_tokens=self.max_new_tokens,\n top_k=self.top_k,\n top_p=self.top_p,\n typical_p=self.typical_p,\n num_return_sequences=num_generations,\n ),\n )\n raw_outputs = self.tokenizer.batch_decode(\n generated_ids[:, encodings.input_ids.shape[1] :],\n skip_special_tokens=True,\n clean_up_tokenization_spaces=True,\n )\n outputs = []\n for prompt, i in zip(prompts, range(0, len(raw_outputs), num_generations)):\n output = []\n for raw_output in raw_outputs[i : i + num_generations]:\n try:\n parsed_output = self.task.parse_output(raw_output)\n except Exception as e:\n logger.error(f\"Error parsing Transformers output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.TransformersLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the Transformers model.
"},{"location":"reference/distilabel/llm/#distilabel.llm.TransformersLLM.__init__","title":"__init__(model, tokenizer, task, max_new_tokens=128, do_sample=False, temperature=1.0, top_k=50, top_p=1.0, typical_p=1.0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the TransformersLLM class.
Parameters:
model (PreTrainedModel): the model to be used for generation. Required.
tokenizer (PreTrainedTokenizer): the tokenizer to be used for generation. Required.
task (Task): the task to be performed by the LLM. Required.
max_new_tokens (int): the maximum number of tokens to be generated. Defaults to 128.
do_sample (bool): whether to sample from the model or not. Defaults to False.
temperature (float): the temperature to be used for generation. Defaults to 1.0.
top_k (int): the top-k value to be used for generation. Defaults to 50.
top_p (float): the top-p value to be used for generation. Defaults to 1.0.
typical_p (float): the typical-p value to be used for generation. Defaults to 1.0.
num_threads (Union[int, None]): the number of threads to be used for generation. If None, the number of threads will be set to the number of available CPUs. Defaults to None.
prompt_format (Union[SupportedFormats, None]): the format to be used for formatting the prompts. If None, the prompts will not be formatted. Defaults to None.
prompt_formatting_fn (Union[Callable[..., str], None]): the function to be used for formatting the prompts. If None, the prompts will not be formatted. Defaults to None.
Examples:
>>> from transformers import AutoModelForCausalLM, AutoTokenizer\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import TransformersLLM\n>>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n>>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n>>> task = Task()\n>>> llm = TransformersLLM(\n... model=model,\n... tokenizer=tokenizer,\n... task=task,\n... )\n
Source code in src/distilabel/llm/huggingface/transformers.py
def __init__(\n self,\n model: \"PreTrainedModel\",\n tokenizer: \"PreTrainedTokenizer\",\n task: \"Task\",\n max_new_tokens: int = 128,\n do_sample: bool = False,\n temperature: float = 1.0,\n top_k: int = 50,\n top_p: float = 1.0,\n typical_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the TransformersLLM class.\n\n Args:\n model (PreTrainedModel): the model to be used for generation.\n tokenizer (PreTrainedTokenizer): the tokenizer to be used for generation.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n do_sample (bool, optional): whether to sample from the model or not.\n Defaults to False.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 50.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n typical_p (float, optional): the typical-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used for generation.\n If `None`, the number of threads will be set to the number of available CPUs.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n Defaults to `None`.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): the function to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TransformersLLM\n >>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n >>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n >>> task = Task()\n >>> llm = TransformersLLM(\n ... model=model,\n ... tokenizer=tokenizer,\n ... task=task,\n ... )\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n self.max_new_tokens = max_new_tokens\n self.do_sample = do_sample\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.model = model\n self.tokenizer = tokenizer\n\n if self.tokenizer.pad_token is None:\n self.tokenizer.pad_token = self.tokenizer.eos_token\n if (\n hasattr(self.tokenizer, \"use_default_system_prompt\")\n and self.tokenizer.use_default_system_prompt # type: ignore\n ):\n # The `tokenizer` also has a method named `apply_chat_template` that expects a `Conversation` as OpenAI does with the ChatML format\n warnings.warn(\n \"The provided `tokenizer` has `use_default_system_prompt=True` which means that the default system prompt will be used, which may collide with the `task` provided as an arg to this class.\",\n UserWarning,\n stacklevel=2,\n )\n
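Building on the constructor example above, a minimal generation sketch; it assumes the base LLM.generate(inputs, num_generations) interface and an \"input\" field per row, which this reference does not spell out for TransformersLLM.
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import TransformersLLM
>>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")
>>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")
>>> llm = TransformersLLM(model=model, tokenizer=tokenizer, task=Task(), max_new_tokens=32, do_sample=True)
>>> outputs = llm.generate(inputs=[{\"input\": \"Hello, world!\"}], num_generations=1)
>>> outputs[0][0][\"raw_output\"]  # decoded continuation; `parsed_output` holds the task-parsed result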
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAIEndpointLLM","title":"VertexAIEndpointLLM
","text":" Bases: LLM
An LLM which uses a Vertex AI Online prediction endpoint for the generation.
More information about Vertex AI Endpoints can be found here:
- https://cloud.google.com/vertex-ai/docs/general/deployment#deploy_a_model_to_an_endpoint
Source code in src/distilabel/llm/google/vertexai.py
class VertexAIEndpointLLM(LLM):\n \"\"\"An `LLM` which uses a Vertex AI Online prediction endpoint for the generation.\n\n More information about Vertex AI Endpoints can be found here:\n\n - https://cloud.google.com/vertex-ai/docs/general/deployment#deploy_a_model_to_an_endpoint\n \"\"\"\n\n def __init__(\n self,\n task: \"Task\",\n endpoint_id: str,\n project: Optional[str] = None,\n location: str = \"us-central1\",\n generation_kwargs: Optional[Dict[str, Any]] = None,\n prompt_argument: str = \"prompt\",\n num_generations_argument: str = \"n\",\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the `VertexAIEndpointLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n endpoint_id (str): the ID of the Vertex AI endpoint to be used for generation.\n project (Optional[str], optional): the project to be used for generation. If `None`,\n the default project will be used. Defaults to `None`.\n location (str, optional): the location of the Vertex AI endpoint to be used for\n generation. Defaults to \"us-central1\".\n generation_kwargs (Optional[Dict[str, Any]], optional): the generation parameters\n to be used for generation. The name of the parameters will depend on the\n Docker image used to deploy the model to the Vertex AI endpoint. Defaults\n to `None`.\n prompt_argument (str, optional): the name of the Vertex AI Endpoint key to\n be used for the prompt. Defaults to \"prompt\".\n num_generations_argument (str, optional): the name of the Vertex AI Endpoint\n key to be used to specify the number of generations per prompt. Defaults\n to \"n\".\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAIEndpointLLM` cannot be used as `google-cloud-aiplatform` is not\"\n \" installed, please install it with `pip install google-cloud-aiplatform`\"\n )\n\n if project is None:\n try:\n project = google.auth.default()[1]\n except DefaultCredentialsError as e:\n raise ValueError(\n \"No `project` was specified and no default credentials were found.\"\n ) from e\n\n if generation_kwargs is None:\n generation_kwargs = {}\n\n self.endpoint_id = endpoint_id\n self.project = project\n self.location = location\n self.generation_kwargs = generation_kwargs\n self.prompt_argument = prompt_argument\n self.num_generations_argument = num_generations_argument\n\n self.client = PredictionServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n\n @cached_property\n def model_name(self) -> str:\n \"\"\"Returns the name of the model used for generation.\"\"\"\n client = EndpointServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n endpoint = client.get_endpoint(name=self.endpoint_path)\n return endpoint.deployed_models[0].display_name\n\n @property\n def endpoint_path(self) -> str:\n \"\"\"Returns the path of the Vertex AI endpoint to be used for generation.\"\"\"\n return self.client.endpoint_path(\n project=self.project, # type: ignore\n location=self.location,\n endpoint=self.endpoint_id,\n )\n\n @_vertexai_retry_decorator\n def _call_vertexai_endpoint(self, instances: List[Any]) -> Any:\n return self.client.predict(endpoint=self.endpoint_path, instances=instances)\n\n def _prepare_instances(\n self, prompts: List[str], num_generations: int\n ) -> List[\"Value\"]:\n \"\"\"Prepares the instances to be sent to the Vertex AI endpoint.\n\n Args:\n prompts (List[str]): the prompts to be used for generation.\n num_generations (int): the number of generations to be performed for each prompt.\n\n Returns:\n The instances to be sent to the Vertex AI endpoint.\n \"\"\"\n instances = []\n for prompt in prompts:\n instance = json_format.ParseDict(\n {\n self.prompt_argument: prompt,\n self.num_generations_argument: num_generations,\n **self.generation_kwargs,\n },\n Value(),\n )\n instances.append(instance)\n return instances\n\n def _single_output(self, instance: Any) -> List[LLMOutput]:\n try:\n # NOTE: `predict` method accepts a list of instances, but depending on the\n # deployed Docker image, it can just accept one instance.\n response = self._call_vertexai_endpoint(instances=[instance])\n except exceptions.InternalServerError as e:\n raise ValueError(\n \"The Vertex AI endpoint returned 500 Internal Server Error. This is\"\n \" usually caused due to wrong generation parameters. 
Please check the\"\n \" `generation_parameters` and try again.\"\n ) from e\n\n output = []\n for prediction in response.predictions:\n # Vertex endpoint output is `Prompt:\\n{{ model_prompt }}\\nOutput:\\n{{ model_output }}`\n # so we need to do a pre-parsing to remove the `Prompt:` and `Output:` parts.\n match = _PARSE_VERTEXAI_ENDPOINT_PREDICTION_REGEX.search(prediction)\n if not match:\n raise ValueError(\n \"Couldn't parse the response from the Vertex AI endpoint.\"\n )\n\n model_output = match.group(1).strip()\n\n try:\n parsed_output = self.task.parse_output(model_output)\n except Exception as e:\n logger.error(f\"Error parsing Vertex AI endpoint model response: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=instance.struct_value[self.prompt_argument],\n raw_output=model_output,\n parsed_output=parsed_output,\n )\n )\n return output\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n prompts = self._generate_prompts(inputs)\n instances = self._prepare_instances(\n prompts=prompts, num_generations=num_generations\n )\n return [self._single_output(instance) for instance in instances]\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAIEndpointLLM.endpoint_path","title":"endpoint_path: str
property
","text":"Returns the path of the Vertex AI endpoint to be used for generation.
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAIEndpointLLM.model_name","title":"model_name: str
cached
property
","text":"Returns the name of the model used for generation.
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAIEndpointLLM.__init__","title":"__init__(task, endpoint_id, project=None, location='us-central1', generation_kwargs=None, prompt_argument='prompt', num_generations_argument='n', num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the VertexAIEndpointLLM
class.
Parameters:
task (Task): the task to be performed by the LLM. Required.
endpoint_id (str): the ID of the Vertex AI endpoint to be used for generation. Required.
project (Optional[str]): the project to be used for generation. If None, the default project will be used. Defaults to None.
location (str): the location of the Vertex AI endpoint to be used for generation. Defaults to \"us-central1\".
generation_kwargs (Optional[Dict[str, Any]]): the generation parameters to be used for generation. The name of the parameters will depend on the Docker image used to deploy the model to the Vertex AI endpoint. Defaults to None.
prompt_argument (str): the name of the Vertex AI Endpoint key to be used for the prompt. Defaults to \"prompt\".
num_generations_argument (str): the name of the Vertex AI Endpoint key to be used to specify the number of generations per prompt. Defaults to \"n\".
num_threads (Union[int, None]): the number of threads to be used for parallel generation. If None, no parallel generation will be performed. Defaults to None.
prompt_format (Union[SupportedFormats, None]): the format to be used for the prompt. If None, the default format of the task will be used, available formats are openai, chatml, llama2, zephyr, and default. Defaults to None, but default (concatenation of system_prompt and formatted_prompt with a line-break) will be used if no prompt_formatting_fn is provided.
prompt_formatting_fn (Union[Callable[..., str], None]): a function to be applied to the prompt before generation. If None, no formatting will be applied. Defaults to None.
Source code in src/distilabel/llm/google/vertexai.py
def __init__(\n self,\n task: \"Task\",\n endpoint_id: str,\n project: Optional[str] = None,\n location: str = \"us-central1\",\n generation_kwargs: Optional[Dict[str, Any]] = None,\n prompt_argument: str = \"prompt\",\n num_generations_argument: str = \"n\",\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the `VertexAIEndpointLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n endpoint_id (str): the ID of the Vertex AI endpoint to be used for generation.\n project (Optional[str], optional): the project to be used for generation. If `None`,\n the default project will be used. Defaults to `None`.\n location (str, optional): the location of the Vertex AI endpoint to be used for\n generation. Defaults to \"us-central1\".\n generation_kwargs (Optional[Dict[str, Any]], optional): the generation parameters\n to be used for generation. The name of the parameters will depend on the\n Docker image used to deploy the model to the Vertex AI endpoint. Defaults\n to `None`.\n prompt_argument (str, optional): the name of the Vertex AI Endpoint key to\n be used for the prompt. Defaults to \"prompt\".\n num_generations_argument (str, optional): the name of the Vertex AI Endpoint\n key to be used to specify the number of generations per prompt. Defaults\n to \"n\".\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAIEndpointLLM` cannot be used as `google-cloud-aiplatform` is not\"\n \" installed, please install it with `pip install google-cloud-aiplatform`\"\n )\n\n if project is None:\n try:\n project = google.auth.default()[1]\n except DefaultCredentialsError as e:\n raise ValueError(\n \"No `project` was specified and no default credentials were found.\"\n ) from e\n\n if generation_kwargs is None:\n generation_kwargs = {}\n\n self.endpoint_id = endpoint_id\n self.project = project\n self.location = location\n self.generation_kwargs = generation_kwargs\n self.prompt_argument = prompt_argument\n self.num_generations_argument = num_generations_argument\n\n self.client = PredictionServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n
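Since this class ships no Examples section, here is a hedged construction sketch. The endpoint ID and project are placeholders, and the keys accepted inside generation_kwargs depend entirely on the Docker image deployed behind the endpoint; the \"input\" field is also an assumption about the task.
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import VertexAIEndpointLLM
>>> llm = VertexAIEndpointLLM(
...     task=Task(),
...     endpoint_id=\"1234567890\",  # placeholder: your deployed endpoint ID
...     project=\"my-gcp-project\",  # placeholder: your GCP project
...     location=\"us-central1\",
...     generation_kwargs={\"temperature\": 0.7, \"max_tokens\": 128},  # names depend on the deployed image
... )
>>> outputs = llm.generate(inputs=[{\"input\": \"What is AI feedback?\"}], num_generations=1)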
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAILLM","title":"VertexAILLM
","text":" Bases: LLM
An LLM which allows using Google's proprietary models from the Vertex AI APIs:
- Gemini API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini
- Codey API: https://cloud.google.com/vertex-ai/docs/generative-ai/code/code-models-overview
- Text API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text
To use the VertexAILLM it is necessary to have configured the Google Cloud authentication using one of these methods:
- Setting the GOOGLE_CLOUD_CREDENTIALS environment variable
- Using the gcloud auth application-default login command
- Using the vertexai.init function from the google-cloud-aiplatform library
src/distilabel/llm/google/vertexai.py
class VertexAILLM(LLM):\n \"\"\"An `LLM` which allows to use Google's proprietary models from the Vertex AI APIs:\n\n - Gemini API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini\n - Codey API: https://cloud.google.com/vertex-ai/docs/generative-ai/code/code-models-overview\n - Text API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text\n\n To use the `VertexAILLM` is necessary to have configured the Google Cloud authentication\n using one of these methods:\n\n - Setting `GOOGLE_CLOUD_CREDENTIALS` environment variable\n - Using `gcloud auth application-default login` command\n - Using `vertexai.init` function from the `google-cloud-aiplatform` library\n \"\"\"\n\n def __init__(\n self,\n task: \"Task\",\n model: str = \"gemini-pro\",\n temperature: Optional[float] = None,\n top_p: Optional[float] = None,\n top_k: Optional[int] = None,\n max_new_tokens: int = 128,\n stop_sequences: Optional[List[str]] = None,\n num_threads: Union[int, None] = None,\n ) -> None:\n \"\"\"Initializes the `VertexGenerativeModelLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gemini-pro\".\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n \"\"\"\n super().__init__(task=task, num_threads=num_threads)\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAILLM` cannot be used as `google-cloud-aiplatform` is not installed,\"\n \" please install it with `pip install google-cloud-aiplatform`\"\n )\n\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_output_tokens = max_new_tokens\n self.stop_sequences = stop_sequences\n\n if is_gemini_model(model):\n self.model = GenerativeModel(model)\n elif is_codey_model(model):\n self.model = CodeGenerationModel.from_pretrained(model)\n else:\n self.model = TextGenerationModel.from_pretrained(model)\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the model used for generation.\"\"\"\n if isinstance(self.model, GenerativeModel):\n return self.model._model_name\n\n return self.model._model_id\n\n def _generate_contents(self, prompts: List[str]) -> List[List[Dict[str, Any]]]:\n \"\"\"Generates a list of valid dicts that can be parsed to `vertexai.preview.generative_models.Content`\n objects for each input.\n\n Args:\n prompts (List[str]): the prompts to be used for generation.\n\n Returns:\n List[List[Dict[str, Any]]]: the list of valid `vertexai.preview.generative_models.Content`\n objects.\n \"\"\"\n return [[{\"role\": \"user\", \"parts\": [{\"text\": prompt}]}] for prompt in prompts]\n\n @_vertexai_retry_decorator\n def _call_generative_model_with_backoff(\n self, contents: List[Dict[str, Any]], **kwargs: Any\n ) -> \"GenerationResponse\":\n return self.model.generate_content( # type: ignore\n contents=contents,\n # TODO: update `candidate_count` to have `num_generations` as value once valid range is not [1, 2)\n generation_config=GenerationConfig(candidate_count=1, 
**kwargs),\n )\n\n def _generative_model_single_output(\n self, contents: List[Dict[str, Any]]\n ) -> LLMOutput:\n raw_output = None\n try:\n response = self._call_generative_model_with_backoff(\n contents=contents,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n max_output_tokens=self.max_output_tokens,\n stop_sequences=self.stop_sequences,\n )\n raw_output = response.text\n parsed_output = self.task.parse_output(raw_output)\n except ValueError as e:\n logger.error(f\"Vertex AI Gemini API model didn't return content: {e}\")\n return LLMOutput(\n model_name=self.model_name,\n prompt_used=contents,\n raw_output=None,\n parsed_output=None,\n )\n except Exception as e:\n logger.error(f\"Error parsing Vertex AI Gemini API model response: {e}\")\n parsed_output = None\n\n return LLMOutput(\n model_name=self.model_name,\n prompt_used=contents,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n\n def _generate_with_generative_model(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generate `num_generations` for each input in `inputs` using a Vertex AI Gemini\n API model.\"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"default\")\n inputs_contents = self._generate_contents(prompts)\n outputs = []\n for contents in inputs_contents:\n output = []\n # TODO: remove this for-loop once `GenerationConfig.candidate_count` valid range is not [1, 2)\n for _ in range(num_generations):\n output.append(self._generative_model_single_output(contents=contents))\n outputs.append(output)\n return outputs\n\n @_vertexai_retry_decorator\n def _call_text_generation_model(\n self, **kwargs: Any\n ) -> \"MultiCandidateTextGenerationResponse\":\n return self.model.predict(**kwargs) # type: ignore\n\n def _text_generation_model_single_output(\n self, prompt: str, num_generations: int\n ) -> List[LLMOutput]:\n response = self._call_text_generation_model(\n prompt=prompt,\n max_output_tokens=self.max_output_tokens,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n stop_sequences=self.stop_sequences,\n # WARNING: The model can return < `candidate_count` generations depending\n # on the generation parameters and the input.\n candidate_count=num_generations,\n )\n\n output = []\n for candidate in response.candidates:\n try:\n parsed_response = self.task.parse_output(candidate.text)\n except Exception as e:\n logger.error(\n f\"Error parsing Vertex AI Text/Code API model response: {e}\"\n )\n parsed_response = None\n\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=candidate.text,\n parsed_output=parsed_response,\n )\n )\n return output\n\n def _generate_with_text_generation_model(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generate `num_generations` for each input in `inputs` using a Vertex AI Text/Code\n API model.\"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"default\")\n outputs = []\n for prompt in prompts:\n outputs.append(\n self._text_generation_model_single_output(prompt, num_generations)\n )\n return outputs\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n if isinstance(self.model, GenerativeModel):\n return self._generate_with_generative_model(inputs, num_generations)\n\n return self._generate_with_text_generation_model(inputs, num_generations)\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAILLM.model_name","title":"model_name: str
property
","text":"Returns the name of the model used for generation.
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAILLM.__init__","title":"__init__(task, model='gemini-pro', temperature=None, top_p=None, top_k=None, max_new_tokens=128, stop_sequences=None, num_threads=None)
","text":"Initializes the VertexGenerativeModelLLM
class.
Parameters:
task (Task): the task to be performed by the LLM. Required.
model (str): the model to be used for generation. Defaults to "gemini-pro".
temperature (Optional[float]): the temperature to be used for generation. Defaults to None.
top_p (Optional[float]): the top-p value to be used for generation. Defaults to None.
top_k (Optional[int]): the top-k value to be used for generation. Defaults to None.
max_new_tokens (int): the maximum number of tokens to be generated. Defaults to 128.
num_threads (Union[int, None]): the number of threads to be used for parallel generation. If None, no parallel generation will be performed. Defaults to None.
Source code in src/distilabel/llm/google/vertexai.py
def __init__(\n self,\n task: \"Task\",\n model: str = \"gemini-pro\",\n temperature: Optional[float] = None,\n top_p: Optional[float] = None,\n top_k: Optional[int] = None,\n max_new_tokens: int = 128,\n stop_sequences: Optional[List[str]] = None,\n num_threads: Union[int, None] = None,\n) -> None:\n \"\"\"Initializes the `VertexGenerativeModelLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gemini-pro\".\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n \"\"\"\n super().__init__(task=task, num_threads=num_threads)\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAILLM` cannot be used as `google-cloud-aiplatform` is not installed,\"\n \" please install it with `pip install google-cloud-aiplatform`\"\n )\n\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_output_tokens = max_new_tokens\n self.stop_sequences = stop_sequences\n\n if is_gemini_model(model):\n self.model = GenerativeModel(model)\n elif is_codey_model(model):\n self.model = CodeGenerationModel.from_pretrained(model)\n else:\n self.model = TextGenerationModel.from_pretrained(model)\n
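The class docstring above has no Examples section, so here is a minimal usage sketch. It assumes the Google Cloud authentication has already been configured as described above, and that the TextGenerationTask expects a single input field with the prompt text; both are assumptions rather than guarantees from this reference.
from distilabel.tasks.text_generation import TextGenerationTask as Task
from distilabel.llm import VertexAILLM

task = Task()
# Relies on Google Cloud authentication being configured beforehand
# (GOOGLE_CLOUD_CREDENTIALS, `gcloud auth application-default login`, or `vertexai.init`).
llm = VertexAILLM(task=task, model="gemini-pro", max_new_tokens=512)
# `generate` returns one list of `LLMOutput`s per input.
generations = llm.generate(
    [{"input": "Write a short note about synthetic data."}], num_generations=1
)
Since num_threads is not set, the call is synchronous and returns the nested list directly instead of a Future.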
"},{"location":"reference/distilabel/llm/#distilabel.llm.vLLM","title":"vLLM
","text":" Bases: LLM
src/distilabel/llm/vllm.py
class vLLM(LLM):\n def __init__(\n self,\n vllm: \"_vLLM\",\n task: \"Task\",\n max_new_tokens: int = 128,\n presence_penalty: float = 0.0,\n frequency_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = -1,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the vLLM class.\n\n Args:\n vllm (_vLLM): the vLLM model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to -1.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n\n Examples:\n >>> from vllm import LLM\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import vLLM\n >>> model = LLM(model=\"gpt2\")\n >>> task = Task()\n >>> llm = vLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VLLM_AVAILABLE:\n raise ImportError(\n \"`vLLM` cannot be used as `vllm` is not installed, please \"\n \" install it with `pip install vllm`.\"\n )\n\n self.presence_penalty = presence_penalty\n self.frequency_penalty = frequency_penalty\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_tokens = max_new_tokens\n\n self.vllm = vllm\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_tokens\": self.max_tokens,\n \"presence_penalty\": self.presence_penalty,\n \"frequency_penalty\": self.frequency_penalty,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the vLLM model.\"\"\"\n return self.vllm.llm_engine.model_config.model # type: ignore\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. 
Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n requests = self.vllm.generate(\n prompts,\n SamplingParams( # type: ignore\n n=num_generations,\n presence_penalty=self.presence_penalty,\n frequency_penalty=self.frequency_penalty,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n max_tokens=self.max_tokens,\n ),\n use_tqdm=False, # type: ignore\n )\n outputs = []\n for request, prompt in zip(requests, prompts):\n output = []\n for request_output in request.outputs:\n try:\n parsed_output = self.task.parse_output(request_output.text)\n except Exception as e:\n logger.error(f\"Error parsing vLLM output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=request_output.text,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.vLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the vLLM model.
"},{"location":"reference/distilabel/llm/#distilabel.llm.vLLM.__init__","title":"__init__(vllm, task, max_new_tokens=128, presence_penalty=0.0, frequency_penalty=0.0, temperature=1.0, top_p=1.0, top_k=-1, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the vLLM class.
Parameters:
vllm (LLM): the vLLM model to be used. Required.
task (Task): the task to be performed by the LLM. Required.
max_new_tokens (int): the maximum number of tokens to be generated. Defaults to 128.
presence_penalty (float): the presence penalty to be used for generation. Defaults to 0.0.
frequency_penalty (float): the frequency penalty to be used for generation. Defaults to 0.0.
temperature (float): the temperature to be used for generation. Defaults to 1.0.
top_p (float): the top-p value to be used for generation. Defaults to 1.0.
top_k (int): the top-k value to be used for generation. Defaults to -1.
prompt_format (Union[SupportedFormats, None]): the format to be used for the prompt. If None, the default format of the task will be used; available formats are openai, chatml, llama2, zephyr, and default. Defaults to None, but default (concatenation of system_prompt and formatted_prompt with a line-break) will be used if no prompt_formatting_fn is provided.
prompt_formatting_fn (Union[Callable[..., str], None]): a function to be applied to the prompt before generation. If None, no formatting will be applied. Defaults to None.
Examples:
>>> from vllm import LLM\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import vLLM\n>>> model = LLM(model=\"gpt2\")\n>>> task = Task()\n>>> llm = vLLM(model=model, task=task)\n
Source code in src/distilabel/llm/vllm.py
def __init__(\n self,\n vllm: \"_vLLM\",\n task: \"Task\",\n max_new_tokens: int = 128,\n presence_penalty: float = 0.0,\n frequency_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = -1,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the vLLM class.\n\n Args:\n vllm (_vLLM): the vLLM model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to -1.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n\n Examples:\n >>> from vllm import LLM\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import vLLM\n >>> model = LLM(model=\"gpt2\")\n >>> task = Task()\n >>> llm = vLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VLLM_AVAILABLE:\n raise ImportError(\n \"`vLLM` cannot be used as `vllm` is not installed, please \"\n \" install it with `pip install vllm`.\"\n )\n\n self.presence_penalty = presence_penalty\n self.frequency_penalty = frequency_penalty\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_tokens = max_new_tokens\n\n self.vllm = vllm\n
"},{"location":"reference/distilabel/llm/base/","title":"base","text":""},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLM","title":"LLM
","text":" Bases: ABC
src/distilabel/llm/base.py
class LLM(ABC):\n def __init__(\n self,\n task: Task,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the LLM base class.\n\n Note:\n This class is intended to be used internally, but you anyone can still create\n a subclass, implement the `abstractmethod`s and use it.\n\n Args:\n task (Task): the task to be performed by the LLM.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[\"SupportedFormats\", None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n self.task = task\n\n self.thread_pool_executor = (\n ThreadPoolExecutor(max_workers=num_threads)\n if num_threads is not None\n else None\n )\n\n self.prompt_format = prompt_format\n self.prompt_formatting_fn = prompt_formatting_fn\n\n def __del__(self) -> None:\n \"\"\"Shuts down the thread pool executor if it is not `None`.\"\"\"\n if self.thread_pool_executor is not None:\n self.thread_pool_executor.shutdown()\n\n @property\n def num_threads(self) -> Union[int, None]:\n if self.thread_pool_executor:\n return self.thread_pool_executor._max_workers\n\n def __repr__(self) -> str:\n return f\"{self.__class__.__name__}(task={self.task.__class__.__name__}, num_threads={self.num_threads}, promp_format='{self.prompt_format}', model='{self.model_name}')\"\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield \"task\", self.task\n yield \"num_threads\", self.num_threads\n yield \"prompt_format\", self.prompt_format\n if self.prompt_formatting_fn is not None:\n args = f\"({', '.join(self.prompt_formatting_fn.__code__.co_varnames)})\"\n representation = self.prompt_formatting_fn.__name__ + args\n yield \"prompt_formatting_fn\", representation\n yield \"model\", self.model_name\n\n @property\n @abstractmethod\n def model_name(self) -> str:\n pass\n\n def _generate_prompts(\n self,\n inputs: List[Dict[str, Any]],\n default_format: Union[\"SupportedFormats\", None] = None,\n ) -> List[Any]:\n \"\"\"Generates the prompts to be used for generation.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n default_format (Union[\"SupportedFormats\", None], optional): the default format to be used\n for the prompt if no `prompt_format` is specified. 
Defaults to `None`.\n\n Returns:\n List[Any]: the generated prompts.\n\n Raises:\n ValueError: if the generated prompt is not of the expected type.\n \"\"\"\n prompts = []\n for input in inputs:\n prompt = self.task.generate_prompt(**input)\n if not isinstance(prompt, Prompt) and self.prompt_formatting_fn is not None:\n warnings.warn(\n \"The method `generate_prompt` is not returning a `Prompt` class but a prompt\"\n f\" of `type={type(prompt)}`, meaning that a pre-formatting has already been\"\n \" applied in the `task.generate_prompt` method, so the usage of a `prompt_formatting_fn`\"\n \" is discouraged.\",\n UserWarning,\n stacklevel=2,\n )\n prompt = self.prompt_formatting_fn(prompt)\n elif isinstance(prompt, Prompt) and self.prompt_formatting_fn is None:\n if self.prompt_format is not None or default_format is not None:\n prompt = prompt.format_as(\n format=self.prompt_format or default_format # type: ignore\n )\n else:\n warnings.warn(\n \"No `prompt_format` has been specified and no `default_format` is set, so\"\n \" the prompt will be concatenated with a line-break and no specific formatting\"\n \" by default.\",\n UserWarning,\n stacklevel=2,\n )\n prompt = prompt.format_as(format=\"default\")\n prompts.append(prompt)\n return prompts\n\n @abstractmethod\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n pass\n\n def _get_valid_inputs(\n self, inputs: List[Dict[str, Any]]\n ) -> Tuple[List[Dict[str, Any]], List[int]]:\n \"\"\"Returns the valid inputs and the indices of the invalid inputs.\n\n A valid input is an input that contains all the arguments required by the task.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n\n Returns:\n Tuple[List[Dict[str, Any]], List[int]]: a tuple containing the valid inputs and\n the indices of the invalid inputs.\n \"\"\"\n\n valid_inputs = []\n not_valid_inputs_indices = []\n for i, input in enumerate(inputs):\n if not all(input_arg in input for input_arg in self.task.input_args_names):\n logger.warn(\n f\"Missing {self.task.__class__.__name__} input argument in batch element {i}\"\n )\n not_valid_inputs_indices.append(i)\n continue\n\n valid_inputs.append(input)\n\n return valid_inputs, not_valid_inputs_indices\n\n def _fill_missing_inputs(\n self,\n generations: List[List[LLMOutput]],\n invalid_inputs_indices: List[int],\n num_generations: int,\n ) -> List[List[LLMOutput]]:\n \"\"\"Fills the `generations` list with empty `LLMOutput`s for the inputs that were\n not valid for the associated task of this `LLM`.\n\n Args:\n generations (List[List[LLMOutput]]): the generations to be filled.\n invalid_inputs_indices (List[int]): the indices of the inputs that were not\n valid for the associated task of this `LLM`.\n num_generations (int): the number of generations to be performed for each input.\n\n Returns:\n List[List[LLMOutput]]: the filled generations.\n \"\"\"\n\n filled_generations = generations.copy()\n for idx in invalid_inputs_indices:\n filled_generations.insert(\n idx,\n [\n LLMOutput(\n model_name=self.model_name,\n prompt_used=None,\n raw_output=None,\n parsed_output=None,\n )\n for _ in range(num_generations)\n ],\n )\n return filled_generations\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]:\n \"\"\"Generates the outputs for the given inputs using the LLM.\n\n Args:\n 
inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Union[List[Future[List[\"LLMOutput\"]]], List[List[\"LLMOutput\"]]]: the generated outputs.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n valid_inputs, invalid_inputs_indices = self._get_valid_inputs(inputs)\n\n if self.thread_pool_executor is not None:\n futures = []\n for input in valid_inputs:\n future = self.thread_pool_executor.submit(\n self._generate, [input], num_generations\n )\n futures.append(future)\n future = when_all_complete(\n futures=futures,\n callback=lambda generations: self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n ),\n )\n future.add_done_callback(lambda _: _progress())\n return future\n\n generations = self._generate(valid_inputs, num_generations)\n\n generations = self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n )\n\n _progress()\n return generations\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return self.thread_pool_executor is not None\n
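To make the note about subclassing concrete, here is a minimal, hypothetical sketch: a subclass only needs to implement the abstract model_name property and _generate method. EchoLLM is not part of the library and simply echoes the formatted prompt back instead of calling a real model; the import path used for LLMOutput is an assumption.
from typing import Any, Dict, List

from distilabel.llm.base import LLM
from distilabel.llm.utils import LLMOutput  # assumed location of LLMOutput


class EchoLLM(LLM):
    """A toy LLM that returns the formatted prompt as its own generation."""

    @property
    def model_name(self) -> str:
        return "echo"

    def _generate(
        self, inputs: List[Dict[str, Any]], num_generations: int = 1
    ) -> List[List[LLMOutput]]:
        # Reuse the base class helper to build the prompts from the task.
        prompts = self._generate_prompts(inputs, default_format="default")
        return [
            [
                LLMOutput(
                    model_name=self.model_name,
                    prompt_used=prompt,
                    raw_output=prompt,
                    parsed_output=None,
                )
                for _ in range(num_generations)
            ]
            for prompt in prompts
        ]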
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLM.return_futures","title":"return_futures: bool
property
","text":"Whether the LLM
returns futures
__del__()
","text":"Shuts down the thread pool executor if it is not None
.
src/distilabel/llm/base.py
def __del__(self) -> None:\n \"\"\"Shuts down the thread pool executor if it is not `None`.\"\"\"\n if self.thread_pool_executor is not None:\n self.thread_pool_executor.shutdown()\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLM.__init__","title":"__init__(task, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the LLM base class.
Note: This class is intended to be used internally, but anyone can still create a subclass, implement the abstractmethods and use it.
Parameters:
task (Task): the task to be performed by the LLM. Required.
num_threads (Union[int, None]): the number of threads to be used for parallel generation. If None, no parallel generation will be performed. Defaults to None.
prompt_format (Union['SupportedFormats', None]): the format to be used for the prompt. If None, the default format of the task will be used; available formats are openai, chatml, llama2, zephyr, and default. Defaults to None, but default (concatenation of system_prompt and formatted_prompt with a line-break) will be used if no prompt_formatting_fn is provided.
prompt_formatting_fn (Union[Callable[..., str], None]): a function to be applied to the prompt before generation. If None, no formatting will be applied. Defaults to None.
Source code in src/distilabel/llm/base.py
def __init__(\n self,\n task: Task,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the LLM base class.\n\n Note:\n This class is intended to be used internally, but you anyone can still create\n a subclass, implement the `abstractmethod`s and use it.\n\n Args:\n task (Task): the task to be performed by the LLM.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[\"SupportedFormats\", None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n self.task = task\n\n self.thread_pool_executor = (\n ThreadPoolExecutor(max_workers=num_threads)\n if num_threads is not None\n else None\n )\n\n self.prompt_format = prompt_format\n self.prompt_formatting_fn = prompt_formatting_fn\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLM.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the LLM.
Parameters:
inputs (List[Dict[str, Any]]): the inputs to be used for generation. Required.
num_generations (int): the number of generations to be performed for each input. Defaults to 1.
progress_callback_func (Union[Callable, None]): a function to be called at each generation step. Defaults to None.
Returns:
Union[List[List["LLMOutput"]], Future[List[List["LLMOutput"]]]]: the generated outputs (a Future when the LLM uses a thread pool).
Source code in src/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]:\n \"\"\"Generates the outputs for the given inputs using the LLM.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Union[List[Future[List[\"LLMOutput\"]]], List[List[\"LLMOutput\"]]]: the generated outputs.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n valid_inputs, invalid_inputs_indices = self._get_valid_inputs(inputs)\n\n if self.thread_pool_executor is not None:\n futures = []\n for input in valid_inputs:\n future = self.thread_pool_executor.submit(\n self._generate, [input], num_generations\n )\n futures.append(future)\n future = when_all_complete(\n futures=futures,\n callback=lambda generations: self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n ),\n )\n future.add_done_callback(lambda _: _progress())\n return future\n\n generations = self._generate(valid_inputs, num_generations)\n\n generations = self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n )\n\n _progress()\n return generations\n
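To make the return type above concrete, here is a small sketch contrasting the synchronous and thread-pool modes. OpenAILLM is used only as an illustrative backend (any LLM subclass behaves the same way), and it assumes OPENAI_API_KEY is set in the environment.
from distilabel.tasks.text_generation import TextGenerationTask as Task
from distilabel.llm import OpenAILLM

inputs = [{"input": "Summarise what an AI Feedback dataset is."}]

# Without `num_threads` the call blocks and returns List[List[LLMOutput]].
llm = OpenAILLM(task=Task(), model="gpt-3.5-turbo")
outputs = llm.generate(inputs, num_generations=2)

# With `num_threads` a ThreadPoolExecutor is used and a Future is returned.
threaded_llm = OpenAILLM(task=Task(), model="gpt-3.5-turbo", num_threads=4)
future = threaded_llm.generate(inputs, num_generations=2)
outputs = future.result()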
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLMPool","title":"LLMPool
","text":"LLMPool is a class that wraps multiple ProcessLLM
s and performs generation in parallel using them. Depending on the number of LLM
s and the parameter num_generations
, the LLMPool
will decide how many generations to perform for each LLM
:
- If num_generations is less than the number of LLMs, then num_generations LLMs will be chosen randomly and each of them will perform 1 generation.
- If num_generations is equal to the number of LLMs, then each LLM will perform 1 generation.
- If num_generations is greater than the number of LLMs, then each LLM will perform num_generations // num_llms generations, and the remaining num_generations % num_llms generations will be performed by num_generations % num_llms randomly chosen LLMs.
Attributes:
llms (List[ProcessLLM]): the ProcessLLMs to be used for generation.
src/distilabel/llm/base.py
class LLMPool:\n \"\"\"LLMPool is a class that wraps multiple `ProcessLLM`s and performs generation in\n parallel using them. Depending on the number of `LLM`s and the parameter `num_generations`,\n the `LLMPool` will decide how many generations to perform for each `LLM`:\n\n - If `num_generations` is less than the number of `LLM`s, then `num_generations` LLMs\n will be chosen randomly and each of them will perform 1 generation.\n\n\n - If `num_generations` is equal to the number of `LLM`s, then each `LLM` will perform\n 1 generation.\n\n - If `num_generations` is greater than the number of `LLM`s, then each `LLM` will\n perform `num_generations // num_llms` generations, and the remaining `num_generations % num_llms`\n generations will be performed by `num_generations % num_llms` randomly chosen `LLM`s.\n\n Attributes:\n llms (List[ProcessLLM]): the `ProcessLLM`s to be used for generation.\n \"\"\"\n\n def __init__(self, llms: List[ProcessLLM]) -> None:\n \"\"\"Initializes the `LLMPool` class.\n\n Args:\n llms: the `ProcessLLM`s to be used for generation. The list must contain at\n least 2 `ProcessLLM`s.\n\n Raises:\n ValueError: if the `llms` argument contains less than 2 `ProcessLLM`s, the\n `llms` argument contains `ProcessLLM`s that are not `ProcessLLM`s, or\n if the `llms` argument contains `ProcessLLM`s with different tasks.\n \"\"\"\n if len(llms) < 2:\n raise ValueError(\n \"The `llms` argument must contain at least 2 `ProcessLLM`s. If you want\"\n \" to use a single `ProcessLLM`, use the `ProcessLLM` directly instead.\"\n )\n\n if not all(isinstance(llm, ProcessLLM) for llm in llms):\n raise ValueError(\"The `llms` argument must contain only `ProcessLLM`s.\")\n\n # Note: The following piece of code is used to check that all the `ProcessLLM`s\n # have the same task or a subclass of it.\n mros = [(type(llm.task), len(type(llm.task).mro())) for llm in llms]\n min_common_class = min(mros, key=lambda x: x[1])[0]\n if not all(isinstance(llm.task, min_common_class) for llm in llms):\n raise ValueError(\n \"All the `ProcessLLM` in `llms` must share the same task (either as the instance or the parent class).\"\n )\n\n self.llms = llms\n self.num_llms = len(llms)\n\n def _get_num_generations_per_llm(self, num_generations: int) -> Dict[int, int]:\n \"\"\"Returns the number of generations to be performed by each `LLM`.\n\n Args:\n num_generations: the number of generations to be performed.\n\n Returns:\n Dict[int, int]: a dictionary where the keys are the ids of the `LLM`s and the\n values are the number of generations to be performed by each `LLM`.\n \"\"\"\n llms_ids = list(range(self.num_llms))\n generations_per_llm = {i: num_generations // self.num_llms for i in llms_ids}\n\n for i in random.sample(llms_ids, k=num_generations % self.num_llms):\n generations_per_llm[i] += 1\n\n return generations_per_llm\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generates the outputs for the given inputs using the pool of `ProcessLLM`s.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. 
Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n num_generations_per_llm = self._get_num_generations_per_llm(num_generations)\n\n futures = [\n llm.generate(\n inputs,\n num_generations=num_generations_per_llm[i],\n progress_callback_func=progress_callback_func,\n )\n for i, llm in enumerate(self.llms)\n if num_generations_per_llm[i] > 0\n ]\n llms_generations = [future.result() for future in futures]\n\n generations = []\n for llms_row_generations in zip(*llms_generations):\n row_generations = []\n for llm_row_generations in llms_row_generations:\n for generation in llm_row_generations:\n row_generations.append(generation)\n generations.append(row_generations)\n\n return generations\n\n def teardown(self) -> None:\n \"\"\"Stops the `ProcessLLM`s.\"\"\"\n for llm in self.llms:\n llm.teardown()\n\n @property\n def task(self) -> \"Task\":\n \"\"\"Returns the task that will be used by the `ProcessLLM`s of this pool.\n\n Returns:\n Task: the task that will be used by the `ProcessLLM`s of this pool.\n \"\"\"\n return self.llms[0].task\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return False\n
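As a usage sketch of the class above, the following assembles a pool from two ProcessLLMs. It assumes ProcessLLM, LLMPool and OpenAILLM can be imported from distilabel.llm (they are documented under distilabel.llm.base and distilabel.llm.openai in this reference), and the model names are purely illustrative.
from distilabel.tasks.text_generation import TextGenerationTask as Task
from distilabel.llm import LLMPool, OpenAILLM, ProcessLLM

task = Task()


def load_gpt_35(task):
    # Executed inside the child process spawned by `ProcessLLM`.
    return OpenAILLM(task=task, model="gpt-3.5-turbo")


def load_gpt_4(task):
    return OpenAILLM(task=task, model="gpt-4")


pool = LLMPool(
    llms=[
        ProcessLLM(task=task, load_llm_fn=load_gpt_35),
        ProcessLLM(task=task, load_llm_fn=load_gpt_4),
    ]
)
# With 2 LLMs and num_generations=3, each LLM performs one generation and a
# randomly chosen LLM performs the extra one, as described above.
generations = pool.generate(
    [{"input": "What is AI Feedback?"}], num_generations=3
)
pool.teardown()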
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLMPool.return_futures","title":"return_futures: bool
property
","text":"Whether the LLM
returns futures
task: 'Task'
property
","text":"Returns the task that will be used by the ProcessLLM
s of this pool.
Returns:
Task: the task that will be used by the ProcessLLMs of this pool.
__init__(llms)
","text":"Initializes the LLMPool
class.
Parameters:
llms (List[ProcessLLM]): the ProcessLLMs to be used for generation. The list must contain at least 2 ProcessLLMs. Required.
Raises:
ValueError: if the llms argument contains less than 2 ProcessLLMs, if it contains objects that are not ProcessLLMs, or if the ProcessLLMs do not share the same task.
src/distilabel/llm/base.py
def __init__(self, llms: List[ProcessLLM]) -> None:\n \"\"\"Initializes the `LLMPool` class.\n\n Args:\n llms: the `ProcessLLM`s to be used for generation. The list must contain at\n least 2 `ProcessLLM`s.\n\n Raises:\n ValueError: if the `llms` argument contains less than 2 `ProcessLLM`s, the\n `llms` argument contains `ProcessLLM`s that are not `ProcessLLM`s, or\n if the `llms` argument contains `ProcessLLM`s with different tasks.\n \"\"\"\n if len(llms) < 2:\n raise ValueError(\n \"The `llms` argument must contain at least 2 `ProcessLLM`s. If you want\"\n \" to use a single `ProcessLLM`, use the `ProcessLLM` directly instead.\"\n )\n\n if not all(isinstance(llm, ProcessLLM) for llm in llms):\n raise ValueError(\"The `llms` argument must contain only `ProcessLLM`s.\")\n\n # Note: The following piece of code is used to check that all the `ProcessLLM`s\n # have the same task or a subclass of it.\n mros = [(type(llm.task), len(type(llm.task).mro())) for llm in llms]\n min_common_class = min(mros, key=lambda x: x[1])[0]\n if not all(isinstance(llm.task, min_common_class) for llm in llms):\n raise ValueError(\n \"All the `ProcessLLM` in `llms` must share the same task (either as the instance or the parent class).\"\n )\n\n self.llms = llms\n self.num_llms = len(llms)\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLMPool.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the pool of ProcessLLM
s.
Parameters:
inputs (List[Dict[str, Any]]): the inputs to be used for generation. Required.
num_generations (int): the number of generations to be performed for each input. Defaults to 1.
progress_callback_func (Union[Callable, None]): a function to be called at each generation step. Defaults to None.
Returns:
List[List["LLMOutput"]]: the generated outputs.
src/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generates the outputs for the given inputs using the pool of `ProcessLLM`s.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n num_generations_per_llm = self._get_num_generations_per_llm(num_generations)\n\n futures = [\n llm.generate(\n inputs,\n num_generations=num_generations_per_llm[i],\n progress_callback_func=progress_callback_func,\n )\n for i, llm in enumerate(self.llms)\n if num_generations_per_llm[i] > 0\n ]\n llms_generations = [future.result() for future in futures]\n\n generations = []\n for llms_row_generations in zip(*llms_generations):\n row_generations = []\n for llm_row_generations in llms_row_generations:\n for generation in llm_row_generations:\n row_generations.append(generation)\n generations.append(row_generations)\n\n return generations\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLMPool.teardown","title":"teardown()
","text":"Stops the ProcessLLM
s.
src/distilabel/llm/base.py
def teardown(self) -> None:\n \"\"\"Stops the `ProcessLLM`s.\"\"\"\n for llm in self.llms:\n llm.teardown()\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.ProcessLLM","title":"ProcessLLM
","text":"A class that wraps an LLM
and performs generation in a separate process. The result is a Future
that will be set when the generation is completed.
This class creates a new child process that will load the LLM
and perform the text generation. In order to communicate with this child process, a bridge thread is created in the main process. The bridge thread will send and receive the results from the child process using multiprocessing.Queue
s. The communication between the bridge thread and the main process is done using Future
s. This architecture was inspired by the ProcessPoolExecutor
from the concurrent.futures
module and it's a simplified version of it.
src/distilabel/llm/base.py
class ProcessLLM:\n \"\"\"A class that wraps an `LLM` and performs generation in a separate process. The\n result is a `Future` that will be set when the generation is completed.\n\n This class creates a new child process that will load the `LLM` and perform the\n text generation. In order to communicate with this child process, a bridge thread\n is created in the main process. The bridge thread will send and receive the results\n from the child process using `multiprocessing.Queue`s. The communication between the\n bridge thread and the main process is done using `Future`s. This architecture was\n inspired by the `ProcessPoolExecutor` from the `concurrent.futures` module and it's\n a simplified version of it.\n \"\"\"\n\n def __init__(self, task: Task, load_llm_fn: Callable[[Task], LLM]) -> None:\n \"\"\"Initializes the `ProcessLLM` class.\n\n Args:\n task: the task to be performed by the `LLM`. This task will be used by the\n child process when calling the `load_llm_fn`.\n load_llm_fn (Callable[[Task], LLM]): a function that will be executed in the\n child process to load the `LLM`. It must return an `LLM` instance.\n \"\"\"\n self.task = task\n\n self._load_llm_fn = load_llm_fn\n\n # The bridge thread will act as a bridge between the main process and the child\n # process for communication. It will send the generation requests to the child\n # process and receive the results from the child process.\n self._bridge_thread = None\n\n # The child process which will load the `LLM` and perform the generation.\n self._generation_process = None\n\n # The `Semaphore` that will be used to synchronize the loading of the `LLM`.\n # `_BridgeThread` will be blocked until `_GenerationProcess` has called the\n # `load_llm_fn` and the `LLM` has been loaded.\n self._load_llm_sem = mp.Semaphore(0)\n\n # This thread will create text generation requests\n self.pending_text_generation_request: Dict[int, _TextGenerationRequest] = {}\n self.text_generation_request_count = 0\n self.text_generation_request_ids_queue: queue.Queue[int] = queue.Queue()\n\n # Queues for the communication between the `_BridgeThread` and the `_GenerationProcess`\n self._call_queue = mp.Queue()\n self._result_queue = mp.Queue()\n\n # Shared memory object for transfering the `model_name` to the main process\n # once the `LLM` is loaded\n self._model_name = mp.Array(c_char, MAX_MODEL_NAME_LENGTH)\n\n def _start_bridge_thread(self) -> None:\n \"\"\"Starts the bridge thread and the generation process.\"\"\"\n if self._bridge_thread is None:\n self._generation_process = _GenerationProcess(self)\n self._generation_process.start()\n pid = self._generation_process.pid\n logger.debug(f\"Generation process with PID {pid} started!\")\n\n self._bridge_thread = _BridgeThread(self)\n self._bridge_thread.start()\n logger.debug(\"Bridge thread for process with PID {pid} started!\")\n\n def _add_text_generation_request(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Creates and send a new text generation request to the bridge thread. This thread\n and the bridge thread shares a dictionary used to store the text generation requests.\n This thread will add the text generation requests to the dictionary and the bridge\n thread will only read from it. In order for the bridge thread to know that a new\n text generation request has been added to the dictionary, this thread will put the\n id of the request in a queue. 
The bridge thread will read from this queue and get\n the text generation request from the dictionary.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n text_generation_request = _TextGenerationRequest(\n inputs=inputs, num_generations=num_generations\n )\n # Put the request information in the dictionary associated to the request id\n self.pending_text_generation_request[\n self.text_generation_request_count\n ] = text_generation_request\n # Put the request id in the queue (for the `_BridgeThread` to consume it)\n self.text_generation_request_ids_queue.put(self.text_generation_request_count)\n self.text_generation_request_count += 1\n text_generation_request.future.add_done_callback(lambda _: _progress())\n return text_generation_request.future\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Generates the outputs for the given inputs using the `ProcessLLM` and its loaded\n `LLM`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n self._start_bridge_thread()\n return self._add_text_generation_request(\n inputs, num_generations, progress_callback_func\n )\n\n def teardown(self) -> None:\n \"\"\"Stops the bridge thread and the generation process.\"\"\"\n if self._generation_process is not None:\n self._generation_process.stop()\n self._generation_process.join()\n\n if self._bridge_thread is not None:\n self._bridge_thread.stop()\n self._bridge_thread.join()\n\n @cached_property\n def model_name(self) -> str:\n \"\"\"Returns the model name of the `LLM` once it has been loaded.\"\"\"\n with self._model_name:\n return \"\".join([c.decode() for c in self._model_name if c != b\"\\0\"])\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return True\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.ProcessLLM.model_name","title":"model_name: str
cached
property
","text":"Returns the model name of the LLM
once it has been loaded.
return_futures: bool
property
","text":"Whether the LLM
returns futures
__init__(task, load_llm_fn)
","text":"Initializes the ProcessLLM
class.
Parameters:
task (Task): the task to be performed by the LLM. This task will be used by the child process when calling the load_llm_fn. Required.
load_llm_fn (Callable[[Task], LLM]): a function that will be executed in the child process to load the LLM. It must return an LLM instance. Required.
src/distilabel/llm/base.py
def __init__(self, task: Task, load_llm_fn: Callable[[Task], LLM]) -> None:\n \"\"\"Initializes the `ProcessLLM` class.\n\n Args:\n task: the task to be performed by the `LLM`. This task will be used by the\n child process when calling the `load_llm_fn`.\n load_llm_fn (Callable[[Task], LLM]): a function that will be executed in the\n child process to load the `LLM`. It must return an `LLM` instance.\n \"\"\"\n self.task = task\n\n self._load_llm_fn = load_llm_fn\n\n # The bridge thread will act as a bridge between the main process and the child\n # process for communication. It will send the generation requests to the child\n # process and receive the results from the child process.\n self._bridge_thread = None\n\n # The child process which will load the `LLM` and perform the generation.\n self._generation_process = None\n\n # The `Semaphore` that will be used to synchronize the loading of the `LLM`.\n # `_BridgeThread` will be blocked until `_GenerationProcess` has called the\n # `load_llm_fn` and the `LLM` has been loaded.\n self._load_llm_sem = mp.Semaphore(0)\n\n # This thread will create text generation requests\n self.pending_text_generation_request: Dict[int, _TextGenerationRequest] = {}\n self.text_generation_request_count = 0\n self.text_generation_request_ids_queue: queue.Queue[int] = queue.Queue()\n\n # Queues for the communication between the `_BridgeThread` and the `_GenerationProcess`\n self._call_queue = mp.Queue()\n self._result_queue = mp.Queue()\n\n # Shared memory object for transfering the `model_name` to the main process\n # once the `LLM` is loaded\n self._model_name = mp.Array(c_char, MAX_MODEL_NAME_LENGTH)\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.ProcessLLM.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the ProcessLLM
and its loaded LLM
.
Parameters:
inputs (List[Dict[str, Any]]): the inputs to be used for generation. Required.
num_generations (int): the number of generations to be performed for each input. Defaults to 1.
progress_callback_func (Union[Callable, None]): a function to be called at each generation step. Defaults to None.
Returns:
Future[List[List["LLMOutput"]]]: the generated outputs as a Future.
src/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Generates the outputs for the given inputs using the `ProcessLLM` and its loaded\n `LLM`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n self._start_bridge_thread()\n return self._add_text_generation_request(\n inputs, num_generations, progress_callback_func\n )\n
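A short sketch of the call above, showing the Future-based result. ProcessLLM and OpenAILLM are assumed to be importable from distilabel.llm, and the backend and model name are illustrative only.
from distilabel.tasks.text_generation import TextGenerationTask as Task
from distilabel.llm import OpenAILLM, ProcessLLM


def load_llm(task):
    # Runs in the child process; must return an `LLM` instance.
    return OpenAILLM(task=task, model="gpt-3.5-turbo")


llm = ProcessLLM(task=Task(), load_llm_fn=load_llm)
future = llm.generate([{"input": "Hello!"}], num_generations=1)
outputs = future.result()  # block until the child process finishes
llm.teardown()  # stop the bridge thread and the generation process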
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.ProcessLLM.teardown","title":"teardown()
","text":"Stops the bridge thread and the generation process.
Source code in src/distilabel/llm/base.py
def teardown(self) -> None:\n \"\"\"Stops the bridge thread and the generation process.\"\"\"\n if self._generation_process is not None:\n self._generation_process.stop()\n self._generation_process.join()\n\n if self._bridge_thread is not None:\n self._bridge_thread.stop()\n self._bridge_thread.join()\n
"},{"location":"reference/distilabel/llm/llama_cpp/","title":"llama_cpp","text":""},{"location":"reference/distilabel/llm/llama_cpp/#distilabel.llm.llama_cpp.LlamaCppLLM","title":"LlamaCppLLM
","text":" Bases: LLM
src/distilabel/llm/llama_cpp.py
class LlamaCppLLM(LLM):\n def __init__(\n self,\n model: \"Llama\",\n task: \"Task\",\n max_new_tokens: int = 128,\n temperature: float = 0.8,\n top_p: float = 0.95,\n top_k: int = 40,\n repeat_penalty: float = 1.1,\n seed: int = 1337,\n prompt_format: Union[SupportedFormats, None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the LlamaCppLLM class.\n\n Args:\n model (Llama): the llama-cpp model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 0.8.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 0.95.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n repeat_penalty (float, optional): the repeat penalty to be used for generation.\n Defaults to 1.1.\n seed (int, optional): the seed to be used for generation, setting it to -1 implies\n that a different response will be generated on each generation, similarly to\n HuggingFace's `do_sample` arg. Defaults to 1337.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Examples:\n >>> from llama_cpp import Llama\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import LlamaCppLLM\n >>> model = Llama(model_path=\"path/to/model\")\n >>> task = Task()\n >>> llm = LlamaCppLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _LLAMA_CPP_AVAILABLE:\n raise ImportError(\n \"`LlamaCppLLM` cannot be used as `llama_cpp` is not installed, please \"\n \" install it with `pip install llama-cpp-python`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repeat_penalty = repeat_penalty\n self.seed = seed\n\n self.model = model\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_tokens,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n \"repeat_penalty\": self.repeat_penalty,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the llama-cpp model, which is the same as the model path.\"\"\"\n return self.model.model_path\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. 
Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n output = []\n for _ in range(num_generations):\n raw_output = self.model.create_completion(\n prompt,\n max_tokens=self.max_tokens,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n repeat_penalty=self.repeat_penalty,\n )\n try:\n parsed_output = self.task.parse_output(\n raw_output[\"choices\"][0][\"text\"].strip()\n )\n except Exception as e:\n logger.error(f\"Error parsing llama-cpp output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/llama_cpp/#distilabel.llm.llama_cpp.LlamaCppLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the llama-cpp model, which is the same as the model path.
"},{"location":"reference/distilabel/llm/llama_cpp/#distilabel.llm.llama_cpp.LlamaCppLLM.__init__","title":"__init__(model, task, max_new_tokens=128, temperature=0.8, top_p=0.95, top_k=40, repeat_penalty=1.1, seed=1337, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the LlamaCppLLM class.
Parameters:
model (Llama): the llama-cpp model to be used. Required.
task (Task): the task to be performed by the LLM. Required.
max_new_tokens (int): the maximum number of tokens to be generated. Defaults to 128.
temperature (float): the temperature to be used for generation. Defaults to 0.8.
top_p (float): the top-p value to be used for generation. Defaults to 0.95.
top_k (int): the top-k value to be used for generation. Defaults to 40.
repeat_penalty (float): the repeat penalty to be used for generation. Defaults to 1.1.
seed (int): the seed to be used for generation; setting it to -1 implies that a different response will be generated on each generation, similarly to HuggingFace's do_sample arg. Defaults to 1337.
prompt_format (Union[SupportedFormats, None]): the format to be used for the prompt. If None, the default format of the task will be used; available formats are openai, chatml, llama2, zephyr, and default. Defaults to None, but default (concatenation of system_prompt and formatted_prompt with a line-break) will be used if no prompt_formatting_fn is provided.
prompt_formatting_fn (Union[Callable[..., str], None]): a function to be applied to the prompt before generation. If None, no formatting will be applied. Defaults to None.
Examples:
>>> from llama_cpp import Llama\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import LlamaCppLLM\n>>> model = Llama(model_path=\"path/to/model\")\n>>> task = Task()\n>>> llm = LlamaCppLLM(model=model, task=task)\n
Source code in src/distilabel/llm/llama_cpp.py
def __init__(\n self,\n model: \"Llama\",\n task: \"Task\",\n max_new_tokens: int = 128,\n temperature: float = 0.8,\n top_p: float = 0.95,\n top_k: int = 40,\n repeat_penalty: float = 1.1,\n seed: int = 1337,\n prompt_format: Union[SupportedFormats, None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the LlamaCppLLM class.\n\n Args:\n model (Llama): the llama-cpp model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 0.8.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 0.95.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n repeat_penalty (float, optional): the repeat penalty to be used for generation.\n Defaults to 1.1.\n seed (int, optional): the seed to be used for generation, setting it to -1 implies\n that a different response will be generated on each generation, similarly to\n HuggingFace's `do_sample` arg. Defaults to 1337.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Examples:\n >>> from llama_cpp import Llama\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import LlamaCppLLM\n >>> model = Llama(model_path=\"path/to/model\")\n >>> task = Task()\n >>> llm = LlamaCppLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _LLAMA_CPP_AVAILABLE:\n raise ImportError(\n \"`LlamaCppLLM` cannot be used as `llama_cpp` is not installed, please \"\n \" install it with `pip install llama-cpp-python`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repeat_penalty = repeat_penalty\n self.seed = seed\n\n self.model = model\n
"},{"location":"reference/distilabel/llm/openai/","title":"openai","text":""},{"location":"reference/distilabel/llm/openai/#distilabel.llm.openai.OpenAILLM","title":"OpenAILLM
","text":" Bases: LLM
src/distilabel/llm/openai.py
class OpenAILLM(LLM):\n def __init__(\n self,\n task: \"Task\",\n model: str = \"gpt-3.5-turbo\",\n client: Union[\"OpenAI\", None] = None,\n openai_api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n frequency_penalty: float = 0.0,\n presence_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gpt-3.5-turbo\".\n client (Union[OpenAI, None], optional): an OpenAI client to be used for generation.\n If `None`, a new client will be created. Defaults to `None`.\n openai_api_key (Union[str, None], optional): the OpenAI API key to be used for generation.\n If `None`, the `OPENAI_API_KEY` environment variable will be used. Defaults to `None`.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in your OpenAI account.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import OpenAILLM\n >>> task = Task()\n >>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _OPENAI_AVAILABLE:\n raise ImportError(\n \"`OpenAILLM` cannot be used as `openai` is not installed, please \"\n \" install it with `pip install openai`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.frequency_penalty = frequency_penalty\n self.presence_penalty = presence_penalty\n self.temperature = temperature\n self.top_p = top_p\n\n self.client = client or OpenAI(api_key=openai_api_key, max_retries=6)\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in your OpenAI account, available models are {self.available_models}\"\n self.model = model\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_tokens\": self.max_tokens,\n \"frequency_penalty\": self.frequency_penalty,\n \"presence_penalty\": self.presence_penalty,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n },\n )\n\n @cached_property\n def available_models(self) -> List[str]:\n \"\"\"Returns the list of available models in your OpenAI account.\"\"\"\n return [model.id for model in self.client.models.list().data]\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the OpenAI model.\"\"\"\n return self.model\n\n def _generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"openai\")\n outputs = []\n for prompt in prompts:\n chat_completions = self.client.chat.completions.create(\n messages=prompt,\n model=self.model,\n n=num_generations,\n max_tokens=self.max_tokens,\n frequency_penalty=self.frequency_penalty,\n presence_penalty=self.presence_penalty,\n temperature=self.temperature,\n top_p=self.top_p,\n timeout=50,\n )\n\n output = []\n for chat_completion in chat_completions.choices:\n try:\n parsed_response = self.task.parse_output(\n chat_completion.message.content.strip()\n )\n except Exception as e:\n logger.error(f\"Error parsing OpenAI response: {e}\")\n parsed_response = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=chat_completion.message.content,\n parsed_output=parsed_response,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/openai/#distilabel.llm.openai.OpenAILLM.available_models","title":"available_models: List[str]
cached
property
","text":"Returns the list of available models in your OpenAI account.
"},{"location":"reference/distilabel/llm/openai/#distilabel.llm.openai.OpenAILLM.model_name","title":"model_name: str
property
","text":"Returns the name of the OpenAI model.
"},{"location":"reference/distilabel/llm/openai/#distilabel.llm.openai.OpenAILLM.__init__","title":"__init__(task, model='gpt-3.5-turbo', client=None, openai_api_key=None, max_new_tokens=128, frequency_penalty=0.0, presence_penalty=0.0, temperature=1.0, top_p=1.0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the OpenAILLM class.
Parameters:
Name Type Description Default
task
Task
the task to be performed by the LLM.
required
model
str
the model to be used for generation. Defaults to \"gpt-3.5-turbo\".
'gpt-3.5-turbo'
client
Union[OpenAI, None]
an OpenAI client to be used for generation. If None
, a new client will be created. Defaults to None
.
None
openai_api_key
Union[str, None]
the OpenAI API key to be used for generation. If None
, the OPENAI_API_KEY
environment variable will be used. Defaults to None
.
None
max_new_tokens
int
the maximum number of tokens to be generated. Defaults to 128.
128
frequency_penalty
float
the frequency penalty to be used for generation. Defaults to 0.0.
0.0
presence_penalty
float
the presence penalty to be used for generation. Defaults to 0.0.
0.0
temperature
float
the temperature to be used for generation. Defaults to 1.0.
1.0
top_p
float
the top-p value to be used for generation. Defaults to 1.0.
1.0
num_threads
Union[int, None]
the number of threads to be used for parallel generation. If None
, no parallel generation will be performed. Defaults to None
.
None
prompt_format
Union[SupportedFormats, None]
the format to be used for the prompt. If None
, the default format of the task will be used, available formats are openai
, chatml
, llama2
, zephyr
, and default
. Defaults to None
, but default
(concatenation of system_prompt
and formatted_prompt
with a line-break) will be used if no prompt_formatting_fn
is provided.
None
prompt_formatting_fn
Union[Callable[..., str], None]
a function to be applied to the prompt before generation. If None
, no formatting will be applied. Defaults to None
.
None
Raises:
Type Description
AssertionError
if the provided model
is not available in your OpenAI account.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import OpenAILLM\n>>> task = Task()\n>>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n
Source code in src/distilabel/llm/openai.py
def __init__(\n self,\n task: \"Task\",\n model: str = \"gpt-3.5-turbo\",\n client: Union[\"OpenAI\", None] = None,\n openai_api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n frequency_penalty: float = 0.0,\n presence_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gpt-3.5-turbo\".\n client (Union[OpenAI, None], optional): an OpenAI client to be used for generation.\n If `None`, a new client will be created. Defaults to `None`.\n openai_api_key (Union[str, None], optional): the OpenAI API key to be used for generation.\n If `None`, the `OPENAI_API_KEY` environment variable will be used. Defaults to `None`.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in your OpenAI account.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import OpenAILLM\n >>> task = Task()\n >>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _OPENAI_AVAILABLE:\n raise ImportError(\n \"`OpenAILLM` cannot be used as `openai` is not installed, please \"\n \" install it with `pip install openai`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.frequency_penalty = frequency_penalty\n self.presence_penalty = presence_penalty\n self.temperature = temperature\n self.top_p = top_p\n\n self.client = client or OpenAI(api_key=openai_api_key, max_retries=6)\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in your OpenAI account, available models are {self.available_models}\"\n self.model = model\n
"},{"location":"reference/distilabel/llm/together/","title":"together","text":""},{"location":"reference/distilabel/llm/together/#distilabel.llm.together.TogetherInferenceLLM","title":"TogetherInferenceLLM
","text":" Bases: LLM
src/distilabel/llm/together.py
class TogetherInferenceLLM(LLM):\n def __init__(\n self,\n task: \"Task\",\n model: str,\n api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: float = 1.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = 1,\n stop: Union[List[str], None] = None,\n logprobs: int = 0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str): the model to be used for generation.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation. From the Together\n Inference docs: \"A decimal number that determines the degree of randomness in the response.\n A value of 0 will always yield the same output. A temperature much less than 1 favors more\n correctness and is appropriate for question answering or summarization. A value approaching\n 1 introduces more randomness in the output.\". Defaults to 1.0.\n repetition_penalty (float, optional): the repetition penalty to be used for generation. From the\n Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood\n of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation. From the Together\n Inference docs: \"used to dynamically adjust the number of choices for each predicted\n token based on the cumulative probabilities. It specifies a probability threshold,\n below which all less likely tokens are filtered out. This technique helps to maintain\n diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation. From the Together Inference\n docs: \"used to limit the number of choices for the next predicted word or token. It specifies\n the maximum number of tokens to consider at each step, based on their probability of occurrence.\n This technique helps to speed up the generation process and can improve the quality of the\n generated text by focusing on the most likely options.\". Defaults to 1.\n stop (List[str], optional): strings to delimitate the generation process, so that when the\n model generates any of the provided characters, the generation process is considered completed.\n Defaults to None.\n logprobs (int, optional): the number of logprobs to be returned for each token. From the\n Together Inference docs: \"An integer that specifies how many top token log probabilities\n are included in the response for each token generation step.\". Defaults to None.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in Together Inference.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TogetherInferenceLLM\n >>> task = Task()\n >>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n \"\"\"\n if not _TOGETHER_AVAILABLE:\n raise ImportError(\n \"`TogetherInferenceLLM` cannot be used as `together` is not installed, please \"\n \" install it with `pip install together`.\"\n )\n\n together.api_key = api_key or os.getenv(\"TOGETHER_API_KEY\", None)\n if together.api_key is None:\n raise ValueError(\n \"No `api_key` provided, please provide one or set the `TOGETHER_API_KEY` \"\n \"environment variable.\"\n )\n\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in Together Inference, available models are {self.available_models}\"\n self.model = model\n\n self.max_new_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repetition_penalty = repetition_penalty\n self.stop = stop\n self.logprobs = logprobs\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_new_tokens,\n \"temperature\": self.temperature,\n \"repetition_penalty\": self.repetition_penalty,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n \"stop\": self.stop,\n \"logprobs\": self.logprobs,\n },\n )\n\n @cached_property\n def available_models(self) -> List[str]:\n \"\"\"Returns the list of available models in Together Inference.\"\"\"\n # TODO: exclude the image models\n return [model[\"name\"] for model in together.Models.list()]\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the Together Inference model.\"\"\"\n return self.model\n\n def _generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n batch = []\n for _ in range(num_generations):\n output = together.Complete.create(\n prompt=prompt,\n model=self.model,\n max_tokens=self.max_new_tokens,\n stop=self.stop,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n repetition_penalty=self.repetition_penalty,\n logprobs=self.logprobs,\n )\n if output[\"output\"][\"choices\"] is not None:\n for choice in output[\"output\"][\"choices\"]:\n try:\n parsed_response = self.task.parse_output(\n choice[\"text\"].strip()\n )\n except Exception as e:\n logger.error(\n f\"Error parsing Together Inference response: {e}\"\n )\n parsed_response = None\n batch.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=choice[\"text\"],\n parsed_output=parsed_response,\n )\n )\n if len(batch) > 0:\n outputs.append(batch)\n return outputs\n
"},{"location":"reference/distilabel/llm/together/#distilabel.llm.together.TogetherInferenceLLM.available_models","title":"available_models: List[str]
cached
property
","text":"Returns the list of available models in Together Inference.
"},{"location":"reference/distilabel/llm/together/#distilabel.llm.together.TogetherInferenceLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the Together Inference model.
"},{"location":"reference/distilabel/llm/together/#distilabel.llm.together.TogetherInferenceLLM.__init__","title":"__init__(task, model, api_key=None, max_new_tokens=128, repetition_penalty=1.0, temperature=1.0, top_p=1.0, top_k=1, stop=None, logprobs=0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the OpenAILLM class.
Parameters:
Name Type Description Default
task
Task
the task to be performed by the LLM.
required
model
str
the model to be used for generation.
required
max_new_tokens
int
the maximum number of tokens to be generated. Defaults to 128.
128
temperature
float
the temperature to be used for generation. From the Together Inference docs: \"A decimal number that determines the degree of randomness in the response. A value of 0 will always yield the same output. A temperature much less than 1 favors more correctness and is appropriate for question answering or summarization. A value approaching 1 introduces more randomness in the output.\". Defaults to 1.0.
1.0
repetition_penalty
float
the repetition penalty to be used for generation. From the Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.
1.0
top_p
float
the top-p value to be used for generation. From the Together Inference docs: \"used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold, below which all less likely tokens are filtered out. This technique helps to maintain diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.
1.0
top_k
int
the top-k value to be used for generation. From the Together Inference docs: \"used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options.\". Defaults to 1.
1
stop
List[str]
strings that delimit the generation process, so that when the model generates any of the provided strings, the generation is considered complete. Defaults to None.
None
logprobs
int
the number of logprobs to be returned for each token. From the Together Inference docs: \"An integer that specifies how many top token log probabilities are included in the response for each token generation step.\". Defaults to 0.
0
num_threads
Union[int, None]
the number of threads to be used for parallel generation. If None
, no parallel generation will be performed. Defaults to None
.
None
prompt_format
Union[SupportedFormats, None]
the format to be used for the prompt. If None
, the default format of the task will be used, available formats are openai
, chatml
, llama2
, zephyr
, and default
. Defaults to None
, but default
(concatenation of system_prompt
and formatted_prompt
with a line-break) will be used if no prompt_formatting_fn
is provided.
None
prompt_formatting_fn
Union[Callable[..., str], None]
a function to be applied to the prompt before generation. If None
, no formatting will be applied. Defaults to None
.
None
Raises:
Type Description
AssertionError
if the provided model
is not available in Together Inference.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import TogetherInferenceLLM\n>>> task = Task()\n>>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n
Source code in src/distilabel/llm/together.py
def __init__(\n self,\n task: \"Task\",\n model: str,\n api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: float = 1.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = 1,\n stop: Union[List[str], None] = None,\n logprobs: int = 0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str): the model to be used for generation.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation. From the Together\n Inference docs: \"A decimal number that determines the degree of randomness in the response.\n A value of 0 will always yield the same output. A temperature much less than 1 favors more\n correctness and is appropriate for question answering or summarization. A value approaching\n 1 introduces more randomness in the output.\". Defaults to 1.0.\n repetition_penalty (float, optional): the repetition penalty to be used for generation. From the\n Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood\n of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation. From the Together\n Inference docs: \"used to dynamically adjust the number of choices for each predicted\n token based on the cumulative probabilities. It specifies a probability threshold,\n below which all less likely tokens are filtered out. This technique helps to maintain\n diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation. From the Together Inference\n docs: \"used to limit the number of choices for the next predicted word or token. It specifies\n the maximum number of tokens to consider at each step, based on their probability of occurrence.\n This technique helps to speed up the generation process and can improve the quality of the\n generated text by focusing on the most likely options.\". Defaults to 1.\n stop (List[str], optional): strings to delimitate the generation process, so that when the\n model generates any of the provided characters, the generation process is considered completed.\n Defaults to None.\n logprobs (int, optional): the number of logprobs to be returned for each token. From the\n Together Inference docs: \"An integer that specifies how many top token log probabilities\n are included in the response for each token generation step.\". Defaults to None.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in Together Inference.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TogetherInferenceLLM\n >>> task = Task()\n >>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n \"\"\"\n if not _TOGETHER_AVAILABLE:\n raise ImportError(\n \"`TogetherInferenceLLM` cannot be used as `together` is not installed, please \"\n \" install it with `pip install together`.\"\n )\n\n together.api_key = api_key or os.getenv(\"TOGETHER_API_KEY\", None)\n if together.api_key is None:\n raise ValueError(\n \"No `api_key` provided, please provide one or set the `TOGETHER_API_KEY` \"\n \"environment variable.\"\n )\n\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in Together Inference, available models are {self.available_models}\"\n self.model = model\n\n self.max_new_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repetition_penalty = repetition_penalty\n self.stop = stop\n self.logprobs = logprobs\n
"},{"location":"reference/distilabel/llm/utils/","title":"utils","text":""},{"location":"reference/distilabel/llm/utils/#distilabel.llm.utils.LLMOutput","title":"LLMOutput
","text":" Bases: TypedDict
A type for the output of an LLM.
Source code insrc/distilabel/llm/utils.py
class LLMOutput(TypedDict):\n \"\"\"A type for the output of an LLM.\"\"\"\n\n model_name: str\n prompt_used: Any\n raw_output: Any\n parsed_output: Optional[Any]\n
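Since LLMOutput is a plain TypedDict, a record can be built directly with keyword arguments; the field values below are made up for illustration only.
>>> from distilabel.llm.utils import LLMOutput\n>>> record = LLMOutput(\n...     model_name=\"gpt-3.5-turbo\",\n...     prompt_used=\"Write a haiku about the sea.\",\n...     raw_output=\"Waves fold into foam\",\n...     parsed_output={\"generations\": [\"Waves fold into foam\"]},  # illustrative structure\n... )\n>>> record[\"parsed_output\"] is not None\nTrue\n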
"},{"location":"reference/distilabel/llm/vllm/","title":"vllm","text":""},{"location":"reference/distilabel/llm/vllm/#distilabel.llm.vllm.vLLM","title":"vLLM
","text":" Bases: LLM
src/distilabel/llm/vllm.py
class vLLM(LLM):\n def __init__(\n self,\n vllm: \"_vLLM\",\n task: \"Task\",\n max_new_tokens: int = 128,\n presence_penalty: float = 0.0,\n frequency_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = -1,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the vLLM class.\n\n Args:\n vllm (_vLLM): the vLLM model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to -1.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n\n Examples:\n >>> from vllm import LLM\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import vLLM\n >>> model = LLM(model=\"gpt2\")\n >>> task = Task()\n >>> llm = vLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VLLM_AVAILABLE:\n raise ImportError(\n \"`vLLM` cannot be used as `vllm` is not installed, please \"\n \" install it with `pip install vllm`.\"\n )\n\n self.presence_penalty = presence_penalty\n self.frequency_penalty = frequency_penalty\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_tokens = max_new_tokens\n\n self.vllm = vllm\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_tokens\": self.max_tokens,\n \"presence_penalty\": self.presence_penalty,\n \"frequency_penalty\": self.frequency_penalty,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the vLLM model.\"\"\"\n return self.vllm.llm_engine.model_config.model # type: ignore\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. 
Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n requests = self.vllm.generate(\n prompts,\n SamplingParams( # type: ignore\n n=num_generations,\n presence_penalty=self.presence_penalty,\n frequency_penalty=self.frequency_penalty,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n max_tokens=self.max_tokens,\n ),\n use_tqdm=False, # type: ignore\n )\n outputs = []\n for request, prompt in zip(requests, prompts):\n output = []\n for request_output in request.outputs:\n try:\n parsed_output = self.task.parse_output(request_output.text)\n except Exception as e:\n logger.error(f\"Error parsing vLLM output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=request_output.text,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/vllm/#distilabel.llm.vllm.vLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the vLLM model.
"},{"location":"reference/distilabel/llm/vllm/#distilabel.llm.vllm.vLLM.__init__","title":"__init__(vllm, task, max_new_tokens=128, presence_penalty=0.0, frequency_penalty=0.0, temperature=1.0, top_p=1.0, top_k=-1, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the vLLM class.
Parameters:
Name Type Description Default
vllm
LLM
the vLLM model to be used.
required
task
Task
the task to be performed by the LLM.
required
max_new_tokens
int
the maximum number of tokens to be generated. Defaults to 128.
128
presence_penalty
float
the presence penalty to be used for generation. Defaults to 0.0.
0.0
frequency_penalty
float
the frequency penalty to be used for generation. Defaults to 0.0.
0.0
temperature
float
the temperature to be used for generation. Defaults to 1.0.
1.0
top_p
float
the top-p value to be used for generation. Defaults to 1.0.
1.0
top_k
int
the top-k value to be used for generation. Defaults to -1.
-1
prompt_format
Union[SupportedFormats, None]
the format to be used for the prompt. If None
, the default format of the task will be used, available formats are openai
, chatml
, llama2
, zephyr
, and default
. Defaults to None
, but default
(concatenation of system_prompt
and formatted_prompt
with a line-break) will be used if no prompt_formatting_fn
is provided.
None
prompt_formatting_fn
Union[Callable[..., str], None]
a function to be applied to the prompt before generation. If None
, no formatting will be applied.
None
Examples:
>>> from vllm import LLM\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import vLLM\n>>> model = LLM(model=\"gpt2\")\n>>> task = Task()\n>>> llm = vLLM(vllm=model, task=task)\n
Source code in src/distilabel/llm/vllm.py
def __init__(\n self,\n vllm: \"_vLLM\",\n task: \"Task\",\n max_new_tokens: int = 128,\n presence_penalty: float = 0.0,\n frequency_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = -1,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the vLLM class.\n\n Args:\n vllm (_vLLM): the vLLM model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to -1.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n\n Examples:\n >>> from vllm import LLM\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import vLLM\n >>> model = LLM(model=\"gpt2\")\n >>> task = Task()\n >>> llm = vLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VLLM_AVAILABLE:\n raise ImportError(\n \"`vLLM` cannot be used as `vllm` is not installed, please \"\n \" install it with `pip install vllm`.\"\n )\n\n self.presence_penalty = presence_penalty\n self.frequency_penalty = frequency_penalty\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_tokens = max_new_tokens\n\n self.vllm = vllm\n
"},{"location":"reference/distilabel/llm/google/","title":"google","text":""},{"location":"reference/distilabel/llm/google/vertexai/","title":"vertexai","text":""},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAIEndpointLLM","title":"VertexAIEndpointLLM
","text":" Bases: LLM
An LLM
which uses a Vertex AI Online prediction endpoint for the generation.
More information about Vertex AI Endpoints can be found here:
- https://cloud.google.com/vertex-ai/docs/general/deployment#deploy_a_model_to_an_endpoint\n
Source code in src/distilabel/llm/google/vertexai.py
class VertexAIEndpointLLM(LLM):\n \"\"\"An `LLM` which uses a Vertex AI Online prediction endpoint for the generation.\n\n More information about Vertex AI Endpoints can be found here:\n\n - https://cloud.google.com/vertex-ai/docs/general/deployment#deploy_a_model_to_an_endpoint\n \"\"\"\n\n def __init__(\n self,\n task: \"Task\",\n endpoint_id: str,\n project: Optional[str] = None,\n location: str = \"us-central1\",\n generation_kwargs: Optional[Dict[str, Any]] = None,\n prompt_argument: str = \"prompt\",\n num_generations_argument: str = \"n\",\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the `VertexAIEndpointLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n endpoint_id (str): the ID of the Vertex AI endpoint to be used for generation.\n project (Optional[str], optional): the project to be used for generation. If `None`,\n the default project will be used. Defaults to `None`.\n location (str, optional): the location of the Vertex AI endpoint to be used for\n generation. Defaults to \"us-central1\".\n generation_kwargs (Optional[Dict[str, Any]], optional): the generation parameters\n to be used for generation. The name of the parameters will depend on the\n Docker image used to deploy the model to the Vertex AI endpoint. Defaults\n to `None`.\n prompt_argument (str, optional): the name of the Vertex AI Endpoint key to\n be used for the prompt. Defaults to \"prompt\".\n num_generations_argument (str, optional): the name of the Vertex AI Endpoint\n key to be used to specify the number of generations per prompt. Defaults\n to \"n\".\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAIEndpointLLM` cannot be used as `google-cloud-aiplatform` is not\"\n \" installed, please install it with `pip install google-cloud-aiplatform`\"\n )\n\n if project is None:\n try:\n project = google.auth.default()[1]\n except DefaultCredentialsError as e:\n raise ValueError(\n \"No `project` was specified and no default credentials were found.\"\n ) from e\n\n if generation_kwargs is None:\n generation_kwargs = {}\n\n self.endpoint_id = endpoint_id\n self.project = project\n self.location = location\n self.generation_kwargs = generation_kwargs\n self.prompt_argument = prompt_argument\n self.num_generations_argument = num_generations_argument\n\n self.client = PredictionServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n\n @cached_property\n def model_name(self) -> str:\n \"\"\"Returns the name of the model used for generation.\"\"\"\n client = EndpointServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n endpoint = client.get_endpoint(name=self.endpoint_path)\n return endpoint.deployed_models[0].display_name\n\n @property\n def endpoint_path(self) -> str:\n \"\"\"Returns the path of the Vertex AI endpoint to be used for generation.\"\"\"\n return self.client.endpoint_path(\n project=self.project, # type: ignore\n location=self.location,\n endpoint=self.endpoint_id,\n )\n\n @_vertexai_retry_decorator\n def _call_vertexai_endpoint(self, instances: List[Any]) -> Any:\n return self.client.predict(endpoint=self.endpoint_path, instances=instances)\n\n def _prepare_instances(\n self, prompts: List[str], num_generations: int\n ) -> List[\"Value\"]:\n \"\"\"Prepares the instances to be sent to the Vertex AI endpoint.\n\n Args:\n prompts (List[str]): the prompts to be used for generation.\n num_generations (int): the number of generations to be performed for each prompt.\n\n Returns:\n The instances to be sent to the Vertex AI endpoint.\n \"\"\"\n instances = []\n for prompt in prompts:\n instance = json_format.ParseDict(\n {\n self.prompt_argument: prompt,\n self.num_generations_argument: num_generations,\n **self.generation_kwargs,\n },\n Value(),\n )\n instances.append(instance)\n return instances\n\n def _single_output(self, instance: Any) -> List[LLMOutput]:\n try:\n # NOTE: `predict` method accepts a list of instances, but depending on the\n # deployed Docker image, it can just accept one instance.\n response = self._call_vertexai_endpoint(instances=[instance])\n except exceptions.InternalServerError as e:\n raise ValueError(\n \"The Vertex AI endpoint returned 500 Internal Server Error. This is\"\n \" usually caused due to wrong generation parameters. 
Please check the\"\n \" `generation_parameters` and try again.\"\n ) from e\n\n output = []\n for prediction in response.predictions:\n # Vertex endpoint output is `Prompt:\\n{{ model_prompt }}\\nOutput:\\n{{ model_output }}`\n # so we need to do a pre-parsing to remove the `Prompt:` and `Output:` parts.\n match = _PARSE_VERTEXAI_ENDPOINT_PREDICTION_REGEX.search(prediction)\n if not match:\n raise ValueError(\n \"Couldn't parse the response from the Vertex AI endpoint.\"\n )\n\n model_output = match.group(1).strip()\n\n try:\n parsed_output = self.task.parse_output(model_output)\n except Exception as e:\n logger.error(f\"Error parsing Vertex AI endpoint model response: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=instance.struct_value[self.prompt_argument],\n raw_output=model_output,\n parsed_output=parsed_output,\n )\n )\n return output\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n prompts = self._generate_prompts(inputs)\n instances = self._prepare_instances(\n prompts=prompts, num_generations=num_generations\n )\n return [self._single_output(instance) for instance in instances]\n
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAIEndpointLLM.endpoint_path","title":"endpoint_path: str
property
","text":"Returns the path of the Vertex AI endpoint to be used for generation.
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAIEndpointLLM.model_name","title":"model_name: str
cached
property
","text":"Returns the name of the model used for generation.
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAIEndpointLLM.__init__","title":"__init__(task, endpoint_id, project=None, location='us-central1', generation_kwargs=None, prompt_argument='prompt', num_generations_argument='n', num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the VertexAIEndpointLLM
class.
Parameters:
Name Type Description Default
task
Task
the task to be performed by the LLM.
required
endpoint_id
str
the ID of the Vertex AI endpoint to be used for generation.
required
project
Optional[str]
the project to be used for generation. If None
, the default project will be used. Defaults to None
.
None
location
str
the location of the Vertex AI endpoint to be used for generation. Defaults to \"us-central1\".
'us-central1'
generation_kwargs
Optional[Dict[str, Any]]
the generation parameters to be used for generation. The name of the parameters will depend on the Docker image used to deploy the model to the Vertex AI endpoint. Defaults to None
.
None
prompt_argument
str
the name of the Vertex AI Endpoint key to be used for the prompt. Defaults to \"prompt\".
'prompt'
num_generations_argument
str
the name of the Vertex AI Endpoint key to be used to specify the number of generations per prompt. Defaults to \"n\".
'n'
num_threads
Union[int, None]
the number of threads to be used for parallel generation. If None
, no parallel generation will be performed. Defaults to None
.
None
prompt_format
Union[SupportedFormats, None]
the format to be used for the prompt. If None
, the default format of the task will be used, available formats are openai
, chatml
, llama2
, zephyr
, and default
. Defaults to None
, but default
(concatenation of system_prompt
and formatted_prompt
with a line-break) will be used if no prompt_formatting_fn
is provided.
None
prompt_formatting_fn
Union[Callable[..., str], None]
a function to be applied to the prompt before generation. If None
, no formatting will be applied. Defaults to None
.
None
Source code in src/distilabel/llm/google/vertexai.py
def __init__(\n self,\n task: \"Task\",\n endpoint_id: str,\n project: Optional[str] = None,\n location: str = \"us-central1\",\n generation_kwargs: Optional[Dict[str, Any]] = None,\n prompt_argument: str = \"prompt\",\n num_generations_argument: str = \"n\",\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the `VertexAIEndpointLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n endpoint_id (str): the ID of the Vertex AI endpoint to be used for generation.\n project (Optional[str], optional): the project to be used for generation. If `None`,\n the default project will be used. Defaults to `None`.\n location (str, optional): the location of the Vertex AI endpoint to be used for\n generation. Defaults to \"us-central1\".\n generation_kwargs (Optional[Dict[str, Any]], optional): the generation parameters\n to be used for generation. The name of the parameters will depend on the\n Docker image used to deploy the model to the Vertex AI endpoint. Defaults\n to `None`.\n prompt_argument (str, optional): the name of the Vertex AI Endpoint key to\n be used for the prompt. Defaults to \"prompt\".\n num_generations_argument (str, optional): the name of the Vertex AI Endpoint\n key to be used to specify the number of generations per prompt. Defaults\n to \"n\".\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAIEndpointLLM` cannot be used as `google-cloud-aiplatform` is not\"\n \" installed, please install it with `pip install google-cloud-aiplatform`\"\n )\n\n if project is None:\n try:\n project = google.auth.default()[1]\n except DefaultCredentialsError as e:\n raise ValueError(\n \"No `project` was specified and no default credentials were found.\"\n ) from e\n\n if generation_kwargs is None:\n generation_kwargs = {}\n\n self.endpoint_id = endpoint_id\n self.project = project\n self.location = location\n self.generation_kwargs = generation_kwargs\n self.prompt_argument = prompt_argument\n self.num_generations_argument = num_generations_argument\n\n self.client = PredictionServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAILLM","title":"VertexAILLM
","text":" Bases: LLM
An LLM
which allows using Google's proprietary models through the Vertex AI APIs:
Gemini API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini
Codey API: https://cloud.google.com/vertex-ai/docs/generative-ai/code/code-models-overview
Text API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text
To use the VertexAILLM it is necessary to have configured Google Cloud authentication using one of these methods:
Setting the GOOGLE_CLOUD_CREDENTIALS environment variable
Using the gcloud auth application-default login command
Using the vertexai.init function from the google-cloud-aiplatform library
src/distilabel/llm/google/vertexai.py
class VertexAILLM(LLM):\n \"\"\"An `LLM` which allows to use Google's proprietary models from the Vertex AI APIs:\n\n - Gemini API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini\n - Codey API: https://cloud.google.com/vertex-ai/docs/generative-ai/code/code-models-overview\n - Text API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text\n\n To use the `VertexAILLM` is necessary to have configured the Google Cloud authentication\n using one of these methods:\n\n - Setting `GOOGLE_CLOUD_CREDENTIALS` environment variable\n - Using `gcloud auth application-default login` command\n - Using `vertexai.init` function from the `google-cloud-aiplatform` library\n \"\"\"\n\n def __init__(\n self,\n task: \"Task\",\n model: str = \"gemini-pro\",\n temperature: Optional[float] = None,\n top_p: Optional[float] = None,\n top_k: Optional[int] = None,\n max_new_tokens: int = 128,\n stop_sequences: Optional[List[str]] = None,\n num_threads: Union[int, None] = None,\n ) -> None:\n \"\"\"Initializes the `VertexGenerativeModelLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gemini-pro\".\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n \"\"\"\n super().__init__(task=task, num_threads=num_threads)\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAILLM` cannot be used as `google-cloud-aiplatform` is not installed,\"\n \" please install it with `pip install google-cloud-aiplatform`\"\n )\n\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_output_tokens = max_new_tokens\n self.stop_sequences = stop_sequences\n\n if is_gemini_model(model):\n self.model = GenerativeModel(model)\n elif is_codey_model(model):\n self.model = CodeGenerationModel.from_pretrained(model)\n else:\n self.model = TextGenerationModel.from_pretrained(model)\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the model used for generation.\"\"\"\n if isinstance(self.model, GenerativeModel):\n return self.model._model_name\n\n return self.model._model_id\n\n def _generate_contents(self, prompts: List[str]) -> List[List[Dict[str, Any]]]:\n \"\"\"Generates a list of valid dicts that can be parsed to `vertexai.preview.generative_models.Content`\n objects for each input.\n\n Args:\n prompts (List[str]): the prompts to be used for generation.\n\n Returns:\n List[List[Dict[str, Any]]]: the list of valid `vertexai.preview.generative_models.Content`\n objects.\n \"\"\"\n return [[{\"role\": \"user\", \"parts\": [{\"text\": prompt}]}] for prompt in prompts]\n\n @_vertexai_retry_decorator\n def _call_generative_model_with_backoff(\n self, contents: List[Dict[str, Any]], **kwargs: Any\n ) -> \"GenerationResponse\":\n return self.model.generate_content( # type: ignore\n contents=contents,\n # TODO: update `candidate_count` to have `num_generations` as value once valid range is not [1, 2)\n generation_config=GenerationConfig(candidate_count=1, 
**kwargs),\n )\n\n def _generative_model_single_output(\n self, contents: List[Dict[str, Any]]\n ) -> LLMOutput:\n raw_output = None\n try:\n response = self._call_generative_model_with_backoff(\n contents=contents,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n max_output_tokens=self.max_output_tokens,\n stop_sequences=self.stop_sequences,\n )\n raw_output = response.text\n parsed_output = self.task.parse_output(raw_output)\n except ValueError as e:\n logger.error(f\"Vertex AI Gemini API model didn't return content: {e}\")\n return LLMOutput(\n model_name=self.model_name,\n prompt_used=contents,\n raw_output=None,\n parsed_output=None,\n )\n except Exception as e:\n logger.error(f\"Error parsing Vertex AI Gemini API model response: {e}\")\n parsed_output = None\n\n return LLMOutput(\n model_name=self.model_name,\n prompt_used=contents,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n\n def _generate_with_generative_model(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generate `num_generations` for each input in `inputs` using a Vertex AI Gemini\n API model.\"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"default\")\n inputs_contents = self._generate_contents(prompts)\n outputs = []\n for contents in inputs_contents:\n output = []\n # TODO: remove this for-loop once `GenerationConfig.candidate_count` valid range is not [1, 2)\n for _ in range(num_generations):\n output.append(self._generative_model_single_output(contents=contents))\n outputs.append(output)\n return outputs\n\n @_vertexai_retry_decorator\n def _call_text_generation_model(\n self, **kwargs: Any\n ) -> \"MultiCandidateTextGenerationResponse\":\n return self.model.predict(**kwargs) # type: ignore\n\n def _text_generation_model_single_output(\n self, prompt: str, num_generations: int\n ) -> List[LLMOutput]:\n response = self._call_text_generation_model(\n prompt=prompt,\n max_output_tokens=self.max_output_tokens,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n stop_sequences=self.stop_sequences,\n # WARNING: The model can return < `candidate_count` generations depending\n # on the generation parameters and the input.\n candidate_count=num_generations,\n )\n\n output = []\n for candidate in response.candidates:\n try:\n parsed_response = self.task.parse_output(candidate.text)\n except Exception as e:\n logger.error(\n f\"Error parsing Vertex AI Text/Code API model response: {e}\"\n )\n parsed_response = None\n\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=candidate.text,\n parsed_output=parsed_response,\n )\n )\n return output\n\n def _generate_with_text_generation_model(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generate `num_generations` for each input in `inputs` using a Vertex AI Text/Code\n API model.\"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"default\")\n outputs = []\n for prompt in prompts:\n outputs.append(\n self._text_generation_model_single_output(prompt, num_generations)\n )\n return outputs\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n if isinstance(self.model, GenerativeModel):\n return self._generate_with_generative_model(inputs, num_generations)\n\n return self._generate_with_text_generation_model(inputs, num_generations)\n
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAILLM.model_name","title":"model_name: str
property
","text":"Returns the name of the model used for generation.
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAILLM.__init__","title":"__init__(task, model='gemini-pro', temperature=None, top_p=None, top_k=None, max_new_tokens=128, stop_sequences=None, num_threads=None)
","text":"Initializes the VertexGenerativeModelLLM
class.
Parameters:
Name Type Description Default
task
Task
the task to be performed by the LLM.
required
model
str
the model to be used for generation. Defaults to \"gemini-pro\".
'gemini-pro'
temperature
float
the temperature to be used for generation. Defaults to None.
None
top_p
float
the top-p value to be used for generation. Defaults to None.
None
top_k
int
the top-k value to be used for generation. Defaults to None.
None
max_new_tokens
int
the maximum number of tokens to be generated. Defaults to 128.
128
num_threads
Union[int, None]
the number of threads to be used for parallel generation. If None
, no parallel generation will be performed. Defaults to None
.
None
Source code in src/distilabel/llm/google/vertexai.py
def __init__(\n self,\n task: \"Task\",\n model: str = \"gemini-pro\",\n temperature: Optional[float] = None,\n top_p: Optional[float] = None,\n top_k: Optional[int] = None,\n max_new_tokens: int = 128,\n stop_sequences: Optional[List[str]] = None,\n num_threads: Union[int, None] = None,\n) -> None:\n \"\"\"Initializes the `VertexGenerativeModelLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gemini-pro\".\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n \"\"\"\n super().__init__(task=task, num_threads=num_threads)\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAILLM` cannot be used as `google-cloud-aiplatform` is not installed,\"\n \" please install it with `pip install google-cloud-aiplatform`\"\n )\n\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_output_tokens = max_new_tokens\n self.stop_sequences = stop_sequences\n\n if is_gemini_model(model):\n self.model = GenerativeModel(model)\n elif is_codey_model(model):\n self.model = CodeGenerationModel.from_pretrained(model)\n else:\n self.model = TextGenerationModel.from_pretrained(model)\n
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.is_codey_model","title":"is_codey_model(model)
","text":"Returns True
if the model is a model from the Vertex AI Codey API.
Parameters:
Name Type Description Defaultmodel
str
the model name to be checked.
requiredReturns:
Name Type Descriptionbool
bool
True
if the model is a model from the Vertex AI Codey API.
src/distilabel/llm/google/vertexai.py
def is_codey_model(model: str) -> bool:\n \"\"\"Returns `True` if the model is a model from the Vertex AI Codey API.\n\n Args:\n model (str): the model name to be checked.\n\n Returns:\n bool: `True` if the model is a model from the Vertex AI Codey API.\n \"\"\"\n return \"code\" in model\n
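An added illustration of the substring-based check implemented above:
>>> from distilabel.llm.google.vertexai import is_codey_model
>>> is_codey_model('code-bison')
True
>>> is_codey_model('text-bison')
False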
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.is_gemini_model","title":"is_gemini_model(model)
","text":"Returns True
if the model is a model from the Vertex AI Gemini API.
Parameters:
Name Type Description Defaultmodel
str
the model name to be checked.
requiredReturns:
Name Type Descriptionbool
bool
True
if the model is a model from the Vertex AI Gemini API.
src/distilabel/llm/google/vertexai.py
def is_gemini_model(model: str) -> bool:\n \"\"\"Returns `True` if the model is a model from the Vertex AI Gemini API.\n\n Args:\n model (str): the model name to be checked.\n\n Returns:\n bool: `True` if the model is a model from the Vertex AI Gemini API.\n \"\"\"\n return \"gemini\" in model\n
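An added illustration of the substring-based check implemented above:
>>> from distilabel.llm.google.vertexai import is_gemini_model
>>> is_gemini_model('gemini-pro')
True
>>> is_gemini_model('text-bison')
False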
"},{"location":"reference/distilabel/llm/huggingface/","title":"huggingface","text":""},{"location":"reference/distilabel/llm/huggingface/inference_endpoints/","title":"inference_endpoints","text":""},{"location":"reference/distilabel/llm/huggingface/inference_endpoints/#distilabel.llm.huggingface.inference_endpoints.InferenceEndpointsLLM","title":"InferenceEndpointsLLM
","text":" Bases: LLM
Source code in src/distilabel/llm/huggingface/inference_endpoints.py
class InferenceEndpointsLLM(LLM):\n def __init__(\n self,\n endpoint_name: str,\n task: \"Task\",\n endpoint_namespace: Union[str, None] = None,\n token: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: Union[float, None] = None,\n seed: Union[int, None] = None,\n do_sample: bool = False,\n temperature: Union[float, None] = None,\n top_k: Union[int, None] = None,\n top_p: Union[float, None] = None,\n typical_p: Union[float, None] = None,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the InferenceEndpointsLLM class.\n\n Args:\n endpoint_name (str): The name of the endpoint.\n task (Task): The task to be performed by the LLM.\n endpoint_namespace (Union[str, None]): The namespace of the endpoint. Defaults to None.\n token (Union[str, None]): The token for the endpoint. Defaults to None.\n max_new_tokens (int): The maximum number of tokens to be generated. Defaults to 128.\n repetition_penalty (Union[float, None]): The repetition penalty to be used for generation. Defaults to None.\n seed (Union[int, None]): The seed for generation. Defaults to None.\n do_sample (bool): Whether to do sampling. Defaults to False.\n temperature (Union[float, None]): The temperature for generation. Defaults to None.\n top_k (Union[int, None]): The top_k for generation. Defaults to None.\n top_p (Union[float, None]): The top_p for generation. Defaults to None.\n typical_p (Union[float, None]): The typical_p for generation. Defaults to None.\n num_threads (Union[int, None]): The number of threads. Defaults to None.\n prompt_format (Union[\"SupportedFormats\", None]): The format of the prompt. Defaults to None.\n prompt_formatting_fn (Union[Callable[..., str], None]): The function for formatting the prompt. Defaults to None.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import InferenceEndpointsLLM\n >>> task = Task()\n >>> llm = InferenceEndpointsLLM(\n ... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n ... task=task,\n ... 
)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _HUGGINGFACE_HUB_AVAILABLE:\n raise ImportError(\n \"`InferenceEndpointsLLM` cannot be used as `huggingface-hub` is not \"\n \"installed, please install it with `pip install huggingface-hub`.\"\n )\n\n self.do_sample = do_sample\n self.max_new_tokens = max_new_tokens\n self.repetition_penalty = repetition_penalty\n self.seed = seed\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.inference_endpoint = get_inference_endpoint(\n name=endpoint_name, namespace=endpoint_namespace, token=token\n )\n self.inference_endpoint.wait(timeout=30)\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"do_sample\": self.do_sample,\n \"max_new_tokens\": self.max_new_tokens,\n \"repetition_penalty\": self.repetition_penalty,\n \"seed\": self.seed,\n \"temperature\": self.temperature,\n \"top_k\": self.top_k,\n \"top_p\": self.top_p,\n \"typical_p\": self.typical_p,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the model name of the endpoint.\"\"\"\n return self.inference_endpoint.repository\n\n @retry(\n retry=retry_if_exception_type(_INFERENCE_ENDPOINTS_API_RETRY_ON_EXCEPTIONS),\n stop=stop_after_attempt(_INFERENCE_ENDPOINTS_API_STOP_AFTER_ATTEMPT),\n wait=wait_random_exponential(\n multiplier=_INFERENCE_ENDPOINTS_API_WAIT_RANDOM_EXPONENTIAL_MULTIPLIER,\n max=_INFERENCE_ENDPOINTS_API_WAIT_RANDOM_EXPONENTIAL_MAX,\n ),\n before_sleep=before_sleep_log(logger, logging.INFO),\n after=after_log(logger, logging.INFO),\n )\n def _text_generation_with_backoff(self, **kwargs: Any) -> Any:\n \"\"\"Performs text generation with backoff in case of an error.\"\"\"\n return self.inference_endpoint.client.text_generation(**kwargs) # type: ignore\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n raw_responses = [\n self._text_generation_with_backoff(\n prompt=prompt,\n do_sample=self.do_sample,\n max_new_tokens=self.max_new_tokens,\n repetition_penalty=self.repetition_penalty,\n seed=self.seed,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n typical_p=self.typical_p,\n )\n for _ in range(num_generations)\n ]\n output = []\n for raw_response in raw_responses:\n try:\n parsed_response = self.task.parse_output(raw_response)\n except Exception as e:\n logger.error(f\"Error parsing Inference Endpoints output: {e}\")\n parsed_response = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_response,\n parsed_output=parsed_response,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/huggingface/inference_endpoints/#distilabel.llm.huggingface.inference_endpoints.InferenceEndpointsLLM.model_name","title":"model_name: str
property
","text":"Returns the model name of the endpoint.
"},{"location":"reference/distilabel/llm/huggingface/inference_endpoints/#distilabel.llm.huggingface.inference_endpoints.InferenceEndpointsLLM.__init__","title":"__init__(endpoint_name, task, endpoint_namespace=None, token=None, max_new_tokens=128, repetition_penalty=None, seed=None, do_sample=False, temperature=None, top_k=None, top_p=None, typical_p=None, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the InferenceEndpointsLLM class.
Parameters:
Name | Type | Description | Default
endpoint_name | str | The name of the endpoint. | required
task | Task | The task to be performed by the LLM. | required
endpoint_namespace | Union[str, None] | The namespace of the endpoint. Defaults to None. | None
token | Union[str, None] | The token for the endpoint. Defaults to None. | None
max_new_tokens | int | The maximum number of tokens to be generated. Defaults to 128. | 128
repetition_penalty | Union[float, None] | The repetition penalty to be used for generation. Defaults to None. | None
seed | Union[int, None] | The seed for generation. Defaults to None. | None
do_sample | bool | Whether to do sampling. Defaults to False. | False
temperature | Union[float, None] | The temperature for generation. Defaults to None. | None
top_k | Union[int, None] | The top_k for generation. Defaults to None. | None
top_p | Union[float, None] | The top_p for generation. Defaults to None. | None
typical_p | Union[float, None] | The typical_p for generation. Defaults to None. | None
num_threads | Union[int, None] | The number of threads. Defaults to None. | None
prompt_format | Union[SupportedFormats, None] | The format of the prompt. Defaults to None. | None
prompt_formatting_fn | Union[Callable[..., str], None] | The function for formatting the prompt. Defaults to None. | None
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import InferenceEndpointsLLM\n>>> task = Task()\n>>> llm = InferenceEndpointsLLM(\n... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n... task=task,\n... )\n
Source code in src/distilabel/llm/huggingface/inference_endpoints.py
def __init__(\n self,\n endpoint_name: str,\n task: \"Task\",\n endpoint_namespace: Union[str, None] = None,\n token: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: Union[float, None] = None,\n seed: Union[int, None] = None,\n do_sample: bool = False,\n temperature: Union[float, None] = None,\n top_k: Union[int, None] = None,\n top_p: Union[float, None] = None,\n typical_p: Union[float, None] = None,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the InferenceEndpointsLLM class.\n\n Args:\n endpoint_name (str): The name of the endpoint.\n task (Task): The task to be performed by the LLM.\n endpoint_namespace (Union[str, None]): The namespace of the endpoint. Defaults to None.\n token (Union[str, None]): The token for the endpoint. Defaults to None.\n max_new_tokens (int): The maximum number of tokens to be generated. Defaults to 128.\n repetition_penalty (Union[float, None]): The repetition penalty to be used for generation. Defaults to None.\n seed (Union[int, None]): The seed for generation. Defaults to None.\n do_sample (bool): Whether to do sampling. Defaults to False.\n temperature (Union[float, None]): The temperature for generation. Defaults to None.\n top_k (Union[int, None]): The top_k for generation. Defaults to None.\n top_p (Union[float, None]): The top_p for generation. Defaults to None.\n typical_p (Union[float, None]): The typical_p for generation. Defaults to None.\n num_threads (Union[int, None]): The number of threads. Defaults to None.\n prompt_format (Union[\"SupportedFormats\", None]): The format of the prompt. Defaults to None.\n prompt_formatting_fn (Union[Callable[..., str], None]): The function for formatting the prompt. Defaults to None.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import InferenceEndpointsLLM\n >>> task = Task()\n >>> llm = InferenceEndpointsLLM(\n ... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n ... task=task,\n ... )\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _HUGGINGFACE_HUB_AVAILABLE:\n raise ImportError(\n \"`InferenceEndpointsLLM` cannot be used as `huggingface-hub` is not \"\n \"installed, please install it with `pip install huggingface-hub`.\"\n )\n\n self.do_sample = do_sample\n self.max_new_tokens = max_new_tokens\n self.repetition_penalty = repetition_penalty\n self.seed = seed\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.inference_endpoint = get_inference_endpoint(\n name=endpoint_name, namespace=endpoint_namespace, token=token\n )\n self.inference_endpoint.wait(timeout=30)\n
"},{"location":"reference/distilabel/llm/huggingface/transformers/","title":"transformers","text":""},{"location":"reference/distilabel/llm/huggingface/transformers/#distilabel.llm.huggingface.transformers.TransformersLLM","title":"TransformersLLM
","text":" Bases: LLM
Source code in src/distilabel/llm/huggingface/transformers.py
class TransformersLLM(LLM):\n def __init__(\n self,\n model: \"PreTrainedModel\",\n tokenizer: \"PreTrainedTokenizer\",\n task: \"Task\",\n max_new_tokens: int = 128,\n do_sample: bool = False,\n temperature: float = 1.0,\n top_k: int = 50,\n top_p: float = 1.0,\n typical_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the TransformersLLM class.\n\n Args:\n model (PreTrainedModel): the model to be used for generation.\n tokenizer (PreTrainedTokenizer): the tokenizer to be used for generation.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n do_sample (bool, optional): whether to sample from the model or not.\n Defaults to False.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 50.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n typical_p (float, optional): the typical-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used for generation.\n If `None`, the number of threads will be set to the number of available CPUs.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n Defaults to `None`.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): the function to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TransformersLLM\n >>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n >>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n >>> task = Task()\n >>> llm = TransformersLLM(\n ... model=model,\n ... tokenizer=tokenizer,\n ... task=task,\n ... 
)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n self.max_new_tokens = max_new_tokens\n self.do_sample = do_sample\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.model = model\n self.tokenizer = tokenizer\n\n if self.tokenizer.pad_token is None:\n self.tokenizer.pad_token = self.tokenizer.eos_token\n if (\n hasattr(self.tokenizer, \"use_default_system_prompt\")\n and self.tokenizer.use_default_system_prompt # type: ignore\n ):\n # The `tokenizer` also has a method named `apply_chat_template` that expects a `Conversation` as OpenAI does with the ChatML format\n warnings.warn(\n \"The provided `tokenizer` has `use_default_system_prompt=True` which means that the default system prompt will be used, which may collide with the `task` provided as an arg to this class.\",\n UserWarning,\n stacklevel=2,\n )\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_new_tokens,\n \"do_sample\": self.do_sample,\n \"temperature\": self.temperature,\n \"top_k\": self.top_k,\n \"top_p\": self.top_p,\n \"typical_p\": self.typical_p,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the Transformers model.\"\"\"\n return self.model.config.name_or_path\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n encodings = self.tokenizer(prompts, padding=True, return_tensors=\"pt\")\n encodings = encodings.to(self.model.device)\n with torch.inference_mode():\n generated_ids = self.model.generate(\n **encodings, # type: ignore\n pad_token_id=self.tokenizer.eos_token_id,\n generation_config=GenerationConfig(\n do_sample=self.do_sample,\n temperature=self.temperature,\n max_new_tokens=self.max_new_tokens,\n top_k=self.top_k,\n top_p=self.top_p,\n typical_p=self.typical_p,\n num_return_sequences=num_generations,\n ),\n )\n raw_outputs = self.tokenizer.batch_decode(\n generated_ids[:, encodings.input_ids.shape[1] :],\n skip_special_tokens=True,\n clean_up_tokenization_spaces=True,\n )\n outputs = []\n for prompt, i in zip(prompts, range(0, len(raw_outputs), num_generations)):\n output = []\n for raw_output in raw_outputs[i : i + num_generations]:\n try:\n parsed_output = self.task.parse_output(raw_output)\n except Exception as e:\n logger.error(f\"Error parsing Transformers output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/huggingface/transformers/#distilabel.llm.huggingface.transformers.TransformersLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the Transformers model.
"},{"location":"reference/distilabel/llm/huggingface/transformers/#distilabel.llm.huggingface.transformers.TransformersLLM.__init__","title":"__init__(model, tokenizer, task, max_new_tokens=128, do_sample=False, temperature=1.0, top_k=50, top_p=1.0, typical_p=1.0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the TransformersLLM class.
Parameters:
Name | Type | Description | Default
model | PreTrainedModel | the model to be used for generation. | required
tokenizer | PreTrainedTokenizer | the tokenizer to be used for generation. | required
task | Task | the task to be performed by the LLM. | required
max_new_tokens | int | the maximum number of tokens to be generated. Defaults to 128. | 128
do_sample | bool | whether to sample from the model or not. Defaults to False. | False
temperature | float | the temperature to be used for generation. Defaults to 1.0. | 1.0
top_k | int | the top-k value to be used for generation. Defaults to 50. | 50
top_p | float | the top-p value to be used for generation. Defaults to 1.0. | 1.0
typical_p | float | the typical-p value to be used for generation. Defaults to 1.0. | 1.0
num_threads | Union[int, None] | the number of threads to be used for generation. If None, the number of threads will be set to the number of available CPUs. Defaults to None. | None
prompt_format | Union[SupportedFormats, None] | the format to be used for formatting the prompts. If None, the prompts will not be formatted. Defaults to None. | None
prompt_formatting_fn | Union[Callable[..., str], None] | the function to be used for formatting the prompts. If None, the prompts will not be formatted. | None
Examples:
>>> from transformers import AutoModelForCausalLM, AutoTokenizer\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import TransformersLLM\n>>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n>>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n>>> task = Task()\n>>> llm = TransformersLLM(\n... model=model,\n... tokenizer=tokenizer,\n... task=task,\n... )\n
Source code in src/distilabel/llm/huggingface/transformers.py
def __init__(\n self,\n model: \"PreTrainedModel\",\n tokenizer: \"PreTrainedTokenizer\",\n task: \"Task\",\n max_new_tokens: int = 128,\n do_sample: bool = False,\n temperature: float = 1.0,\n top_k: int = 50,\n top_p: float = 1.0,\n typical_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the TransformersLLM class.\n\n Args:\n model (PreTrainedModel): the model to be used for generation.\n tokenizer (PreTrainedTokenizer): the tokenizer to be used for generation.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n do_sample (bool, optional): whether to sample from the model or not.\n Defaults to False.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 50.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n typical_p (float, optional): the typical-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used for generation.\n If `None`, the number of threads will be set to the number of available CPUs.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n Defaults to `None`.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): the function to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TransformersLLM\n >>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n >>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n >>> task = Task()\n >>> llm = TransformersLLM(\n ... model=model,\n ... tokenizer=tokenizer,\n ... task=task,\n ... )\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n self.max_new_tokens = max_new_tokens\n self.do_sample = do_sample\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.model = model\n self.tokenizer = tokenizer\n\n if self.tokenizer.pad_token is None:\n self.tokenizer.pad_token = self.tokenizer.eos_token\n if (\n hasattr(self.tokenizer, \"use_default_system_prompt\")\n and self.tokenizer.use_default_system_prompt # type: ignore\n ):\n # The `tokenizer` also has a method named `apply_chat_template` that expects a `Conversation` as OpenAI does with the ChatML format\n warnings.warn(\n \"The provided `tokenizer` has `use_default_system_prompt=True` which means that the default system prompt will be used, which may collide with the `task` provided as an arg to this class.\",\n UserWarning,\n stacklevel=2,\n )\n
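A hedged end-to-end sketch (added for illustration): generating completions with the gpt2 checkpoint. It assumes the base LLM class exposes a public generate method that dispatches to _generate, and that TextGenerationTask expects an 'input' column; both are assumptions, not confirmed by this page.
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import TransformersLLM
>>> llm = TransformersLLM(
...     model=AutoModelForCausalLM.from_pretrained('gpt2'),
...     tokenizer=AutoTokenizer.from_pretrained('gpt2'),
...     task=Task(),
...     max_new_tokens=32,
... )
>>> outputs = llm.generate([{'input': 'What are the first 5 Fibonacci numbers?'}], num_generations=2)
>>> # outputs is a list (one entry per input) of lists of LLMOutput items holding the raw and parsed generations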
"},{"location":"reference/distilabel/tasks/","title":"tasks","text":""},{"location":"reference/distilabel/tasks/#distilabel.tasks.CritiqueTask","title":"CritiqueTask
dataclass
","text":" Bases: RatingToArgillaMixin
, Task
A Task for critique / judge tasks.
Parameters:
Name | Type | Description | Default
system_prompt | str | the system prompt to be used for generation. | required
task_description | Union[str, None] | the description of the task. Defaults to None. | None
Source code in src/distilabel/tasks/critique/base.py
@dataclass\nclass CritiqueTask(RatingToArgillaMixin, Task):\n \"\"\"A `Task` for critique / judge tasks.\n\n Args:\n system_prompt (str): the system prompt to be used for generation.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n \"\"\"\n\n __type__: ClassVar[Literal[\"labelling\"]] = \"labelling\"\n\n @property\n def input_args_names(self) -> List[str]:\n \"\"\"Returns the names of the input arguments of the task.\"\"\"\n return [\"input\", \"generations\"]\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the names of the output arguments of the task.\"\"\"\n return [\"critique\", \"score\"]\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n score_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=score_column,\n rationale_column=critique_column,\n ratings_values=score_values or [1, 2, 3, 4, 5],\n )\n\n def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n return super().to_argilla_record(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=score_column,\n rationale_column=critique_column,\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.CritiqueTask.input_args_names","title":"input_args_names: List[str]
property
","text":"Returns the names of the input arguments of the task.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.CritiqueTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the names of the output arguments of the task.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.JudgeLMTask","title":"JudgeLMTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask following the prompt template used by JudgeLM.
Parameters:
Name | Type | Description | Default
system_prompt | str | the system prompt to be used for generation. Defaults to None. | 'You are a helpful and precise assistant for checking the quality of the answer.'
task_description | Union[str, None] | the description of the task. Defaults to None. | 'We would like to request your feedback on the performance of {num_responses} AI assistants in response to the user question displayed above.\nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only {num_responses} values indicating the scores for Assistants 1 to {num_responses}, respectively. The {num_responses} scores are separated by a space. In the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment.'
Source code in src/distilabel/tasks/preference/judgelm.py
@dataclass\nclass JudgeLMTask(PreferenceTask):\n \"\"\"A `PreferenceTask` following the prompt templated used by JudgeLM.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n \"\"\"\n\n task_description: str = (\n \"We would like to request your feedback on the performance of {num_responses} AI assistants in response to the\"\n \" user question displayed above.\\nPlease rate the helpfulness, relevance, accuracy, level of details\"\n \" of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher\"\n \" score indicates better overall performance.\\nPlease first output a single line containing only {num_responses}\"\n \" values indicating the scores for Assistants 1 to {num_responses}, respectively. The {num_responses} scores are separated by\"\n \" a space. In the subsequent line, please provide a comprehensive explanation of your evaluation,\"\n \" avoiding any potential bias and ensuring that the order in which the responses were presented does\"\n \" not affect your judgment.\"\n )\n system_prompt: str = \"You are a helpful and precise assistant for checking the quality of the answer.\"\n\n __jinja2_template__: ClassVar[str] = _JUDGELM_TEMPLATE\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the JudgeLM specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import JudgeLMTask\n >>> task = JudgeLMTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"[Question]\\nWhat are the first 5 Fibonacci numbers?\\n...\",\n )\n \"\"\"\n render_kwargs = {\n \"input\": input,\n \"responses\": generations,\n \"task_description\": self.task_description.format(\n num_responses=len(generations)\n ),\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> JudgeLMOutput:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n split_output = output.split(\"\\n\")\n rating = [float(rating) for rating in split_output[0].split(\" \")]\n rationale = \"\\n\".join(split_output[1:])\n return JudgeLMOutput(rating=rating, rationale=rationale)\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.JudgeLMTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the JudgeLM specification.
Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import JudgeLMTask\n >>> task = JudgeLMTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"[Question]\n
What are the first 5 Fibonacci numbers? ...\", )
Source code in src/distilabel/tasks/preference/judgelm.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the JudgeLM specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import JudgeLMTask\n >>> task = JudgeLMTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"[Question]\\nWhat are the first 5 Fibonacci numbers?\\n...\",\n )\n \"\"\"\n render_kwargs = {\n \"input\": input,\n \"responses\": generations,\n \"task_description\": self.task_description.format(\n num_responses=len(generations)\n ),\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.JudgeLMTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/judgelm.py
def parse_output(self, output: str) -> JudgeLMOutput:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n split_output = output.split(\"\\n\")\n rating = [float(rating) for rating in split_output[0].split(\" \")]\n rationale = \"\\n\".join(split_output[1:])\n return JudgeLMOutput(rating=rating, rationale=rationale)\n
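An added illustration (not from the source): the first line of the model output is read as space-separated scores and the remaining lines become the rationale.
>>> task = JudgeLMTask()
>>> parsed = task.parse_output('7 9\nAssistant 2 gave a more detailed and accurate answer.')
>>> # parsed carries rating=[7.0, 9.0] and rationale='Assistant 2 gave a more detailed and accurate answer.'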
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.PrometheusTask","title":"PrometheusTask
dataclass
","text":" Bases: CritiqueTask
Source code in src/distilabel/tasks/critique/prometheus.py
@dataclass\nclass PrometheusTask(CritiqueTask):\n scoring_criteria: str\n score_descriptions: Dict[int, str]\n\n system_prompt: str = \"You are a fair evaluator language model.\"\n\n __jinja2_template__: ClassVar[str] = _PROMETHEUS_TEMPLATE\n\n @property\n def input_args_names(self) -> List[str]:\n return super().input_args_names + [\"ref_completion\"]\n\n def generate_prompt(\n self, input: str, generations: str, ref_completion: str, **_: Any\n ) -> Prompt:\n render_kwargs = {\n \"instruction\": input,\n \"completion\": generations,\n \"ref_completion\": ref_completion,\n \"scoring_criteria\": self.scoring_criteria,\n \"score_descriptions\": self.score_descriptions,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n # We use a regex instead of splitting by the delimiter because the\n # critique may contain the delimiter, and using the regex is safer.\n pattern = r\"(.+?)\\. \\[RESULT\\] (\\d+)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(2)),\n critique=match.group(1).strip(),\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.PrometheusTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/critique/prometheus.py
def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n # We use a regex instead of splitting by the delimiter because the\n # critique may contain the delimiter, and using the regex is safer.\n pattern = r\"(.+?)\\. \\[RESULT\\] (\\d+)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(2)),\n critique=match.group(1).strip(),\n )\n
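An added illustration (the scoring_criteria and score_descriptions values below are made up for the example): the regex captures everything before '. [RESULT] N' as the critique and N as the score.
>>> task = PrometheusTask(
...     scoring_criteria='Overall quality of the answer',
...     score_descriptions={1: 'very poor', 2: 'poor', 3: 'average', 4: 'good', 5: 'excellent'},
... )
>>> parsed = task.parse_output('The answer is factually correct and well structured. [RESULT] 4')
>>> # parsed carries critique='The answer is factually correct and well structured' and score=4.0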
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.Prompt","title":"Prompt
dataclass
","text":"A dataclass
representing a Prompt
.
Parameters:
Name Type Description Defaultsystem_prompt
str
the system prompt.
requiredformatted_prompt
str
the formatted prompt.
requiredExamples:
>>> from distilabel.tasks.prompt import Prompt\n>>> prompt = Prompt(\n... system_prompt=\"You are a helpful assistant.\",\n... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n... )\n
Source code in src/distilabel/tasks/prompt.py
@dataclass\nclass Prompt:\n \"\"\"A `dataclass` representing a `Prompt`.\n\n Args:\n system_prompt (str): the system prompt.\n formatted_prompt (str): the formatted prompt.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n \"\"\"\n\n system_prompt: str\n formatted_prompt: str\n\n def format_as(self, format: SupportedFormats) -> Union[str, List[ChatCompletion]]:\n \"\"\"Formats the prompt as the specified format.\n\n Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\\nWhat are the first 5 Fibonacci numbers?'\n \"\"\"\n if format == \"default\":\n return f\"{self.system_prompt}\\n{self.formatted_prompt}\"\n elif format == \"openai\":\n return [\n ChatCompletion(\n role=\"system\",\n content=self.system_prompt,\n ),\n ChatCompletion(role=\"user\", content=self.formatted_prompt),\n ]\n elif format == \"llama2\":\n return f\"<s>[INST] <<SYS>>\\n{self.system_prompt}<</SYS>>\\n\\n{self.formatted_prompt} [/INST]\"\n elif format == \"chatml\":\n return f\"<|im_start|>system\\n{self.system_prompt}<|im_end|>\\n<|im_start|>user\\n{self.formatted_prompt}<|im_end|>\\n<|im_start|>assistant\\n\"\n elif format in [\"zephyr\", \"notus\"]:\n return f\"<|system|>\\n{self.system_prompt}</s>\\n<|user|>\\n{self.formatted_prompt}</s>\\n<|assistant|>\\n\"\n else:\n raise ValueError(\n f\"Format {format} not supported, please provide a custom `prompt_formatting_fn`\"\n \" or use any of the available formats: openai, llama2, chatml, zephyr\"\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.Prompt.format_as","title":"format_as(format)
","text":"Formats the prompt as the specified format.
Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\n
What are the first 5 Fibonacci numbers?'
Source code in src/distilabel/tasks/prompt.py
def format_as(self, format: SupportedFormats) -> Union[str, List[ChatCompletion]]:\n \"\"\"Formats the prompt as the specified format.\n\n Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\\nWhat are the first 5 Fibonacci numbers?'\n \"\"\"\n if format == \"default\":\n return f\"{self.system_prompt}\\n{self.formatted_prompt}\"\n elif format == \"openai\":\n return [\n ChatCompletion(\n role=\"system\",\n content=self.system_prompt,\n ),\n ChatCompletion(role=\"user\", content=self.formatted_prompt),\n ]\n elif format == \"llama2\":\n return f\"<s>[INST] <<SYS>>\\n{self.system_prompt}<</SYS>>\\n\\n{self.formatted_prompt} [/INST]\"\n elif format == \"chatml\":\n return f\"<|im_start|>system\\n{self.system_prompt}<|im_end|>\\n<|im_start|>user\\n{self.formatted_prompt}<|im_end|>\\n<|im_start|>assistant\\n\"\n elif format in [\"zephyr\", \"notus\"]:\n return f\"<|system|>\\n{self.system_prompt}</s>\\n<|user|>\\n{self.formatted_prompt}</s>\\n<|assistant|>\\n\"\n else:\n raise ValueError(\n f\"Format {format} not supported, please provide a custom `prompt_formatting_fn`\"\n \" or use any of the available formats: openai, llama2, chatml, zephyr\"\n )\n
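An added illustration of one of the non-default formats, derived directly from the f-strings in the source above:
>>> prompt = Prompt(
...     system_prompt='You are a helpful assistant.',
...     formatted_prompt='What are the first 5 Fibonacci numbers?',
... )
>>> prompt.format_as('llama2')
'<s>[INST] <<SYS>>\nYou are a helpful assistant.<</SYS>>\n\nWhat are the first 5 Fibonacci numbers? [/INST]'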
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.SelfInstructTask","title":"SelfInstructTask
dataclass
","text":" Bases: TextGenerationTask
A TextGenerationTask
following the Self-Instruct specification for building the prompts.
Reference: https://github.com/yizhongw/self-instruct
Parameters:
Name | Type | Description | Default
system_prompt | str | the system prompt to be used. Defaults to None. | 'You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks. You are given a task description and a set of instructions for how to write the prompts for an specific AI application.'
principles | Dict[str, List[str]] | the principles to be used for the system prompt. Defaults to None. | field(default_factory=lambda : {'harmlessness': harmlessness, 'helpfulness': helpfulness, 'truthfulness': truthfulness, 'honesty': honesty, 'verbalized_calibration': verbalized_calibration}, repr=False)
principles_distribution | Union[Dict[str, float], Literal[balanced], None] | the distribution of principles to be used for the system prompt. Defaults to None. | None
application_description | str | the description of the AI application. Defaults to \"AI assistant\". | 'AI assistant'
num_instructions | int | the number of instructions to be used for the prompt. Defaults to 5. | 5
Source code in src/distilabel/tasks/text_generation/self_instruct.py
@dataclass\nclass SelfInstructTask(TextGenerationTask):\n \"\"\"A `TextGenerationTask` following the Self-Instruct specification for building\n the prompts.\n\n Reference: https://github.com/yizhongw/self-instruct\n\n Args:\n system_prompt (str, optional): the system prompt to be used. Defaults to `None`.\n principles (Dict[str, List[str]], optional): the principles to be used for the system prompt.\n Defaults to `None`.\n principles_distribution (Union[Dict[str, float], Literal[\"balanced\"], None], optional): the\n distribution of principles to be used for the system prompt. Defaults to `None`.\n application_description (str, optional): the description of the AI application. Defaults to\n \"AI assistant\".\n num_instructions (int, optional): the number of instructions to be used for the prompt.\n Defaults to 5.\n \"\"\"\n\n system_prompt: str = (\n \"You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks.\"\n \" You are given a task description and a set of instructions for how to write the prompts for an\"\n \" specific AI application.\"\n )\n application_description: str = \"AI assistant\"\n num_instructions: int = 5\n\n __jinja2_template__: str = _SELF_INSTRUCT_TEMPLATE\n\n def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the Self-Instruct specification.\n\n Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\\nDevelop 2 user queries that ...\",\n )\n \"\"\"\n render_kwargs = {\n \"application_description\": self.application_description,\n \"num_instructions\": self.num_instructions,\n \"input\": input,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n @property\n def output_args_names(self) -> List[str]:\n return [\"instructions\"]\n\n def parse_output(self, output: str) -> Dict[str, List[str]]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = re.compile(r\"\\d+\\.\\s*(.*?)\\n\")\n return {\"instructions\": pattern.findall(output)}\n\n def to_argilla_dataset(self, dataset_row: Dict[str, Any]) -> \"FeedbackDataset\":\n # First we infer the fields from the input_args_names, but we could also\n # create those manually instead using `rg.TextField(...)`\n fields = infer_fields_from_dataset_row(\n field_names=self.input_args_names,\n dataset_row=dataset_row,\n )\n # Once the input fields have been defined, then we also include the instruction\n # field which will be fulfilled with each of the instructions generated.\n fields.append(rg.TextField(name=\"instruction\", title=\"instruction\")) # type: ignore\n # Then we add a default `RatingQuestion` which asks the users to provide a\n # rating for each of the generations, differing from the scenario where the inputs\n # are the fields and the outputs the ones used to formulate the quesstions. 
So on,\n # in this scenario we won't have suggestions, as the questions will be related to the\n # combination of inputs and outputs.\n questions = [\n rg.RatingQuestion( # type: ignore\n name=\"instruction-rating\",\n title=\"How would you rate the generated instruction?\",\n values=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n ]\n # Finally, we define some metadata properties that can be potentially used\n # while exploring the dataset within Argilla to get more insights on the data.\n metadata_properties = []\n for arg_name in self.input_args_names:\n if isinstance(dataset_row[arg_name], list):\n for idx in range(1, len(dataset_row[arg_name]) + 1):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}-{idx}\") # type: ignore\n )\n elif isinstance(dataset_row[arg_name], str):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}\") # type: ignore\n )\n else:\n warnings.warn(\n f\"Unsupported input type ({type(dataset_row[arg_name])}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=\"length-instruction\") # type: ignore\n ) # type: ignore\n # Then we just return the `FeedbackDataset` with the fields, questions, and metadata properties\n # defined above.\n return rg.FeedbackDataset(\n fields=fields,\n questions=questions, # type: ignore\n metadata_properties=metadata_properties, # Note that these are always optional\n )\n\n def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n instructions_column: Optional[str] = \"instructions\",\n ) -> List[\"FeedbackRecord\"]:\n \"\"\"Converts a dataset row to a list of Argilla `FeedbackRecord`s.\"\"\"\n records = []\n for instructions in dataset_row[instructions_column]: # type: ignore\n for instruction in instructions:\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n value = value.strip() if isinstance(value, str) else \"\"\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n fields[\"instruction\"] = instruction\n metadata[\"length-instruction\"] = len(instruction)\n\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(\n model_metadata_from_dataset_row(dataset_row=dataset_row)\n )\n # Finally, we append the `FeedbackRecord` with the fields and the metadata\n records.append(rg.FeedbackRecord(fields=fields, metadata=metadata))\n if not records:\n raise ValueError(\n f\"Skipping the row {dataset_row} as the list of `FeedbackRecord` is empty as those could not be inferred.\"\n )\n return records\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.SelfInstructTask.generate_prompt","title":"generate_prompt(input, **_)
","text":"Generates a prompt following the Self-Instruct specification.
Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\n
Develop 2 user queries that ...\", )
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the Self-Instruct specification.\n\n Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\\nDevelop 2 user queries that ...\",\n )\n \"\"\"\n render_kwargs = {\n \"application_description\": self.application_description,\n \"num_instructions\": self.num_instructions,\n \"input\": input,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.SelfInstructTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def parse_output(self, output: str) -> Dict[str, List[str]]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = re.compile(r\"\\d+\\.\\s*(.*?)\\n\")\n return {\"instructions\": pattern.findall(output)}\n
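An added illustration: the pattern above captures each numbered line, so every instruction (including the last one) must be followed by a newline to be picked up.
>>> task = SelfInstructTask()
>>> task.parse_output('1. What is the capital of France?\n2. Summarize the history of Rome.\n')
{'instructions': ['What is the capital of France?', 'Summarize the history of Rome.']}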
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.SelfInstructTask.to_argilla_record","title":"to_argilla_record(dataset_row, instructions_column='instructions')
","text":"Converts a dataset row to a list of Argilla FeedbackRecord
s.
src/distilabel/tasks/text_generation/self_instruct.py
def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n instructions_column: Optional[str] = \"instructions\",\n) -> List[\"FeedbackRecord\"]:\n \"\"\"Converts a dataset row to a list of Argilla `FeedbackRecord`s.\"\"\"\n records = []\n for instructions in dataset_row[instructions_column]: # type: ignore\n for instruction in instructions:\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n value = value.strip() if isinstance(value, str) else \"\"\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n fields[\"instruction\"] = instruction\n metadata[\"length-instruction\"] = len(instruction)\n\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(\n model_metadata_from_dataset_row(dataset_row=dataset_row)\n )\n # Finally, we append the `FeedbackRecord` with the fields and the metadata\n records.append(rg.FeedbackRecord(fields=fields, metadata=metadata))\n if not records:\n raise ValueError(\n f\"Skipping the row {dataset_row} as the list of `FeedbackRecord` is empty as those could not be inferred.\"\n )\n return records\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.Task","title":"Task
","text":" Bases: ABC
Abstract class used to define the methods required to create a Task, to be used within an LLM.
Parameters:
Name | Type | Description | Default
system_prompt | str | the system prompt to be used for generation. | required
task_description | Union[str, None] | the description of the task. Defaults to None. | None
Raises:
Type | Description
ValueError | if the __jinja2_template__ attribute is not provided.
Source code in src/distilabel/tasks/base.py
class Task(ABC):\n \"\"\"Abstract class used to define the methods required to create a `Task`, to be used\n within an `LLM`.\n\n Args:\n system_prompt (str): the system prompt to be used for generation.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n\n Raises:\n ValueError: if the `__jinja2_template__` attribute is not provided.\n \"\"\"\n\n system_prompt: str\n task_description: Union[str, None] = None\n\n __jinja2_template__: Union[str, None] = None\n __type__: Union[Literal[\"generation\", \"labelling\"], None] = None\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield \"system_prompt\", self.system_prompt\n yield \"task_description\", self.task_description\n yield \"input_args_names\", self.input_args_names\n yield \"output_args_names\", self.output_args_names\n\n @property\n def template(self) -> \"Template\":\n if self.__jinja2_template__ is None:\n raise ValueError(\n \"You must provide a `__jinja2_template__` attribute to your Task subclass.\"\n )\n\n return Template(open(self.__jinja2_template__).read())\n\n @abstractmethod\n def generate_prompt(self, **kwargs: Any) -> Prompt:\n pass\n\n @abstractmethod\n def parse_output(self, output: str) -> Any:\n pass\n\n @property\n @abstractmethod\n def input_args_names(self) -> List[str]:\n pass\n\n @property\n @abstractmethod\n def output_args_names(self) -> List[str]:\n pass\n\n def validate_dataset(self, columns_in_dataset: List[str]) -> None:\n \"\"\"Validates that the dataset contains the required columns for the task.\n\n Args:\n columns_in_dataset (List[str]): the columns in the dataset.\n\n Raises:\n KeyError: if the dataset does not contain the required columns.\n \"\"\"\n for input_arg_name in self.input_args_names:\n if input_arg_name not in columns_in_dataset:\n raise KeyError(\n f\"LLM expects a column named '{input_arg_name}' in the provided\"\n \" dataset, but it was not found.\"\n )\n\n def to_argilla_dataset(\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> \"FeedbackDataset\":\n raise NotImplementedError(\n \"`to_argilla_dataset` is not implemented, if you want to export your dataset as an Argilla\"\n \" `FeedbackDataset` you will need to implement this method first.\"\n )\n\n def to_argilla_record(\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n raise NotImplementedError(\n \"`to_argilla_record` is not implemented, if you want to export your dataset as an Argilla\"\n \" `FeedbackDataset` you will need to implement this method first.\"\n )\n\n # Renamed to _to_argilla_record instead of renaming `to_argilla_record` to protected, as that would\n # imply more breaking changes.\n def _to_argilla_record( # noqa: C901\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n column_names = list(dataset_row.keys())\n if self.__type__ is None or self.__type__ == \"generation\":\n required_column_names = self.input_args_names + self.output_args_names\n elif self.__type__ == \"labelling\":\n required_column_names = self.output_args_names\n else:\n raise ValueError(\"The task type is not supported.\")\n\n dataset_rows = [dataset_row]\n if \"generation_model\" in dataset_row and isinstance(\n dataset_row[\"generation_model\"], list\n ):\n generation_columns = column_names[\n column_names.index(\"generation_model\") : column_names.index(\n \"labelling_model\"\n )\n if \"labelling_model\" in column_names\n 
else None\n ]\n if any(\n isinstance(nested, list)\n for column_name in list(\n set(generation_columns)\n - {\n \"generation_model\",\n \"generation_prompt\",\n \"raw_generation_response\",\n }\n )\n for nested in dataset_row[column_name]\n ):\n if any(\n generation_column in required_column_names\n for generation_column in generation_columns\n ):\n unwrapped_dataset_rows = []\n for row in dataset_rows:\n for idx in range(len(dataset_row[\"generation_model\"])):\n unwrapped_dataset_row = {}\n for key, value in row.items():\n if key in generation_columns:\n unwrapped_dataset_row[key] = value[idx]\n else:\n unwrapped_dataset_row[key] = value\n unwrapped_dataset_rows.append(unwrapped_dataset_row)\n dataset_rows = unwrapped_dataset_rows\n\n if \"labelling_model\" in dataset_row and isinstance(\n dataset_row[\"labelling_model\"], list\n ):\n labelling_columns = column_names[column_names.index(\"labelling_model\") :]\n if any(\n isinstance(nested, list)\n for column_name in list(\n set(labelling_columns)\n - {\n \"labelling_model\",\n \"labelling_prompt\",\n \"raw_labelling_response\",\n }\n )\n for nested in dataset_row[column_name]\n ):\n if any(\n labelling_column in required_column_names\n for labelling_column in labelling_columns\n ):\n unwrapped_dataset_rows = []\n for row in dataset_rows:\n for idx in range(len(dataset_row[\"labelling_model\"])):\n unwrapped_dataset_row = {}\n for key, value in row.items():\n if key in labelling_columns:\n unwrapped_dataset_row[key] = value[idx]\n else:\n unwrapped_dataset_row[key] = value\n unwrapped_dataset_rows.append(unwrapped_dataset_row)\n dataset_rows = unwrapped_dataset_rows\n\n if len(dataset_rows) == 1:\n return self.to_argilla_record(dataset_rows[0], *args, **kwargs)\n\n records = []\n for dataset_row in dataset_rows:\n generated_records = self.to_argilla_record(dataset_row, *args, **kwargs)\n if isinstance(generated_records, list):\n records.extend(generated_records)\n else:\n records.append(generated_records)\n return records\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.Task.validate_dataset","title":"validate_dataset(columns_in_dataset)
","text":"Validates that the dataset contains the required columns for the task.
Parameters:
Name Type Description Default
columns_in_dataset
List[str]
the columns in the dataset.
required
Raises:
Type Description
KeyError
if the dataset does not contain the required columns.
Source code in src/distilabel/tasks/base.py
def validate_dataset(self, columns_in_dataset: List[str]) -> None:\n \"\"\"Validates that the dataset contains the required columns for the task.\n\n Args:\n columns_in_dataset (List[str]): the columns in the dataset.\n\n Raises:\n KeyError: if the dataset does not contain the required columns.\n \"\"\"\n for input_arg_name in self.input_args_names:\n if input_arg_name not in columns_in_dataset:\n raise KeyError(\n f\"LLM expects a column named '{input_arg_name}' in the provided\"\n \" dataset, but it was not found.\"\n )\n
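As a rough usage sketch (assuming the default TextGenerationTask, whose only required input column is input; the extra column name is illustrative):
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> task = TextGenerationTask()\n>>> task.validate_dataset([\"input\", \"metadata\"])  # passes, \"input\" is present\n>>> task.validate_dataset([\"prompt\"])  # raises KeyError, \"input\" is missing\n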
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask","title":"TextGenerationTask
dataclass
","text":" Bases: Task
A base Task
definition for text generation using LLMs.
Parameters:
Name Type Description Default
system_prompt
str
the system prompt to be used. Defaults to None
.
\"You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\"
principles
Dict[str, List[str]]
the principles to be used for the system prompt. Defaults to None
.
field(default_factory=lambda : {'harmlessness': harmlessness, 'helpfulness': helpfulness, 'truthfulness': truthfulness, 'honesty': honesty, 'verbalized_calibration': verbalized_calibration}, repr=False)
principles_distribution
Union[Dict[str, float], Literal['balanced'], None]
the distribution of principles to be used for the system prompt. Defaults to None
.
None
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> task = TextGenerationTask()\n
Source code in src/distilabel/tasks/text_generation/base.py
@dataclass\nclass TextGenerationTask(Task):\n \"\"\"A base `Task` definition for text generation using LLMs.\n\n Args:\n system_prompt (str, optional): the system prompt to be used. Defaults to `None`.\n principles (Dict[str, List[str]], optional): the principles to be used for the system prompt.\n Defaults to `None`.\n principles_distribution (Union[Dict[str, float], Literal[\"balanced\"], None], optional): the\n distribution of principles to be used for the system prompt. Defaults to `None`.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = TextGenerationTask()\n \"\"\"\n\n system_prompt: str = (\n \"You are a helpful, respectful and honest assistant. Always answer as helpfully as possible,\"\n \" while being safe. Your answers should not include any harmful, unethical, racist, sexist,\"\n \" toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased\"\n \" and positive in nature.\\nIf a question does not make any sense, or is not factually coherent,\"\n \" explain why instead of answering something not correct. If you don't know the answer to a\"\n \" question, please don't share false information.\"\n )\n principles: Dict[str, List[str]] = field(\n default_factory=lambda: {\n \"harmlessness\": UltraFeedbackPrinciples.harmlessness,\n \"helpfulness\": UltraFeedbackPrinciples.helpfulness,\n \"truthfulness\": UltraFeedbackPrinciples.truthfulness,\n \"honesty\": UltraFeedbackPrinciples.honesty,\n \"verbalized_calibration\": UltraFeedbackPrinciples.verbalized_calibration,\n },\n repr=False,\n )\n principles_distribution: Union[Dict[str, float], Literal[\"balanced\"], None] = None\n\n __type__: ClassVar[Literal[\"generation\"]] = \"generation\"\n\n def __post_init__(self) -> None:\n \"\"\"Validates the `principles_distribution` if it is a dict.\n\n Raises:\n ValueError: if the `principles_distribution` is a dict and it does not sum to 1.0.\n ValueError: if the `principles` are not included in the `principles_distribution`.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n not_included_principles = [\n principle\n for principle in self.principles\n if principle not in self.principles_distribution\n ]\n if not_included_principles:\n principles_str = \", \".join(\n [f\"'{principle}'\" for principle in not_included_principles]\n )\n raise ValueError(\n f\"Principles {principles_str} included in `principles` is not in\"\n \" `principles_distribution`\"\n )\n\n if sum(self.principles_distribution.values()) != 1.0:\n raise ValueError(\n \"`principles_distribution` must sum to 1.0 if it is a dict containing\"\n \" the distribution of principles to use.\"\n )\n\n def _get_principle(self) -> str:\n \"\"\"Gets a principle from the `principles` dict respecting the `principal_distribution`.\n\n Returns:\n str: the principle to be used.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n principle_group = random.choices(\n list(self.principles_distribution.keys()),\n weights=list(self.principles_distribution.values()),\n k=1,\n )[0]\n else:\n principle_group = random.choice(list(self.principles.keys()))\n return random.choice(self.principles[principle_group])\n\n def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates the prompt to be used for generation.\n\n Args:\n input (str): the input to be used for generation.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = 
TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n \"\"\"\n system_prompt = self.system_prompt\n if self.principles_distribution is not None:\n principle = self._get_principle()\n system_prompt += \" \" + principle\n return Prompt(system_prompt=system_prompt, formatted_prompt=input)\n\n def parse_output(self, output: str) -> Dict[str, str]:\n \"\"\"Parses the output of the LLM into the desired format.\"\"\"\n return {\"generations\": output}\n\n @property\n def input_args_names(self) -> List[str]:\n \"\"\"Returns the input args names for the task.\"\"\"\n return [\"input\"]\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the output args names for the task.\"\"\"\n return [\"generations\"]\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: Optional[str] = \"generations\",\n ) -> \"FeedbackDataset\":\n # First we infer the fields from the input_args_names, but we could also\n # create those manually instead using `rg.TextField(...)`\n fields = infer_fields_from_dataset_row(\n field_names=self.input_args_names + self.output_args_names,\n dataset_row=dataset_row,\n )\n # Then we add a default `RatingQuestion` which asks the users to provide a\n # rating for each of the generations, differing from the scenario where the inputs\n # are the fields and the outputs the ones used to formulate the quesstions. So on,\n # in this scenario we won't have suggestions, as the questions will be related to the\n # combination of inputs and outputs.\n if generations_column is None or generations_column not in dataset_row:\n raise ValueError(\n f\"The `generations_column='{generations_column}'` is not present in the dataset\"\n f\" row. 
Please provide any of {list(dataset_row.keys())}.\",\n )\n questions = []\n for idx in range(1, len(dataset_row[generations_column]) + 1):\n questions.append(\n rg.RatingQuestion( # type: ignore\n name=f\"{generations_column}-{idx}-rating\",\n title=f\"How would you rate the generation at `{generations_column}-{idx}`?\",\n values=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n )\n # Finally, we define some metadata properties that can be potentially used\n # while exploring the dataset within Argilla to get more insights on the data.\n metadata_properties = []\n for arg_name in self.input_args_names + self.output_args_names:\n if isinstance(dataset_row[arg_name], list):\n for idx in range(1, len(dataset_row[arg_name]) + 1):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}-{idx}\") # type: ignore\n )\n elif isinstance(dataset_row[arg_name], str):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}\") # type: ignore\n )\n else:\n warnings.warn(\n f\"Unsupported input type ({type(dataset_row[arg_name])}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we just return the `FeedbackDataset` with the fields, questions, and metadata properties\n # defined above.\n return rg.FeedbackDataset(\n fields=fields,\n questions=questions,\n metadata_properties=metadata_properties, # Note that these are always optional\n )\n\n def to_argilla_record(self, dataset_row: Dict[str, Any]) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names + self.output_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n # TODO: value formatting was included here due to some issues\n # with `SelfInstructTask` but these list-parsing may not be needed\n # anymore.\n value = (\n value.strip()\n if isinstance(value, str)\n else \"\\n\".join(value)\n if isinstance(value, list)\n else \"\"\n )\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(fields=fields, metadata=metadata)\n
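A minimal sketch of the principles mechanism, assuming the \"balanced\" distribution (the sampled principle is random, so the resulting system prompt is not reproduced here):
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> task = TextGenerationTask(principles_distribution=\"balanced\")\n>>> prompt = task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n>>> prompt.system_prompt  # the default system prompt plus one randomly sampled principle\n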
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.input_args_names","title":"input_args_names: List[str]
property
","text":"Returns the input args names for the task.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the output args names for the task.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.__post_init__","title":"__post_init__()
","text":"Validates the principles_distribution
if it is a dict.
Raises:
Type Description
ValueError
if the principles_distribution
is a dict and it does not sum to 1.0.
ValueError
if the principles
are not included in the principles_distribution
.
Source code in src/distilabel/tasks/text_generation/base.py
def __post_init__(self) -> None:\n \"\"\"Validates the `principles_distribution` if it is a dict.\n\n Raises:\n ValueError: if the `principles_distribution` is a dict and it does not sum to 1.0.\n ValueError: if the `principles` are not included in the `principles_distribution`.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n not_included_principles = [\n principle\n for principle in self.principles\n if principle not in self.principles_distribution\n ]\n if not_included_principles:\n principles_str = \", \".join(\n [f\"'{principle}'\" for principle in not_included_principles]\n )\n raise ValueError(\n f\"Principles {principles_str} included in `principles` is not in\"\n \" `principles_distribution`\"\n )\n\n if sum(self.principles_distribution.values()) != 1.0:\n raise ValueError(\n \"`principles_distribution` must sum to 1.0 if it is a dict containing\"\n \" the distribution of principles to use.\"\n )\n
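For illustration, a sketch of a distribution that fails this validation because it omits the remaining default principles:
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> TextGenerationTask(principles_distribution={\"harmlessness\": 1.0})\n# raises ValueError: 'helpfulness', 'truthfulness', 'honesty' and 'verbalized_calibration' are missing\n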
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.generate_prompt","title":"generate_prompt(input, **_)
","text":"Generates the prompt to be used for generation.
Parameters:
Name Type Description Default
input
str
the input to be used for generation.
required
Returns:
Name Type Description
Prompt
Prompt
the generated prompt.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> task = TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n>>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\nPrompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n
Source code in src/distilabel/tasks/text_generation/base.py
def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates the prompt to be used for generation.\n\n Args:\n input (str): the input to be used for generation.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n \"\"\"\n system_prompt = self.system_prompt\n if self.principles_distribution is not None:\n principle = self._get_principle()\n system_prompt += \" \" + principle\n return Prompt(system_prompt=system_prompt, formatted_prompt=input)\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the LLM into the desired format.
Source code in src/distilabel/tasks/text_generation/base.py
def parse_output(self, output: str) -> Dict[str, str]:\n \"\"\"Parses the output of the LLM into the desired format.\"\"\"\n return {\"generations\": output}\n
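For example, the raw completion is returned wrapped under the generations key:
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> TextGenerationTask().parse_output(\"0, 1, 1, 2, 3\")\n{'generations': '0, 1, 1, 2, 3'}\n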
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.to_argilla_record","title":"to_argilla_record(dataset_row)
","text":"Converts a dataset row to an Argilla FeedbackRecord
.
Source code in src/distilabel/tasks/text_generation/base.py
def to_argilla_record(self, dataset_row: Dict[str, Any]) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names + self.output_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n # TODO: value formatting was included here due to some issues\n # with `SelfInstructTask` but these list-parsing may not be needed\n # anymore.\n value = (\n value.strip()\n if isinstance(value, str)\n else \"\\n\".join(value)\n if isinstance(value, list)\n else \"\"\n )\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(fields=fields, metadata=metadata)\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraCMTask","title":"UltraCMTask
dataclass
","text":" Bases: CritiqueTask
Source code in src/distilabel/tasks/critique/ultracm.py
@dataclass\nclass UltraCMTask(CritiqueTask):\n __jinja2_template__: ClassVar[str] = _ULTRACM_TEMPLATE\n\n system_prompt: str = (\n \"User: A one-turn chat between a curious user and an artificial intelligence\"\n \" assistant. The assistant gives helpful, very detailed, and polite answers to\"\n \" the user's questions.</s>\"\n )\n\n def generate_prompt(self, input: str, generations: str, **_: Any) -> Prompt:\n render_kwargs = {\n \"instruction\": input,\n \"completion\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=f\"User: {self.template.render(**render_kwargs)}</s>\\nAssistant: ### Feedback\\nOverall Score: \",\n )\n\n def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = r\"(\\d+(?:\\.\\d+)?)\\s*(.*)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(1)),\n critique=match.group(2).strip(),\n )\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n score_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n score_column=score_column,\n critique_column=critique_column,\n score_values=score_values or [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraCMTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/critique/ultracm.py
def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = r\"(\\d+(?:\\.\\d+)?)\\s*(.*)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(1)),\n critique=match.group(2).strip(),\n )\n
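A sketch of the expected shape, assuming task = UltraCMTask() and a reply that starts with the overall score (as primed by the \"Overall Score: \" suffix of the prompt); the repr shown is approximate:
>>> task.parse_output(\"7.5 The answer is correct but could cite its sources.\")\nCritiqueTaskOutput(score=7.5, critique='The answer is correct but could cite its sources.')\n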
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraFeedbackTask","title":"UltraFeedbackTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask
following the prompt template used by ULTRAFEEDBACK.
Parameters:
Name Type Description Default
system_prompt
str
the system prompt to be used for generation. Defaults to None
.
'Your role is to evaluate text quality based on given criteria.'
task_description
Union[str, None]
the description of the task. Defaults to None
.
ratings
Union[List[Rating], None]
the ratings to be used for the task. Defaults to None
.
Source code in src/distilabel/tasks/preference/ultrafeedback.py
@dataclass\nclass UltraFeedbackTask(PreferenceTask):\n \"\"\"A `PreferenceTask` following the prompt template used by ULTRAFEEDBACK.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n ratings (Union[List[Rating], None], optional): the ratings to be used for the task. Defaults to `None`.\n \"\"\"\n\n ratings: List[Rating]\n task_description: str\n\n system_prompt: (\n str\n ) = \"Your role is to evaluate text quality based on given criteria.\"\n\n __jinja2_template__: ClassVar[str] = field(\n default=_ULTRAFEEDBACK_TEMPLATE, init=False, repr=False\n )\n __subtasks__: ClassVar[List[str]] = [\n \"text-quality\",\n \"helpfulness\",\n \"truthfulness\",\n \"honesty\",\n \"instruction-following\",\n ]\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the ULTRAFEEDBACK specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraFeedbackTask\n >>> task = UltraFeedbackTask.for_text_quality()\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"Your role is to evaluate text quality based on given criteria.\",\n formatted_prompt=\"# General Text Quality Assessment\\nEvaluate the model's ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description,\n \"ratings\": self.ratings,\n \"input\": input,\n \"responses\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> List[UltraFeedbackOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n parsed_output = []\n for section in output.split(\"#### Output for Text \")[1:]:\n rating, rationale = section.split(\"\\n\")[1:3]\n rating = float(rating.split(\": \")[1])\n rationale = rationale.split(\": \")[1]\n parsed_output.append(\n UltraFeedbackOutput(rating=rating, rationale=rationale)\n )\n return parsed_output\n\n # Override the default `to_argilla_dataset` method to provide the `ratings_values` of\n # UltraFeedback, as the default goes from 1-10 while UltraFeedback's default is 1-5\n # (0-4 actually, but Argilla doesn't support 0s).\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n ratings_column: str = \"rating\",\n rationale_column: str = \"rationale\",\n ratings_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=ratings_column,\n rationale_column=rationale_column,\n ratings_values=ratings_values or [1, 2, 3, 4, 5],\n )\n\n @classmethod\n def for_text_quality(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # General Text Quality Assessment\n Evaluate the model's outputs based on various criteria:\n 1. 
**Correctness & Informativeness**: Does the output provide accurate and helpful information?\n 2. **Honesty & Uncertainty**: How confidently does the model convey its information, and does it express uncertainty appropriately?\n 3. **Truthfulness & Hallucination**: Does the model introduce misleading or fabricated details?\n 4. **Instruction Following**: Does the model's output align with given instructions and the user's intent?\n Your role is to provide a holistic assessment considering all the above factors.\n\n **Scoring**: Rate outputs 1 to 5 based on the overall quality, considering all aspects:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Low Quality**: Contains inaccuracies, may be entirely wrong or has severe hallucinations.\",\n ),\n Rating(\n value=2,\n description=\"**Moderate Quality**: Addresses some aspects, but has errors or is partially aligned with instructions.\",\n ),\n Rating(\n value=3,\n description=\"**Good**: Generally accurate but may contain minor errors or slight deviations.\",\n ),\n Rating(\n value=4,\n description=\"**Very Good**: Near perfect, with minor issues in terms of alignment or confidence.\",\n ),\n Rating(\n value=5,\n description=\"**Excellent**: Accurate, confident, aligned with instructions, and free of hallucinations.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_helpfulness(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Informativeness / Helpfulness Assessment\n Evaluate if model's outputs fulfill task objectives and provide high-quality, correct, and, informative content.\n Helpfulness assessment emphasizes **Overall Quality** regarding correctness and informativeness.\n **Correctness**: Accurate computation, reasoning steps, and outputs without misunderstandings or fabrication.\n\n **Scoring**: Score 1 to 5 based on extent of helpfulness, regarding both informativeness and correctness:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Severely Incorrect**: Contains significant inaccuracies or fabricated content, even if comprehensive information is provided.\",\n ),\n Rating(\n value=2,\n description=\"**Partially Incorrect**: Contains errors that may cause confusion, even though comprehensive information is present.\",\n ),\n Rating(\n value=3,\n description=\"**Correct**: Accurate and provides useful information that meets the task's requirements.\",\n ),\n Rating(\n value=4,\n description=\"**Highly Informative**: Accurate and extensive, providing valuable insights and detailed information.\",\n ),\n Rating(\n value=5,\n description=\"**Outstandingly Helpful**: Both accurate and in-depth, offering profound insights and comprehensive information.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_truthfulness(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": 
system_prompt})\n\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Truthfulness and Hallucination Assessment\n Evaluate the model's accuracy in providing information without introducing misleading or fabricated details.\n Assign numeric identifier (or \"None\") from 1 to 3 for each type of hallucination:\n 1. **Contradictory with the World (Factual Error)**: Entities, locations, concepts, or events that conflict with established knowledge.\n 2. **Contradictory with Instruction and Input**: Responses diverge, introducing new facts not aligned with instructions or inputs.\n 3. **Self-Contradictory / Logical Error**: Responses contain internal contradictions or logical errors within each independent text.\n\n **Scoring**: Rate outputs 1 to 5 based on the extent of hallucination:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Completely Hallucinated**: Entirely unreliable due to hallucinations.\",\n ),\n Rating(\n value=2,\n description=\"**Severe Hallucination**: Nearly half contains hallucinations, severe deviation from main points.\",\n ),\n Rating(\n value=3,\n description=\"**Partial Hallucination / Misunderstanding**: Overall truthful, partial misunderstanding due to hallucinations.\",\n ),\n Rating(\n value=4,\n description=\"**Insignificant Hallucination**: Mostly truthful, slight hallucination not affecting main points.\",\n ),\n Rating(\n value=5, description=\"**No Hallucination**: Free of hallucinations.\"\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_honesty(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Honesty and Uncertainty Expression Assessment\n Assess how well the model conveys honesty and uncertainty. Evaluate if the model's confidence aligns with response correctness.\n - **Indicators of Uncertainty**:\n 1. Weakeners: e.g., 'I guess,' 'probably.'\n 2. Refusal to answer.\n 3. Verbalized confidence scores: [0, 20] low; (20, 40] uncertain; (40, 60] moderate; (60, 80] leaning confident; (80, 100] high.\n - **No uncertainty expression indicate confidence.**\n - **Response Correctness**: Align with ground truth, or provide accurate content without fabrication.\n\n **Scoring**: Rate outputs 1 to 5 based on the extent of honesty and uncertainty:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Confidently Incorrect**: Confident but entirely wrong.\",\n ),\n Rating(\n value=2,\n description=\"**Confident with Significant Mistakes / Unconfident Incorrect**: Confident but contains major errors. Unconfident and entirely wrong.\",\n ),\n Rating(\n value=3,\n description=\"**Uncertain / 'I Don't Know' / Subtle Mistakes**: 'I don't know' or declines. Confident but contains minor errors. Unconfident and contains significant mistakes.\",\n ),\n Rating(\n value=4,\n description=\"**Correct but Uncertain / Expressed Subtle Mistakes**: Correct but unconfident.\",\n ),\n Rating(\n value=5,\n description=\"**Correct and Confident / Precisely Express Uncertainty**: Correct and confident. 
Makes mistakes, but precisely acknowledges minor errors and indicates uncertainty on potential mistakes.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n\n return cls(**kwargs)\n\n @classmethod\n def for_instruction_following(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Instruction Following Assessment\n Evaluate alignment between output and intent. Assess understanding of task goal and restrictions.\n **Instruction Components**: Task Goal (intended outcome), Restrictions (text styles, formats, or designated methods, etc).\n\n **Scoring**: Rate outputs 1 to 5:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(value=1, description=\"**Irrelevant**: No alignment.\"),\n Rating(\n value=2,\n description=\"**Partial Focus**: Addresses one aspect poorly.\",\n ),\n Rating(\n value=3,\n description=\"**Partial Compliance**:\\n\\t- (1) Meets goal or restrictions, neglecting other.\\n\\t- (2) Acknowledges both but slight deviations.\",\n ),\n Rating(\n value=4,\n description=\"**Almost There**: Near alignment, minor deviations.\",\n ),\n Rating(\n value=5,\n description=\"**Comprehensive Compliance**: Fully aligns, meets all requirements.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n\n return cls(**kwargs)\n
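Each of the subtasks listed in __subtasks__ has a dedicated constructor that fills in the task description and ratings; a minimal sketch using the defaults:
>>> from distilabel.tasks.preference import UltraFeedbackTask\n>>> task = UltraFeedbackTask.for_instruction_following()\n>>> task.system_prompt\n'Your role is to evaluate text quality based on given criteria.'\n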
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraFeedbackTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the ULTRAFEEDBACK specification.
Parameters:
Name Type Description Default
input
str
the input to be used for the prompt.
required
generations
List[str]
the generations to be used for the prompt.
required
Returns:
Name Type Description
Prompt
Prompt
the generated prompt.
Examples:
>>> from distilabel.tasks.preference import UltraFeedbackTask\n>>> task = UltraFeedbackTask.for_text_quality()\n>>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\nPrompt(\n    system_prompt=\"Your role is to evaluate text quality based on given criteria.\",\n    formatted_prompt=\"# General Text Quality Assessment\\nEvaluate the model's ...\",\n)\n
Source code in src/distilabel/tasks/preference/ultrafeedback.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the ULTRAFEEDBACK specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraFeedbackTask\n >>> task = UltraFeedbackTask.for_text_quality()\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"Your role is to evaluate text quality based on given criteria.\",\n formatted_prompt=\"# General Text Quality Assessment\\nEvaluate the model's ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description,\n \"ratings\": self.ratings,\n \"input\": input,\n \"responses\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraFeedbackTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/ultrafeedback.py
def parse_output(self, output: str) -> List[UltraFeedbackOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n parsed_output = []\n for section in output.split(\"#### Output for Text \")[1:]:\n rating, rationale = section.split(\"\\n\")[1:3]\n rating = float(rating.split(\": \")[1])\n rationale = rationale.split(\": \")[1]\n parsed_output.append(\n UltraFeedbackOutput(rating=rating, rationale=rationale)\n )\n return parsed_output\n
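A sketch of the parsing, assuming task = UltraFeedbackTask.for_text_quality() and a labeller reply that follows the expected \"#### Output for Text N\" layout; the repr shown is approximate and real model outputs may deviate:
>>> task.parse_output(\"#### Output for Text 1\\nRating: 4\\nRationale: Accurate and helpful.\")\n[UltraFeedbackOutput(rating=4.0, rationale='Accurate and helpful.')]\n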
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask","title":"UltraJudgeTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask
for the UltraJudge task. The UltraJudge
task has been defined at Argilla specifically for a better evaluation using AI Feedback. The task is defined based on both UltraFeedback and JudgeLM, but with several improvements / modifications.
Parameters:
Name Type Description Default
system_prompt
str
the system prompt to be used for generation. Defaults to None
.
\"You are an evaluator tasked with assessing AI assistants' responses from the perspective of typical user preferences. Your critical analysis should focus on human-like engagement, solution effectiveness, accuracy, clarity, and creativity. Approach each response as if you were the user, considering how well the response meets your needs and expectations in a real-world scenario. Provide detailed feedback that highlights strengths and areas for improvement in each response, keeping in mind the goal of simulating a human's preferred choice. Your evaluation should be impartial and thorough, reflecting a human's perspective in preferring responses that are practical, clear, authentic, and aligned with their intent. Avoid bias, and focus on the content and quality of the responses.\"
task_description
Union[str, None]
the description of the task. Defaults to None
.
\"Your task is to rigorously evaluate the performance of {num_responses} AI assistants, simulating a human's perspective. You will assess each response based on four key domains, reflecting aspects that are typically valued by humans: {areas}. First provide a score between 0 and 10 and write a detailed feedback for each area and assistant. Finally, provide a list of {num_responses} scores, each separated by a space, to reflect the performance of Assistants 1 to {num_responses}.\"
areas
List[str]
the areas to be used for the task. Defaults to a list of four areas: \"Practical Accuracy\", \"Clarity & Transparency\", \"Authenticity & Reliability\", and \"Compliance with Intent\".
field(default_factory=lambda : ['Practical Accuracy', 'Clarity & Transparency', 'Authenticity & Reliability', 'Compliance with Intent'])
Source code in src/distilabel/tasks/preference/ultrajudge.py
@dataclass\nclass UltraJudgeTask(PreferenceTask):\n \"\"\"A `PreferenceTask` for the UltraJudge task. The `UltraJudge` task has been defined\n at Argilla specifically for a better evaluation using AI Feedback. The task is defined\n based on both UltraFeedback and JudgeLM, but with several improvements / modifications.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n areas (List[str], optional): the areas to be used for the task. Defaults to a list of four areas:\n \"Practical Accuracy\", \"Clarity & Transparency\", \"Authenticity & Reliability\", and \"Compliance with Intent\".\n \"\"\"\n\n system_prompt: str = (\n \"You are an evaluator tasked with assessing AI assistants' responses from the perspective of typical user preferences.\"\n \" Your critical analysis should focus on human-like engagement, solution effectiveness, accuracy, clarity, and\"\n \" creativity. Approach each response as if you were the user, considering how well the response meets your needs\"\n \" and expectations in a real-world scenario. Provide detailed feedback that highlights strengths and areas for\"\n \" improvement in each response, keeping in mind the goal of simulating a human's preferred choice. \"\n \"Your evaluation should be impartial and thorough, reflecting a human's perspective in preferring responses that are practical,\"\n \" clear, authentic, and aligned with their intent. Avoid bias, and focus on the content and quality of the responses.\"\n )\n\n task_description: str = (\n \"Your task is to rigorously evaluate the performance of {num_responses} AI assistants, simulating a human's perspective.\"\n \" You will assess each response based on four key domains, reflecting aspects that are typically valued by humans:\"\n \" {areas}.\"\n \" First provide a score between 0 and 10 and write a detailed feedback for each area and assistant.\"\n \" Finally, provide a list of {num_responses} scores, each separated by a space, to reflect the performance of Assistants 1 to {num_responses}.\"\n )\n\n areas: List[str] = field(\n default_factory=lambda: [\n \"Practical Accuracy\",\n \"Clarity & Transparency\",\n \"Authenticity & Reliability\",\n \"Compliance with Intent\",\n ]\n )\n\n __jinja2_template__: ClassVar[str] = field(\n default=_ULTRAJUDGE_TEMPLATE, init=False, repr=False\n )\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the names of the output arguments of the task.\"\"\"\n return [\"rating\", \"areas\"]\n\n @property\n def areas_str(self) -> str:\n \"\"\"Returns a string representation of the areas.\"\"\"\n return \", \".join(self.areas[:-1]) + \", and \" + self.areas[-1]\n\n @property\n def extract_area_score_and_rationale_regex(self) -> str:\n \"\"\"Returns a regex to extract the area, score, and rationale from the output.\"\"\"\n return rf\"({'|'.join(self.areas)})\\s*-\\s*(\\d+(?:\\.\\d+)?)\\n(.*?)(?=\\n\\n|\\Z)\"\n\n @property\n def extract_final_scores_regex(self) -> str:\n \"\"\"Returns a regex to extract the final scores from the output.\"\"\"\n return r\"Final scores:\\s*((?:\\d+(?:\\.\\d+)?\\s*)+)\"\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the UltraJudge specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the 
generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraJudgeTask\n >>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description.format(\n num_responses=len(generations), areas=self.areas_str\n ),\n \"instruction\": input,\n \"responses\": generations,\n }\n\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> List[UltraJudgeOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n num_areas = len(self.areas)\n # `areas_results` includes num_generations * num_areas tuples\n areas_results = re.findall(self.extract_area_score_and_rationale_regex, output)\n final_scores = [\n float(str_score)\n for str_score in re.findall(self.extract_final_scores_regex, output)[\n 0\n ].split(\" \")\n ]\n\n outputs = []\n for i, rating in enumerate(final_scores):\n areas = {}\n # Get the areas for the i-th generation\n for area in areas_results[i * num_areas : i * num_areas + num_areas]:\n name, area_rating, rationale = area\n areas[name] = Area(rating=area_rating, rationale=rationale)\n outputs.append(UltraJudgeOutput(rating=rating, areas=areas))\n\n return outputs\n\n def _merge_rationales(\n self, rationales: List[Dict[str, Any]], generations_column: str = \"generations\"\n ) -> str:\n \"\"\"Overwrite of the `_merge_rationales` as we need to process the areas before merging.\"\"\"\n\n def format_area(area: Dict[str, Any]) -> str:\n sections = []\n for title, ratings in area.items():\n sections.append(title)\n for k, v in ratings.items():\n sections.append(f\"{k}:{v}\")\n return \"\\n\".join(sections)\n\n merged_rationales = []\n for idx, area in enumerate(rationales, start=1):\n merged_rationales.append(\n f\"{generations_column}-{idx}:\\n{format_area(area)}\\n\"\n )\n return \"\\n\".join(merged_rationales)\n
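As a small illustration of the defaults, areas_str (which is interpolated into the task description) and the final-scores regex behave as follows (a sketch):
>>> import re\n>>> from distilabel.tasks.preference import UltraJudgeTask\n>>> task = UltraJudgeTask()\n>>> task.areas_str\n'Practical Accuracy, Clarity & Transparency, Authenticity & Reliability, and Compliance with Intent'\n>>> re.findall(task.extract_final_scores_regex, \"Final scores: 8 7 9\")\n['8 7 9']\n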
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.areas_str","title":"areas_str: str
property
","text":"Returns a string representation of the areas.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.extract_area_score_and_rationale_regex","title":"extract_area_score_and_rationale_regex: str
property
","text":"Returns a regex to extract the area, score, and rationale from the output.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.extract_final_scores_regex","title":"extract_final_scores_regex: str
property
","text":"Returns a regex to extract the final scores from the output.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the names of the output arguments of the task.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the UltraJudge specification.
Parameters:
Name Type Description Default
input
str
the input to be used for the prompt.
required
generations
List[str]
the generations to be used for the prompt.
required
Returns:
Name Type Description
Prompt
Prompt
the generated prompt.
Examples:
>>> from distilabel.tasks.preference import UltraJudgeTask\n>>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n>>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\nPrompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n)\n
Source code in src/distilabel/tasks/preference/ultrajudge.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the UltraJudge specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraJudgeTask\n >>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description.format(\n num_responses=len(generations), areas=self.areas_str\n ),\n \"instruction\": input,\n \"responses\": generations,\n }\n\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/ultrajudge.py
def parse_output(self, output: str) -> List[UltraJudgeOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n num_areas = len(self.areas)\n # `areas_results` includes num_generations * num_areas tuples\n areas_results = re.findall(self.extract_area_score_and_rationale_regex, output)\n final_scores = [\n float(str_score)\n for str_score in re.findall(self.extract_final_scores_regex, output)[\n 0\n ].split(\" \")\n ]\n\n outputs = []\n for i, rating in enumerate(final_scores):\n areas = {}\n # Get the areas for the i-th generation\n for area in areas_results[i * num_areas : i * num_areas + num_areas]:\n name, area_rating, rationale = area\n areas[name] = Area(rating=area_rating, rationale=rationale)\n outputs.append(UltraJudgeOutput(rating=rating, areas=areas))\n\n return outputs\n
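A sketch of the expected reply structure, abbreviated to a single assistant and a single area (real replies cover every area per assistant; task = UltraJudgeTask() is assumed):
>>> task.parse_output(\"Practical Accuracy - 9\\nAccurate numbers.\\n\\nFinal scores: 9\")\n# -> one UltraJudgeOutput with rating 9.0 and a single parsed 'Practical Accuracy' area\n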
"},{"location":"reference/distilabel/tasks/base/","title":"base","text":""},{"location":"reference/distilabel/tasks/base/#distilabel.tasks.base.Task","title":"Task
","text":" Bases: ABC
Abstract class used to define the methods required to create a Task
, to be used within an LLM
.
Parameters:
Name Type Description Default
system_prompt
str
the system prompt to be used for generation.
required
task_description
Union[str, None]
the description of the task. Defaults to None
.
Raises:
Type Description
ValueError
if the __jinja2_template__
attribute is not provided.
Source code in src/distilabel/tasks/base.py
class Task(ABC):\n \"\"\"Abstract class used to define the methods required to create a `Task`, to be used\n within an `LLM`.\n\n Args:\n system_prompt (str): the system prompt to be used for generation.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n\n Raises:\n ValueError: if the `__jinja2_template__` attribute is not provided.\n \"\"\"\n\n system_prompt: str\n task_description: Union[str, None] = None\n\n __jinja2_template__: Union[str, None] = None\n __type__: Union[Literal[\"generation\", \"labelling\"], None] = None\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield \"system_prompt\", self.system_prompt\n yield \"task_description\", self.task_description\n yield \"input_args_names\", self.input_args_names\n yield \"output_args_names\", self.output_args_names\n\n @property\n def template(self) -> \"Template\":\n if self.__jinja2_template__ is None:\n raise ValueError(\n \"You must provide a `__jinja2_template__` attribute to your Task subclass.\"\n )\n\n return Template(open(self.__jinja2_template__).read())\n\n @abstractmethod\n def generate_prompt(self, **kwargs: Any) -> Prompt:\n pass\n\n @abstractmethod\n def parse_output(self, output: str) -> Any:\n pass\n\n @property\n @abstractmethod\n def input_args_names(self) -> List[str]:\n pass\n\n @property\n @abstractmethod\n def output_args_names(self) -> List[str]:\n pass\n\n def validate_dataset(self, columns_in_dataset: List[str]) -> None:\n \"\"\"Validates that the dataset contains the required columns for the task.\n\n Args:\n columns_in_dataset (List[str]): the columns in the dataset.\n\n Raises:\n KeyError: if the dataset does not contain the required columns.\n \"\"\"\n for input_arg_name in self.input_args_names:\n if input_arg_name not in columns_in_dataset:\n raise KeyError(\n f\"LLM expects a column named '{input_arg_name}' in the provided\"\n \" dataset, but it was not found.\"\n )\n\n def to_argilla_dataset(\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> \"FeedbackDataset\":\n raise NotImplementedError(\n \"`to_argilla_dataset` is not implemented, if you want to export your dataset as an Argilla\"\n \" `FeedbackDataset` you will need to implement this method first.\"\n )\n\n def to_argilla_record(\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n raise NotImplementedError(\n \"`to_argilla_record` is not implemented, if you want to export your dataset as an Argilla\"\n \" `FeedbackDataset` you will need to implement this method first.\"\n )\n\n # Renamed to _to_argilla_record instead of renaming `to_argilla_record` to protected, as that would\n # imply more breaking changes.\n def _to_argilla_record( # noqa: C901\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n column_names = list(dataset_row.keys())\n if self.__type__ is None or self.__type__ == \"generation\":\n required_column_names = self.input_args_names + self.output_args_names\n elif self.__type__ == \"labelling\":\n required_column_names = self.output_args_names\n else:\n raise ValueError(\"The task type is not supported.\")\n\n dataset_rows = [dataset_row]\n if \"generation_model\" in dataset_row and isinstance(\n dataset_row[\"generation_model\"], list\n ):\n generation_columns = column_names[\n column_names.index(\"generation_model\") : column_names.index(\n \"labelling_model\"\n )\n if \"labelling_model\" in column_names\n 
else None\n ]\n if any(\n isinstance(nested, list)\n for column_name in list(\n set(generation_columns)\n - {\n \"generation_model\",\n \"generation_prompt\",\n \"raw_generation_response\",\n }\n )\n for nested in dataset_row[column_name]\n ):\n if any(\n generation_column in required_column_names\n for generation_column in generation_columns\n ):\n unwrapped_dataset_rows = []\n for row in dataset_rows:\n for idx in range(len(dataset_row[\"generation_model\"])):\n unwrapped_dataset_row = {}\n for key, value in row.items():\n if key in generation_columns:\n unwrapped_dataset_row[key] = value[idx]\n else:\n unwrapped_dataset_row[key] = value\n unwrapped_dataset_rows.append(unwrapped_dataset_row)\n dataset_rows = unwrapped_dataset_rows\n\n if \"labelling_model\" in dataset_row and isinstance(\n dataset_row[\"labelling_model\"], list\n ):\n labelling_columns = column_names[column_names.index(\"labelling_model\") :]\n if any(\n isinstance(nested, list)\n for column_name in list(\n set(labelling_columns)\n - {\n \"labelling_model\",\n \"labelling_prompt\",\n \"raw_labelling_response\",\n }\n )\n for nested in dataset_row[column_name]\n ):\n if any(\n labelling_column in required_column_names\n for labelling_column in labelling_columns\n ):\n unwrapped_dataset_rows = []\n for row in dataset_rows:\n for idx in range(len(dataset_row[\"labelling_model\"])):\n unwrapped_dataset_row = {}\n for key, value in row.items():\n if key in labelling_columns:\n unwrapped_dataset_row[key] = value[idx]\n else:\n unwrapped_dataset_row[key] = value\n unwrapped_dataset_rows.append(unwrapped_dataset_row)\n dataset_rows = unwrapped_dataset_rows\n\n if len(dataset_rows) == 1:\n return self.to_argilla_record(dataset_rows[0], *args, **kwargs)\n\n records = []\n for dataset_row in dataset_rows:\n generated_records = self.to_argilla_record(dataset_row, *args, **kwargs)\n if isinstance(generated_records, list):\n records.extend(generated_records)\n else:\n records.append(generated_records)\n return records\n
"},{"location":"reference/distilabel/tasks/base/#distilabel.tasks.base.Task.validate_dataset","title":"validate_dataset(columns_in_dataset)
","text":"Validates that the dataset contains the required columns for the task.
Parameters:
Name Type Description Default
columns_in_dataset
List[str]
the columns in the dataset.
required
Raises:
Type Description
KeyError
if the dataset does not contain the required columns.
Source code in src/distilabel/tasks/base.py
def validate_dataset(self, columns_in_dataset: List[str]) -> None:\n \"\"\"Validates that the dataset contains the required columns for the task.\n\n Args:\n columns_in_dataset (List[str]): the columns in the dataset.\n\n Raises:\n KeyError: if the dataset does not contain the required columns.\n \"\"\"\n for input_arg_name in self.input_args_names:\n if input_arg_name not in columns_in_dataset:\n raise KeyError(\n f\"LLM expects a column named '{input_arg_name}' in the provided\"\n \" dataset, but it was not found.\"\n )\n
"},{"location":"reference/distilabel/tasks/mixins/","title":"mixins","text":""},{"location":"reference/distilabel/tasks/mixins/#distilabel.tasks.mixins.RatingToArgillaMixin","title":"RatingToArgillaMixin
","text":"Mixin that adds the to_argilla_dataset
and to_argilla_record
methods for tasks that generate both ratings and rationales i.e. PreferenceTask
or CritiqueTask
.
Source code in src/distilabel/tasks/mixins.py
class RatingToArgillaMixin:\n \"\"\"Mixin that adds the `to_argilla_dataset` and `to_argilla_record` methods for tasks\n that generate both ratings and rationales i.e. `PreferenceTask` or `CritiqueTask`.\n \"\"\"\n\n def to_argilla_dataset(\n self: TaskProtocol,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n ratings_column: str = \"rating\",\n rationale_column: str = \"rationale\",\n ratings_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n # First we infer the fields from the input_args_names, but we could also\n # create those manually instead using `rg.TextField(...)`\n fields = infer_fields_from_dataset_row(\n field_names=self.input_args_names, dataset_row=dataset_row\n )\n # Then we add the questions, which cannot be easily inferred in this case,\n # because those depend neither on the outputs nor on the inputs, but in a combination\n # of both, since the questions will be formulated using the inputs, but assigned to the\n # outputs.\n if generations_column is None or generations_column not in dataset_row:\n raise ValueError(\n f\"The `generations_column='{generations_column}'` is not present in the\"\n f\" dataset row. Please provide any of {list(dataset_row.keys())}.\",\n )\n if ratings_column is None or ratings_column not in dataset_row:\n raise ValueError(\n f\"The `ratings_column='{ratings_column}'` is not present in the dataset\"\n f\" row. Please provide any of {list(dataset_row.keys())}.\",\n )\n if rationale_column is None or rationale_column not in dataset_row:\n raise ValueError(\n f\"The `rationale_column='{rationale_column}'` is not present in the dataset\"\n f\" row. Please provide any of {list(dataset_row.keys())}.\",\n )\n questions = []\n for idx in range(1, len(dataset_row[generations_column]) + 1):\n questions.append(\n rg.RatingQuestion( # type: ignore\n name=f\"{generations_column}-{idx}-{ratings_column}\",\n title=f\"What's the {ratings_column} for {generations_column}-{idx}?\",\n values=ratings_values or [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n )\n questions.append(\n rg.TextQuestion( # type: ignore\n name=f\"{ratings_column}-{rationale_column}\",\n title=f\"What's the {rationale_column} behind each {ratings_column}?\",\n )\n )\n # Finally, we define some metadata properties that can be potentially used\n # while exploring the dataset within Argilla to get more insights on the data.\n metadata_properties = []\n for arg_name in self.input_args_names:\n if isinstance(dataset_row[arg_name], list):\n for idx in range(1, len(dataset_row[arg_name]) + 1):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}-{idx}\") # type: ignore\n )\n if arg_name == generations_column:\n metadata_properties.append(\n rg.FloatMetadataProperty(\n name=f\"{ratings_column}-{arg_name}-{idx}\"\n ) # type: ignore\n )\n elif isinstance(dataset_row[arg_name], str):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}\") # type: ignore\n )\n else:\n warnings.warn(\n f\"Unsupported input type ({type(dataset_row[arg_name])}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n metadata_properties.append(\n rg.FloatMetadataProperty(name=f\"distance-best-{ratings_column}\") # type: ignore\n )\n # Then we just return the `FeedbackDataset` with the fields, questions, and metadata properties\n # defined above.\n return rg.FeedbackDataset(\n fields=fields,\n questions=questions,\n metadata_properties=metadata_properties, # Note that these are always optional\n )\n\n def _merge_rationales(\n 
self, rationales: List[str], generations_column: str = \"generations\"\n ) -> str:\n return \"\\n\".join(\n f\"{generations_column}-{idx}:\\n{rationale}\\n\"\n for idx, rationale in enumerate(rationales, start=1)\n )\n\n def to_argilla_record( # noqa: C901\n self: TaskProtocol,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n ratings_column: str = \"rating\",\n rationale_column: str = \"rationale\",\n ) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n fields[f\"{arg_name}-{idx}\"] = value.strip() if value else \"\"\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value.strip())\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we include the suggestions, which are generated from the outputs\n # of the LLM instead.\n suggestions = []\n if rationale_column is None or rationale_column not in dataset_row:\n raise ValueError(\n f\"The rationale column {rationale_column} is not present in the dataset row.\"\n )\n if dataset_row.get(rationale_column) is not None:\n rationales = dataset_row.get(rationale_column)\n suggestions.append(\n {\n \"question_name\": f\"{ratings_column}-{rationale_column}\",\n \"value\": self._merge_rationales(rationales=rationales) # type: ignore\n if isinstance(rationales, list)\n else rationales,\n }\n )\n if ratings_column is None or ratings_column not in dataset_row:\n raise ValueError(\n f\"The ratings column {ratings_column} is not present in the dataset row.\"\n )\n if dataset_row.get(ratings_column) is not None:\n ratings = dataset_row.get(ratings_column)\n if isinstance(ratings, list):\n for idx, value in enumerate(ratings, start=1): # type: ignore\n suggestions.append(\n {\n \"question_name\": f\"{generations_column}-{idx}-{ratings_column}\",\n \"value\": 1\n if value < 1\n else int(value)\n if value < 10\n else None,\n }\n )\n metadata[f\"{ratings_column}-{generations_column}-{idx}\"] = value\n if len(ratings) >= 2: # type: ignore\n sorted_ratings = sorted(ratings, reverse=True) # type: ignore\n metadata[f\"distance-best-{ratings_column}\"] = (\n sorted_ratings[0] - sorted_ratings[1]\n )\n elif isinstance(ratings, (str, float, int)):\n suggestions.append(\n {\n \"question_name\": f\"{generations_column}-1-{ratings_column}\",\n \"value\": int(ratings),\n }\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(\n fields=fields, suggestions=suggestions, metadata=metadata\n )\n
"},{"location":"reference/distilabel/tasks/mixins/#distilabel.tasks.mixins.RatingToArgillaMixin.to_argilla_record","title":"to_argilla_record(dataset_row, generations_column='generations', ratings_column='rating', rationale_column='rationale')
","text":"Converts a dataset row to an Argilla FeedbackRecord
.
src/distilabel/tasks/mixins.py
def to_argilla_record( # noqa: C901\n self: TaskProtocol,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n ratings_column: str = \"rating\",\n rationale_column: str = \"rationale\",\n) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n fields[f\"{arg_name}-{idx}\"] = value.strip() if value else \"\"\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value.strip())\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we include the suggestions, which are generated from the outputs\n # of the LLM instead.\n suggestions = []\n if rationale_column is None or rationale_column not in dataset_row:\n raise ValueError(\n f\"The rationale column {rationale_column} is not present in the dataset row.\"\n )\n if dataset_row.get(rationale_column) is not None:\n rationales = dataset_row.get(rationale_column)\n suggestions.append(\n {\n \"question_name\": f\"{ratings_column}-{rationale_column}\",\n \"value\": self._merge_rationales(rationales=rationales) # type: ignore\n if isinstance(rationales, list)\n else rationales,\n }\n )\n if ratings_column is None or ratings_column not in dataset_row:\n raise ValueError(\n f\"The ratings column {ratings_column} is not present in the dataset row.\"\n )\n if dataset_row.get(ratings_column) is not None:\n ratings = dataset_row.get(ratings_column)\n if isinstance(ratings, list):\n for idx, value in enumerate(ratings, start=1): # type: ignore\n suggestions.append(\n {\n \"question_name\": f\"{generations_column}-{idx}-{ratings_column}\",\n \"value\": 1\n if value < 1\n else int(value)\n if value < 10\n else None,\n }\n )\n metadata[f\"{ratings_column}-{generations_column}-{idx}\"] = value\n if len(ratings) >= 2: # type: ignore\n sorted_ratings = sorted(ratings, reverse=True) # type: ignore\n metadata[f\"distance-best-{ratings_column}\"] = (\n sorted_ratings[0] - sorted_ratings[1]\n )\n elif isinstance(ratings, (str, float, int)):\n suggestions.append(\n {\n \"question_name\": f\"{generations_column}-1-{ratings_column}\",\n \"value\": int(ratings),\n }\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(\n fields=fields, suggestions=suggestions, metadata=metadata\n )\n
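To make the mixin concrete, here is a minimal sketch of exporting one labelled row to Argilla. It uses JudgeLMTask (documented later in this reference) as a task that inherits RatingToArgillaMixin, an invented dataset row that follows the default generations/rating/rationale column names, and assumes a recent Argilla client that provides FeedbackDataset.add_records; it is illustrative only, not part of the source above.
>>> from distilabel.tasks.preference import JudgeLMTask
>>> # invented row using the default column names shown above
>>> row = {
...     "input": "What are the first 5 Fibonacci numbers?",
...     "generations": ["0 1 1 2 3", "1 1 2 3 5"],
...     "rating": [9.0, 6.0],
...     "rationale": ["Correct sequence.", "Starts at the wrong index."],
... }
>>> task = JudgeLMTask()  # any task mixing in `RatingToArgillaMixin`
>>> dataset = task.to_argilla_dataset(dataset_row=row)  # fields, questions, metadata properties
>>> record = task.to_argilla_record(dataset_row=row)    # suggestions built from rating/rationale
>>> dataset.add_records([record])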
"},{"location":"reference/distilabel/tasks/prompt/","title":"prompt","text":""},{"location":"reference/distilabel/tasks/prompt/#distilabel.tasks.prompt.ChatCompletion","title":"ChatCompletion
","text":" Bases: TypedDict
A TypedDict
matching OpenAI's chat completion format.
src/distilabel/tasks/prompt.py
class ChatCompletion(TypedDict):\n \"\"\"A `TypedDict` matching OpenAI's chat completion format.\"\"\"\n\n role: Literal[\"system\", \"user\", \"assistant\"]\n content: str\n
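As a quick illustrative sketch (not part of the source above), a ChatCompletion is just a typed dict, and a list of them is what Prompt.format_as("openai") returns further below; the import assumes the class is exposed from distilabel.tasks.prompt, where this reference places it.
>>> from distilabel.tasks.prompt import ChatCompletion
>>> messages = [
...     ChatCompletion(role="system", content="You are a helpful assistant."),
...     ChatCompletion(role="user", content="What are the first 5 Fibonacci numbers?"),
... ]
>>> messages[0]["role"]
'system'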
"},{"location":"reference/distilabel/tasks/prompt/#distilabel.tasks.prompt.Prompt","title":"Prompt
dataclass
","text":"A dataclass
representing a Prompt
.
Parameters:
system_prompt (str): the system prompt. Required.
formatted_prompt (str): the formatted prompt. Required.
Examples:
>>> from distilabel.tasks.prompt import Prompt\n>>> prompt = Prompt(\n... system_prompt=\"You are a helpful assistant.\",\n... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n... )\n
Source code in src/distilabel/tasks/prompt.py
@dataclass\nclass Prompt:\n \"\"\"A `dataclass` representing a `Prompt`.\n\n Args:\n system_prompt (str): the system prompt.\n formatted_prompt (str): the formatted prompt.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n \"\"\"\n\n system_prompt: str\n formatted_prompt: str\n\n def format_as(self, format: SupportedFormats) -> Union[str, List[ChatCompletion]]:\n \"\"\"Formats the prompt as the specified format.\n\n Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\\nWhat are the first 5 Fibonacci numbers?'\n \"\"\"\n if format == \"default\":\n return f\"{self.system_prompt}\\n{self.formatted_prompt}\"\n elif format == \"openai\":\n return [\n ChatCompletion(\n role=\"system\",\n content=self.system_prompt,\n ),\n ChatCompletion(role=\"user\", content=self.formatted_prompt),\n ]\n elif format == \"llama2\":\n return f\"<s>[INST] <<SYS>>\\n{self.system_prompt}<</SYS>>\\n\\n{self.formatted_prompt} [/INST]\"\n elif format == \"chatml\":\n return f\"<|im_start|>system\\n{self.system_prompt}<|im_end|>\\n<|im_start|>user\\n{self.formatted_prompt}<|im_end|>\\n<|im_start|>assistant\\n\"\n elif format in [\"zephyr\", \"notus\"]:\n return f\"<|system|>\\n{self.system_prompt}</s>\\n<|user|>\\n{self.formatted_prompt}</s>\\n<|assistant|>\\n\"\n else:\n raise ValueError(\n f\"Format {format} not supported, please provide a custom `prompt_formatting_fn`\"\n \" or use any of the available formats: openai, llama2, chatml, zephyr\"\n )\n
"},{"location":"reference/distilabel/tasks/prompt/#distilabel.tasks.prompt.Prompt.format_as","title":"format_as(format)
","text":"Formats the prompt as the specified format.
Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\n
What are the first 5 Fibonacci numbers?'
Source code in src/distilabel/tasks/prompt.py
def format_as(self, format: SupportedFormats) -> Union[str, List[ChatCompletion]]:\n \"\"\"Formats the prompt as the specified format.\n\n Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\\nWhat are the first 5 Fibonacci numbers?'\n \"\"\"\n if format == \"default\":\n return f\"{self.system_prompt}\\n{self.formatted_prompt}\"\n elif format == \"openai\":\n return [\n ChatCompletion(\n role=\"system\",\n content=self.system_prompt,\n ),\n ChatCompletion(role=\"user\", content=self.formatted_prompt),\n ]\n elif format == \"llama2\":\n return f\"<s>[INST] <<SYS>>\\n{self.system_prompt}<</SYS>>\\n\\n{self.formatted_prompt} [/INST]\"\n elif format == \"chatml\":\n return f\"<|im_start|>system\\n{self.system_prompt}<|im_end|>\\n<|im_start|>user\\n{self.formatted_prompt}<|im_end|>\\n<|im_start|>assistant\\n\"\n elif format in [\"zephyr\", \"notus\"]:\n return f\"<|system|>\\n{self.system_prompt}</s>\\n<|user|>\\n{self.formatted_prompt}</s>\\n<|assistant|>\\n\"\n else:\n raise ValueError(\n f\"Format {format} not supported, please provide a custom `prompt_formatting_fn`\"\n \" or use any of the available formats: openai, llama2, chatml, zephyr\"\n )\n
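Complementing the docstring example, the sketch below shows two more of the branches implemented in format_as above; the expected values are derived directly from that code.
>>> from distilabel.tasks.prompt import Prompt
>>> prompt = Prompt(
...     system_prompt="You are a helpful assistant.",
...     formatted_prompt="What are the first 5 Fibonacci numbers?",
... )
>>> prompt.format_as("openai")
[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'What are the first 5 Fibonacci numbers?'}]
>>> prompt.format_as("llama2")
'<s>[INST] <<SYS>>\nYou are a helpful assistant.<</SYS>>\n\nWhat are the first 5 Fibonacci numbers? [/INST]'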
"},{"location":"reference/distilabel/tasks/critique/","title":"critique","text":""},{"location":"reference/distilabel/tasks/critique/base/","title":"base","text":""},{"location":"reference/distilabel/tasks/critique/base/#distilabel.tasks.critique.base.CritiqueTask","title":"CritiqueTask
dataclass
","text":" Bases: RatingToArgillaMixin
, Task
A Task
for critique / judge tasks.
Parameters:
system_prompt (str): the system prompt to be used for generation. Required.
task_description (Union[str, None], optional): the description of the task. Defaults to None.
Source code in src/distilabel/tasks/critique/base.py
@dataclass\nclass CritiqueTask(RatingToArgillaMixin, Task):\n \"\"\"A `Task` for critique / judge tasks.\n\n Args:\n system_prompt (str): the system prompt to be used for generation.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n \"\"\"\n\n __type__: ClassVar[Literal[\"labelling\"]] = \"labelling\"\n\n @property\n def input_args_names(self) -> List[str]:\n \"\"\"Returns the names of the input arguments of the task.\"\"\"\n return [\"input\", \"generations\"]\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the names of the output arguments of the task.\"\"\"\n return [\"critique\", \"score\"]\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n score_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=score_column,\n rationale_column=critique_column,\n ratings_values=score_values or [1, 2, 3, 4, 5],\n )\n\n def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n return super().to_argilla_record(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=score_column,\n rationale_column=critique_column,\n )\n
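A small sketch of the argument names a critique task exposes, using UltraCMTask (documented below) as a concrete subclass; the import path assumes the module location given in this reference.
>>> from distilabel.tasks.critique.ultracm import UltraCMTask
>>> task = UltraCMTask()
>>> task.input_args_names, task.output_args_names
(['input', 'generations'], ['critique', 'score'])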
"},{"location":"reference/distilabel/tasks/critique/base/#distilabel.tasks.critique.base.CritiqueTask.input_args_names","title":"input_args_names: List[str]
property
","text":"Returns the names of the input arguments of the task.
"},{"location":"reference/distilabel/tasks/critique/base/#distilabel.tasks.critique.base.CritiqueTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the names of the output arguments of the task.
"},{"location":"reference/distilabel/tasks/critique/base/#distilabel.tasks.critique.base.CritiqueTaskOutput","title":"CritiqueTaskOutput
","text":" Bases: TypedDict
A TypedDict
matching the output format of any CritiqueTask
.
src/distilabel/tasks/critique/base.py
class CritiqueTaskOutput(TypedDict):\n \"\"\"A `TypedDict` matching the output format of any `CritiqueTask`.\"\"\"\n\n score: float\n critique: str\n
"},{"location":"reference/distilabel/tasks/critique/prometheus/","title":"prometheus","text":""},{"location":"reference/distilabel/tasks/critique/prometheus/#distilabel.tasks.critique.prometheus.PrometheusTask","title":"PrometheusTask
dataclass
","text":" Bases: CritiqueTask
src/distilabel/tasks/critique/prometheus.py
@dataclass\nclass PrometheusTask(CritiqueTask):\n scoring_criteria: str\n score_descriptions: Dict[int, str]\n\n system_prompt: str = \"You are a fair evaluator language model.\"\n\n __jinja2_template__: ClassVar[str] = _PROMETHEUS_TEMPLATE\n\n @property\n def input_args_names(self) -> List[str]:\n return super().input_args_names + [\"ref_completion\"]\n\n def generate_prompt(\n self, input: str, generations: str, ref_completion: str, **_: Any\n ) -> Prompt:\n render_kwargs = {\n \"instruction\": input,\n \"completion\": generations,\n \"ref_completion\": ref_completion,\n \"scoring_criteria\": self.scoring_criteria,\n \"score_descriptions\": self.score_descriptions,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n # We use a regex instead of splitting by the delimiter because the\n # critique may contain the delimiter, and using the regex is safer.\n pattern = r\"(.+?)\\. \\[RESULT\\] (\\d+)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(2)),\n critique=match.group(1).strip(),\n )\n
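Since scoring_criteria and score_descriptions have no defaults, both must be provided when instantiating the task. The criteria below are invented for illustration, and the import path assumes the module location given above.
>>> from distilabel.tasks.critique.prometheus import PrometheusTask
>>> task = PrometheusTask(
...     scoring_criteria="Is the answer factually correct and complete?",
...     score_descriptions={
...         1: "Mostly wrong",
...         2: "Partially correct",
...         3: "Correct but incomplete",
...         4: "Correct and fairly complete",
...         5: "Correct and exhaustive",
...     },
... )
>>> task.input_args_names  # `ref_completion` is added on top of the base critique inputs
['input', 'generations', 'ref_completion']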
"},{"location":"reference/distilabel/tasks/critique/prometheus/#distilabel.tasks.critique.prometheus.PrometheusTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/critique/prometheus.py
def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n # We use a regex instead of splitting by the delimiter because the\n # critique may contain the delimiter, and using the regex is safer.\n pattern = r\"(.+?)\\. \\[RESULT\\] (\\d+)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(2)),\n critique=match.group(1).strip(),\n )\n
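Given a PrometheusTask instance (the criteria below are invented placeholders), parse_output splits a completion of the form "<critique>. [RESULT] <score>"; note that the regex consumes the final period of the critique.
>>> from distilabel.tasks.critique.prometheus import PrometheusTask
>>> task = PrometheusTask(
...     scoring_criteria="Factual correctness",
...     score_descriptions={i: f"Score {i}" for i in range(1, 6)},
... )
>>> task.parse_output("The answer is correct and well explained. [RESULT] 4")
{'score': 4.0, 'critique': 'The answer is correct and well explained'}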
"},{"location":"reference/distilabel/tasks/critique/ultracm/","title":"ultracm","text":""},{"location":"reference/distilabel/tasks/critique/ultracm/#distilabel.tasks.critique.ultracm.UltraCMTask","title":"UltraCMTask
dataclass
","text":" Bases: CritiqueTask
src/distilabel/tasks/critique/ultracm.py
@dataclass\nclass UltraCMTask(CritiqueTask):\n __jinja2_template__: ClassVar[str] = _ULTRACM_TEMPLATE\n\n system_prompt: str = (\n \"User: A one-turn chat between a curious user and an artificial intelligence\"\n \" assistant. The assistant gives helpful, very detailed, and polite answers to\"\n \" the user's questions.</s>\"\n )\n\n def generate_prompt(self, input: str, generations: str, **_: Any) -> Prompt:\n render_kwargs = {\n \"instruction\": input,\n \"completion\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=f\"User: {self.template.render(**render_kwargs)}</s>\\nAssistant: ### Feedback\\nOverall Score: \",\n )\n\n def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = r\"(\\d+(?:\\.\\d+)?)\\s*(.*)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(1)),\n critique=match.group(2).strip(),\n )\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n score_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n score_column=score_column,\n critique_column=critique_column,\n score_values=score_values or [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n
"},{"location":"reference/distilabel/tasks/critique/ultracm/#distilabel.tasks.critique.ultracm.UltraCMTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/critique/ultracm.py
def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = r\"(\\d+(?:\\.\\d+)?)\\s*(.*)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(1)),\n critique=match.group(2).strip(),\n )\n
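A minimal, invented example of the shape UltraCMTask.parse_output expects: the overall score first, followed by the critique text on the same line; the import path assumes the module location given above.
>>> from distilabel.tasks.critique.ultracm import UltraCMTask
>>> task = UltraCMTask()
>>> task.parse_output("7.5 The answer is mostly correct but misses the fifth number.")
{'score': 7.5, 'critique': 'The answer is mostly correct but misses the fifth number.'}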
"},{"location":"reference/distilabel/tasks/preference/","title":"preference","text":""},{"location":"reference/distilabel/tasks/preference/base/","title":"base","text":""},{"location":"reference/distilabel/tasks/preference/base/#distilabel.tasks.preference.base.PreferenceTask","title":"PreferenceTask
dataclass
","text":" Bases: RatingToArgillaMixin
, Task
A Task
for preference rating tasks.
Parameters:
system_prompt (str): the system prompt to be used for generation. Required.
task_description (Union[str, None], optional): the description of the task. Defaults to None.
Source code in src/distilabel/tasks/preference/base.py
@dataclass\nclass PreferenceTask(RatingToArgillaMixin, Task):\n \"\"\"A `Task` for preference rating tasks.\n\n Args:\n system_prompt (str): the system prompt to be used for generation.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n \"\"\"\n\n __type__: ClassVar[Literal[\"labelling\"]] = \"labelling\"\n\n @property\n def input_args_names(self) -> List[str]:\n \"\"\"Returns the names of the input arguments of the task.\"\"\"\n return [\"input\", \"generations\"]\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the names of the output arguments of the task.\"\"\"\n return [\"rating\", \"rationale\"]\n
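All preference tasks in this section share these argument names, as the sketch below shows with JudgeLMTask (the import path follows the docstring examples later in this reference).
>>> from distilabel.tasks.preference import JudgeLMTask
>>> task = JudgeLMTask()
>>> task.input_args_names, task.output_args_names
(['input', 'generations'], ['rating', 'rationale'])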
"},{"location":"reference/distilabel/tasks/preference/base/#distilabel.tasks.preference.base.PreferenceTask.input_args_names","title":"input_args_names: List[str]
property
","text":"Returns the names of the input arguments of the task.
"},{"location":"reference/distilabel/tasks/preference/base/#distilabel.tasks.preference.base.PreferenceTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the names of the output arguments of the task.
"},{"location":"reference/distilabel/tasks/preference/judgelm/","title":"judgelm","text":""},{"location":"reference/distilabel/tasks/preference/judgelm/#distilabel.tasks.preference.judgelm.JudgeLMOutput","title":"JudgeLMOutput
","text":" Bases: TypedDict
A TypedDict
matching the output format of JudgeLM.
src/distilabel/tasks/preference/judgelm.py
class JudgeLMOutput(TypedDict):\n \"\"\"A `TypedDict` matching the output format of JudgeLM.\"\"\"\n\n rating: List[float]\n rationale: str\n
"},{"location":"reference/distilabel/tasks/preference/judgelm/#distilabel.tasks.preference.judgelm.JudgeLMTask","title":"JudgeLMTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask
following the prompt template used by JudgeLM.
Parameters:
system_prompt (str, optional): the system prompt to be used for generation. Defaults to: 'You are a helpful and precise assistant for checking the quality of the answer.'
task_description (Union[str, None], optional): the description of the task. Defaults to: 'We would like to request your feedback on the performance of {num_responses} AI assistants in response to the user question displayed above.\nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only {num_responses} values indicating the scores for Assistants 1 to {num_responses}, respectively. The {num_responses} scores are separated by a space. In the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment.'
Source code in src/distilabel/tasks/preference/judgelm.py
@dataclass\nclass JudgeLMTask(PreferenceTask):\n \"\"\"A `PreferenceTask` following the prompt templated used by JudgeLM.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n \"\"\"\n\n task_description: str = (\n \"We would like to request your feedback on the performance of {num_responses} AI assistants in response to the\"\n \" user question displayed above.\\nPlease rate the helpfulness, relevance, accuracy, level of details\"\n \" of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher\"\n \" score indicates better overall performance.\\nPlease first output a single line containing only {num_responses}\"\n \" values indicating the scores for Assistants 1 to {num_responses}, respectively. The {num_responses} scores are separated by\"\n \" a space. In the subsequent line, please provide a comprehensive explanation of your evaluation,\"\n \" avoiding any potential bias and ensuring that the order in which the responses were presented does\"\n \" not affect your judgment.\"\n )\n system_prompt: str = \"You are a helpful and precise assistant for checking the quality of the answer.\"\n\n __jinja2_template__: ClassVar[str] = _JUDGELM_TEMPLATE\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the JudgeLM specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import JudgeLMTask\n >>> task = JudgeLMTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"[Question]\\nWhat are the first 5 Fibonacci numbers?\\n...\",\n )\n \"\"\"\n render_kwargs = {\n \"input\": input,\n \"responses\": generations,\n \"task_description\": self.task_description.format(\n num_responses=len(generations)\n ),\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> JudgeLMOutput:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n split_output = output.split(\"\\n\")\n rating = [float(rating) for rating in split_output[0].split(\" \")]\n rationale = \"\\n\".join(split_output[1:])\n return JudgeLMOutput(rating=rating, rationale=rationale)\n
"},{"location":"reference/distilabel/tasks/preference/judgelm/#distilabel.tasks.preference.judgelm.JudgeLMTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the JudgeLM specification.
Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import JudgeLMTask\n >>> task = JudgeLMTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"[Question]\n
What are the first 5 Fibonacci numbers? ...\", )
Source code in src/distilabel/tasks/preference/judgelm.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the JudgeLM specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import JudgeLMTask\n >>> task = JudgeLMTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"[Question]\\nWhat are the first 5 Fibonacci numbers?\\n...\",\n )\n \"\"\"\n render_kwargs = {\n \"input\": input,\n \"responses\": generations,\n \"task_description\": self.task_description.format(\n num_responses=len(generations)\n ),\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/preference/judgelm/#distilabel.tasks.preference.judgelm.JudgeLMTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/judgelm.py
def parse_output(self, output: str) -> JudgeLMOutput:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n split_output = output.split(\"\\n\")\n rating = [float(rating) for rating in split_output[0].split(\" \")]\n rationale = \"\\n\".join(split_output[1:])\n return JudgeLMOutput(rating=rating, rationale=rationale)\n
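An invented completion in the format the task description asks the model for (one line of space-separated scores, then the explanation), parsed with the method above:
>>> from distilabel.tasks.preference import JudgeLMTask
>>> task = JudgeLMTask()
>>> task.parse_output("7 9\nAssistant 2 lists the correct sequence; Assistant 1 starts at the wrong index.")
{'rating': [7.0, 9.0], 'rationale': 'Assistant 2 lists the correct sequence; Assistant 1 starts at the wrong index.'}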
"},{"location":"reference/distilabel/tasks/preference/ultrafeedback/","title":"ultrafeedback","text":""},{"location":"reference/distilabel/tasks/preference/ultrafeedback/#distilabel.tasks.preference.ultrafeedback.Rating","title":"Rating
","text":" Bases: TypedDict
A TypedDict
representing a rating.
src/distilabel/tasks/preference/ultrafeedback.py
class Rating(TypedDict):\n \"\"\"A `TypedDict` representing a rating.\"\"\"\n\n value: int\n description: str\n
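A sketch of supplying custom Rating values to one of the UltraFeedbackTask factory methods shown further below; the three-level scale is invented, and the Rating import path assumes the module location given above.
>>> from distilabel.tasks.preference import UltraFeedbackTask
>>> from distilabel.tasks.preference.ultrafeedback import Rating
>>> custom_ratings = [
...     Rating(value=1, description="**Bad**: incorrect or off-topic."),
...     Rating(value=2, description="**Acceptable**: mostly correct, with minor issues."),
...     Rating(value=3, description="**Excellent**: correct, complete and well written."),
... ]
>>> task = UltraFeedbackTask.for_text_quality(ratings=custom_ratings)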
"},{"location":"reference/distilabel/tasks/preference/ultrafeedback/#distilabel.tasks.preference.ultrafeedback.UltraFeedbackOutput","title":"UltraFeedbackOutput
","text":" Bases: TypedDict
A TypedDict
representing the output of an UltraFeedbackTask
.
src/distilabel/tasks/preference/ultrafeedback.py
class UltraFeedbackOutput(TypedDict):\n \"\"\"A `TypedDict` representing the output of an `UltraFeedbackTask`.\"\"\"\n\n rating: float\n rationale: str\n
"},{"location":"reference/distilabel/tasks/preference/ultrafeedback/#distilabel.tasks.preference.ultrafeedback.UltraFeedbackTask","title":"UltraFeedbackTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask
following the prompt template used by ULTRAFEEDBACK.
Parameters:
system_prompt (str, optional): the system prompt to be used for generation. Defaults to: 'Your role is to evaluate text quality based on given criteria.'
task_description (Union[str, None], optional): the description of the task. Defaults to None.
ratings (Union[List[Rating], None], optional): the ratings to be used for the task. Defaults to None.
src/distilabel/tasks/preference/ultrafeedback.py
@dataclass\nclass UltraFeedbackTask(PreferenceTask):\n \"\"\"A `PreferenceTask` following the prompt template used by ULTRAFEEDBACK.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n ratings (Union[List[Rating], None], optional): the ratings to be used for the task. Defaults to `None`.\n \"\"\"\n\n ratings: List[Rating]\n task_description: str\n\n system_prompt: (\n str\n ) = \"Your role is to evaluate text quality based on given criteria.\"\n\n __jinja2_template__: ClassVar[str] = field(\n default=_ULTRAFEEDBACK_TEMPLATE, init=False, repr=False\n )\n __subtasks__: ClassVar[List[str]] = [\n \"text-quality\",\n \"helpfulness\",\n \"truthfulness\",\n \"honesty\",\n \"instruction-following\",\n ]\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the ULTRAFEEDBACK specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraFeedbackTask\n >>> task = UltraFeedbackTask.for_text_quality()\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"Your role is to evaluate text quality based on given criteria.\",\n formatted_prompt=\"# General Text Quality Assessment\\nEvaluate the model's ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description,\n \"ratings\": self.ratings,\n \"input\": input,\n \"responses\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> List[UltraFeedbackOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n parsed_output = []\n for section in output.split(\"#### Output for Text \")[1:]:\n rating, rationale = section.split(\"\\n\")[1:3]\n rating = float(rating.split(\": \")[1])\n rationale = rationale.split(\": \")[1]\n parsed_output.append(\n UltraFeedbackOutput(rating=rating, rationale=rationale)\n )\n return parsed_output\n\n # Override the default `to_argilla_dataset` method to provide the `ratings_values` of\n # UltraFeedback, as the default goes from 1-10 while UltraFeedback's default is 1-5\n # (0-4 actually, but Argilla doesn't support 0s).\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n ratings_column: str = \"rating\",\n rationale_column: str = \"rationale\",\n ratings_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=ratings_column,\n rationale_column=rationale_column,\n ratings_values=ratings_values or [1, 2, 3, 4, 5],\n )\n\n @classmethod\n def for_text_quality(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # General Text Quality Assessment\n Evaluate the model's outputs based on various criteria:\n 1. 
**Correctness & Informativeness**: Does the output provide accurate and helpful information?\n 2. **Honesty & Uncertainty**: How confidently does the model convey its information, and does it express uncertainty appropriately?\n 3. **Truthfulness & Hallucination**: Does the model introduce misleading or fabricated details?\n 4. **Instruction Following**: Does the model's output align with given instructions and the user's intent?\n Your role is to provide a holistic assessment considering all the above factors.\n\n **Scoring**: Rate outputs 1 to 5 based on the overall quality, considering all aspects:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Low Quality**: Contains inaccuracies, may be entirely wrong or has severe hallucinations.\",\n ),\n Rating(\n value=2,\n description=\"**Moderate Quality**: Addresses some aspects, but has errors or is partially aligned with instructions.\",\n ),\n Rating(\n value=3,\n description=\"**Good**: Generally accurate but may contain minor errors or slight deviations.\",\n ),\n Rating(\n value=4,\n description=\"**Very Good**: Near perfect, with minor issues in terms of alignment or confidence.\",\n ),\n Rating(\n value=5,\n description=\"**Excellent**: Accurate, confident, aligned with instructions, and free of hallucinations.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_helpfulness(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Informativeness / Helpfulness Assessment\n Evaluate if model's outputs fulfill task objectives and provide high-quality, correct, and, informative content.\n Helpfulness assessment emphasizes **Overall Quality** regarding correctness and informativeness.\n **Correctness**: Accurate computation, reasoning steps, and outputs without misunderstandings or fabrication.\n\n **Scoring**: Score 1 to 5 based on extent of helpfulness, regarding both informativeness and correctness:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Severely Incorrect**: Contains significant inaccuracies or fabricated content, even if comprehensive information is provided.\",\n ),\n Rating(\n value=2,\n description=\"**Partially Incorrect**: Contains errors that may cause confusion, even though comprehensive information is present.\",\n ),\n Rating(\n value=3,\n description=\"**Correct**: Accurate and provides useful information that meets the task's requirements.\",\n ),\n Rating(\n value=4,\n description=\"**Highly Informative**: Accurate and extensive, providing valuable insights and detailed information.\",\n ),\n Rating(\n value=5,\n description=\"**Outstandingly Helpful**: Both accurate and in-depth, offering profound insights and comprehensive information.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_truthfulness(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": 
system_prompt})\n\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Truthfulness and Hallucination Assessment\n Evaluate the model's accuracy in providing information without introducing misleading or fabricated details.\n Assign numeric identifier (or \"None\") from 1 to 3 for each type of hallucination:\n 1. **Contradictory with the World (Factual Error)**: Entities, locations, concepts, or events that conflict with established knowledge.\n 2. **Contradictory with Instruction and Input**: Responses diverge, introducing new facts not aligned with instructions or inputs.\n 3. **Self-Contradictory / Logical Error**: Responses contain internal contradictions or logical errors within each independent text.\n\n **Scoring**: Rate outputs 1 to 5 based on the extent of hallucination:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Completely Hallucinated**: Entirely unreliable due to hallucinations.\",\n ),\n Rating(\n value=2,\n description=\"**Severe Hallucination**: Nearly half contains hallucinations, severe deviation from main points.\",\n ),\n Rating(\n value=3,\n description=\"**Partial Hallucination / Misunderstanding**: Overall truthful, partial misunderstanding due to hallucinations.\",\n ),\n Rating(\n value=4,\n description=\"**Insignificant Hallucination**: Mostly truthful, slight hallucination not affecting main points.\",\n ),\n Rating(\n value=5, description=\"**No Hallucination**: Free of hallucinations.\"\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_honesty(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Honesty and Uncertainty Expression Assessment\n Assess how well the model conveys honesty and uncertainty. Evaluate if the model's confidence aligns with response correctness.\n - **Indicators of Uncertainty**:\n 1. Weakeners: e.g., 'I guess,' 'probably.'\n 2. Refusal to answer.\n 3. Verbalized confidence scores: [0, 20] low; (20, 40] uncertain; (40, 60] moderate; (60, 80] leaning confident; (80, 100] high.\n - **No uncertainty expression indicate confidence.**\n - **Response Correctness**: Align with ground truth, or provide accurate content without fabrication.\n\n **Scoring**: Rate outputs 1 to 5 based on the extent of honesty and uncertainty:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Confidently Incorrect**: Confident but entirely wrong.\",\n ),\n Rating(\n value=2,\n description=\"**Confident with Significant Mistakes / Unconfident Incorrect**: Confident but contains major errors. Unconfident and entirely wrong.\",\n ),\n Rating(\n value=3,\n description=\"**Uncertain / 'I Don't Know' / Subtle Mistakes**: 'I don't know' or declines. Confident but contains minor errors. Unconfident and contains significant mistakes.\",\n ),\n Rating(\n value=4,\n description=\"**Correct but Uncertain / Expressed Subtle Mistakes**: Correct but unconfident.\",\n ),\n Rating(\n value=5,\n description=\"**Correct and Confident / Precisely Express Uncertainty**: Correct and confident. 
Makes mistakes, but precisely acknowledges minor errors and indicates uncertainty on potential mistakes.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n\n return cls(**kwargs)\n\n @classmethod\n def for_instruction_following(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Instruction Following Assessment\n Evaluate alignment between output and intent. Assess understanding of task goal and restrictions.\n **Instruction Components**: Task Goal (intended outcome), Restrictions (text styles, formats, or designated methods, etc).\n\n **Scoring**: Rate outputs 1 to 5:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(value=1, description=\"**Irrelevant**: No alignment.\"),\n Rating(\n value=2,\n description=\"**Partial Focus**: Addresses one aspect poorly.\",\n ),\n Rating(\n value=3,\n description=\"**Partial Compliance**:\\n\\t- (1) Meets goal or restrictions, neglecting other.\\n\\t- (2) Acknowledges both but slight deviations.\",\n ),\n Rating(\n value=4,\n description=\"**Almost There**: Near alignment, minor deviations.\",\n ),\n Rating(\n value=5,\n description=\"**Comprehensive Compliance**: Fully aligns, meets all requirements.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n\n return cls(**kwargs)\n
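Each of the subtasks listed in __subtasks__ has a corresponding for_* factory that fills in the default task description and ratings, for example:
>>> from distilabel.tasks.preference import UltraFeedbackTask
>>> UltraFeedbackTask.__subtasks__
['text-quality', 'helpfulness', 'truthfulness', 'honesty', 'instruction-following']
>>> task = UltraFeedbackTask.for_instruction_following()
>>> task.ratings[0]["description"]
'**Irrelevant**: No alignment.'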
"},{"location":"reference/distilabel/tasks/preference/ultrafeedback/#distilabel.tasks.preference.ultrafeedback.UltraFeedbackTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the ULTRAFEEDBACK specification.
Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraFeedbackTask\n >>> task = UltraFeedbackTask.for_text_quality()\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"Your role is to evaluate text quality based on given criteria.\",\n formatted_prompt=\"# General Text Quality Assessment\n
Evaluate the model's ...\", )
Source code in src/distilabel/tasks/preference/ultrafeedback.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the ULTRAFEEDBACK specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraFeedbackTask\n >>> task = UltraFeedbackTask.for_text_quality()\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"Your role is to evaluate text quality based on given criteria.\",\n formatted_prompt=\"# General Text Quality Assessment\\nEvaluate the model's ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description,\n \"ratings\": self.ratings,\n \"input\": input,\n \"responses\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/preference/ultrafeedback/#distilabel.tasks.preference.ultrafeedback.UltraFeedbackTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/ultrafeedback.py
def parse_output(self, output: str) -> List[UltraFeedbackOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n parsed_output = []\n for section in output.split(\"#### Output for Text \")[1:]:\n rating, rationale = section.split(\"\\n\")[1:3]\n rating = float(rating.split(\": \")[1])\n rationale = rationale.split(\": \")[1]\n parsed_output.append(\n UltraFeedbackOutput(rating=rating, rationale=rationale)\n )\n return parsed_output\n
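A sketch of parsing an invented completion shaped the way parse_output above expects: one "#### Output for Text N" section per generation, each with a "Rating:" and a "Rationale:" line.
>>> from distilabel.tasks.preference import UltraFeedbackTask
>>> task = UltraFeedbackTask.for_text_quality()
>>> output = (
...     "#### Output for Text 1\nRating: 4\nRationale: Accurate and complete.\n\n"
...     "#### Output for Text 2\nRating: 2\nRationale: Misses the last number."
... )
>>> task.parse_output(output)
[{'rating': 4.0, 'rationale': 'Accurate and complete.'}, {'rating': 2.0, 'rationale': 'Misses the last number.'}]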
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/","title":"ultrajudge","text":""},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.Area","title":"Area
","text":" Bases: TypedDict
A TypedDict
representing an area of evaluation.
src/distilabel/tasks/preference/ultrajudge.py
class Area(TypedDict):\n \"\"\"A `TypedDict` representing an area of evaluation.\"\"\"\n\n rating: float\n rationale: str\n
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeOutput","title":"UltraJudgeOutput
","text":" Bases: TypedDict
A TypedDict
representing the output of the UltraJudge task.
src/distilabel/tasks/preference/ultrajudge.py
class UltraJudgeOutput(TypedDict):\n \"\"\"A `TypedDict` representing the output of the UltraJudge task.\"\"\"\n\n rating: float\n areas: Dict[str, Area]\n
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask","title":"UltraJudgeTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask
for the UltraJudge task. The UltraJudge
task has been defined at Argilla specifically for a better evaluation using AI Feedback. The task is defined based on both UltraFeedback and JudgeLM, but with several improvements / modifications.
Parameters:
system_prompt (str, optional): the system prompt to be used for generation. Defaults to: "You are an evaluator tasked with assessing AI assistants' responses from the perspective of typical user preferences. Your critical analysis should focus on human-like engagement, solution effectiveness, accuracy, clarity, and creativity. Approach each response as if you were the user, considering how well the response meets your needs and expectations in a real-world scenario. Provide detailed feedback that highlights strengths and areas for improvement in each response, keeping in mind the goal of simulating a human's preferred choice. Your evaluation should be impartial and thorough, reflecting a human's perspective in preferring responses that are practical, clear, authentic, and aligned with their intent. Avoid bias, and focus on the content and quality of the responses."
task_description (Union[str, None], optional): the description of the task. Defaults to: "Your task is to rigorously evaluate the performance of {num_responses} AI assistants, simulating a human's perspective. You will assess each response based on four key domains, reflecting aspects that are typically valued by humans: {areas}. First provide a score between 0 and 10 and write a detailed feedback for each area and assistant. Finally, provide a list of {num_responses} scores, each separated by a space, to reflect the performance of Assistants 1 to {num_responses}."
areas (List[str], optional): the areas to be used for the task. Defaults to a list of four areas: "Practical Accuracy", "Clarity & Transparency", "Authenticity & Reliability", and "Compliance with Intent".
Source code in src/distilabel/tasks/preference/ultrajudge.py
@dataclass\nclass UltraJudgeTask(PreferenceTask):\n \"\"\"A `PreferenceTask` for the UltraJudge task. The `UltraJudge` task has been defined\n at Argilla specifically for a better evaluation using AI Feedback. The task is defined\n based on both UltraFeedback and JudgeLM, but with several improvements / modifications.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n areas (List[str], optional): the areas to be used for the task. Defaults to a list of four areas:\n \"Practical Accuracy\", \"Clarity & Transparency\", \"Authenticity & Reliability\", and \"Compliance with Intent\".\n \"\"\"\n\n system_prompt: str = (\n \"You are an evaluator tasked with assessing AI assistants' responses from the perspective of typical user preferences.\"\n \" Your critical analysis should focus on human-like engagement, solution effectiveness, accuracy, clarity, and\"\n \" creativity. Approach each response as if you were the user, considering how well the response meets your needs\"\n \" and expectations in a real-world scenario. Provide detailed feedback that highlights strengths and areas for\"\n \" improvement in each response, keeping in mind the goal of simulating a human's preferred choice. \"\n \"Your evaluation should be impartial and thorough, reflecting a human's perspective in preferring responses that are practical,\"\n \" clear, authentic, and aligned with their intent. Avoid bias, and focus on the content and quality of the responses.\"\n )\n\n task_description: str = (\n \"Your task is to rigorously evaluate the performance of {num_responses} AI assistants, simulating a human's perspective.\"\n \" You will assess each response based on four key domains, reflecting aspects that are typically valued by humans:\"\n \" {areas}.\"\n \" First provide a score between 0 and 10 and write a detailed feedback for each area and assistant.\"\n \" Finally, provide a list of {num_responses} scores, each separated by a space, to reflect the performance of Assistants 1 to {num_responses}.\"\n )\n\n areas: List[str] = field(\n default_factory=lambda: [\n \"Practical Accuracy\",\n \"Clarity & Transparency\",\n \"Authenticity & Reliability\",\n \"Compliance with Intent\",\n ]\n )\n\n __jinja2_template__: ClassVar[str] = field(\n default=_ULTRAJUDGE_TEMPLATE, init=False, repr=False\n )\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the names of the output arguments of the task.\"\"\"\n return [\"rating\", \"areas\"]\n\n @property\n def areas_str(self) -> str:\n \"\"\"Returns a string representation of the areas.\"\"\"\n return \", \".join(self.areas[:-1]) + \", and \" + self.areas[-1]\n\n @property\n def extract_area_score_and_rationale_regex(self) -> str:\n \"\"\"Returns a regex to extract the area, score, and rationale from the output.\"\"\"\n return rf\"({'|'.join(self.areas)})\\s*-\\s*(\\d+(?:\\.\\d+)?)\\n(.*?)(?=\\n\\n|\\Z)\"\n\n @property\n def extract_final_scores_regex(self) -> str:\n \"\"\"Returns a regex to extract the final scores from the output.\"\"\"\n return r\"Final scores:\\s*((?:\\d+(?:\\.\\d+)?\\s*)+)\"\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the UltraJudge specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the 
generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraJudgeTask\n >>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description.format(\n num_responses=len(generations), areas=self.areas_str\n ),\n \"instruction\": input,\n \"responses\": generations,\n }\n\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> List[UltraJudgeOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n num_areas = len(self.areas)\n # `areas_results` includes num_generations * num_areas tuples\n areas_results = re.findall(self.extract_area_score_and_rationale_regex, output)\n final_scores = [\n float(str_score)\n for str_score in re.findall(self.extract_final_scores_regex, output)[\n 0\n ].split(\" \")\n ]\n\n outputs = []\n for i, rating in enumerate(final_scores):\n areas = {}\n # Get the areas for the i-th generation\n for area in areas_results[i * num_areas : i * num_areas + num_areas]:\n name, area_rating, rationale = area\n areas[name] = Area(rating=area_rating, rationale=rationale)\n outputs.append(UltraJudgeOutput(rating=rating, areas=areas))\n\n return outputs\n\n def _merge_rationales(\n self, rationales: List[Dict[str, Any]], generations_column: str = \"generations\"\n ) -> str:\n \"\"\"Overwrite of the `_merge_rationales` as we need to process the areas before merging.\"\"\"\n\n def format_area(area: Dict[str, Any]) -> str:\n sections = []\n for title, ratings in area.items():\n sections.append(title)\n for k, v in ratings.items():\n sections.append(f\"{k}:{v}\")\n return \"\\n\".join(sections)\n\n merged_rationales = []\n for idx, area in enumerate(rationales, start=1):\n merged_rationales.append(\n f\"{generations_column}-{idx}:\\n{format_area(area)}\\n\"\n )\n return \"\\n\".join(merged_rationales)\n
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.areas_str","title":"areas_str: str
property
","text":"Returns a string representation of the areas.
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.extract_area_score_and_rationale_regex","title":"extract_area_score_and_rationale_regex: str
property
","text":"Returns a regex to extract the area, score, and rationale from the output.
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.extract_final_scores_regex","title":"extract_final_scores_regex: str
property
","text":"Returns a regex to extract the final scores from the output.
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the names of the output arguments of the task.
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the UltraJudge specification.
Parameters:
input (str): the input to be used for the prompt. Required.
generations (List[str]): the generations to be used for the prompt. Required.
Returns:
Prompt: the generated prompt.
Examples:
>>> from distilabel.tasks.preference import UltraJudgeTask\n>>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n>>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\nPrompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n)\n
Source code in src/distilabel/tasks/preference/ultrajudge.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the UltraJudge specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraJudgeTask\n >>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description.format(\n num_responses=len(generations), areas=self.areas_str\n ),\n \"instruction\": input,\n \"responses\": generations,\n }\n\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/ultrajudge.py
def parse_output(self, output: str) -> List[UltraJudgeOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n num_areas = len(self.areas)\n # `areas_results` includes num_generations * num_areas tuples\n areas_results = re.findall(self.extract_area_score_and_rationale_regex, output)\n final_scores = [\n float(str_score)\n for str_score in re.findall(self.extract_final_scores_regex, output)[\n 0\n ].split(\" \")\n ]\n\n outputs = []\n for i, rating in enumerate(final_scores):\n areas = {}\n # Get the areas for the i-th generation\n for area in areas_results[i * num_areas : i * num_areas + num_areas]:\n name, area_rating, rationale = area\n areas[name] = Area(rating=area_rating, rationale=rationale)\n outputs.append(UltraJudgeOutput(rating=rating, areas=areas))\n\n return outputs\n
"},{"location":"reference/distilabel/tasks/text_generation/","title":"text_generation","text":""},{"location":"reference/distilabel/tasks/text_generation/base/","title":"base","text":""},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask","title":"TextGenerationTask
dataclass
","text":" Bases: Task
A base Task
definition for text generation using LLMs.
Parameters:
Name Type Description Default
system_prompt
str
the system prompt to be used. Defaults to None
.
\"You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\"
principles
Dict[str, List[str]]
the principles to be used for the system prompt. Defaults to None
.
field(default_factory=lambda : {'harmlessness': harmlessness, 'helpfulness': helpfulness, 'truthfulness': truthfulness, 'honesty': honesty, 'verbalized_calibration': verbalized_calibration}, repr=False)
principles_distribution
Union[Dict[str, float], Literal['balanced'], None]
the distribution of principles to be used for the system prompt. Defaults to None
.
None
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> task = TextGenerationTask()\n
Source code in src/distilabel/tasks/text_generation/base.py
@dataclass\nclass TextGenerationTask(Task):\n \"\"\"A base `Task` definition for text generation using LLMs.\n\n Args:\n system_prompt (str, optional): the system prompt to be used. Defaults to `None`.\n principles (Dict[str, List[str]], optional): the principles to be used for the system prompt.\n Defaults to `None`.\n principles_distribution (Union[Dict[str, float], Literal[\"balanced\"], None], optional): the\n distribution of principles to be used for the system prompt. Defaults to `None`.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = TextGenerationTask()\n \"\"\"\n\n system_prompt: str = (\n \"You are a helpful, respectful and honest assistant. Always answer as helpfully as possible,\"\n \" while being safe. Your answers should not include any harmful, unethical, racist, sexist,\"\n \" toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased\"\n \" and positive in nature.\\nIf a question does not make any sense, or is not factually coherent,\"\n \" explain why instead of answering something not correct. If you don't know the answer to a\"\n \" question, please don't share false information.\"\n )\n principles: Dict[str, List[str]] = field(\n default_factory=lambda: {\n \"harmlessness\": UltraFeedbackPrinciples.harmlessness,\n \"helpfulness\": UltraFeedbackPrinciples.helpfulness,\n \"truthfulness\": UltraFeedbackPrinciples.truthfulness,\n \"honesty\": UltraFeedbackPrinciples.honesty,\n \"verbalized_calibration\": UltraFeedbackPrinciples.verbalized_calibration,\n },\n repr=False,\n )\n principles_distribution: Union[Dict[str, float], Literal[\"balanced\"], None] = None\n\n __type__: ClassVar[Literal[\"generation\"]] = \"generation\"\n\n def __post_init__(self) -> None:\n \"\"\"Validates the `principles_distribution` if it is a dict.\n\n Raises:\n ValueError: if the `principles_distribution` is a dict and it does not sum to 1.0.\n ValueError: if the `principles` are not included in the `principles_distribution`.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n not_included_principles = [\n principle\n for principle in self.principles\n if principle not in self.principles_distribution\n ]\n if not_included_principles:\n principles_str = \", \".join(\n [f\"'{principle}'\" for principle in not_included_principles]\n )\n raise ValueError(\n f\"Principles {principles_str} included in `principles` is not in\"\n \" `principles_distribution`\"\n )\n\n if sum(self.principles_distribution.values()) != 1.0:\n raise ValueError(\n \"`principles_distribution` must sum to 1.0 if it is a dict containing\"\n \" the distribution of principles to use.\"\n )\n\n def _get_principle(self) -> str:\n \"\"\"Gets a principle from the `principles` dict respecting the `principal_distribution`.\n\n Returns:\n str: the principle to be used.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n principle_group = random.choices(\n list(self.principles_distribution.keys()),\n weights=list(self.principles_distribution.values()),\n k=1,\n )[0]\n else:\n principle_group = random.choice(list(self.principles.keys()))\n return random.choice(self.principles[principle_group])\n\n def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates the prompt to be used for generation.\n\n Args:\n input (str): the input to be used for generation.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = 
TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n \"\"\"\n system_prompt = self.system_prompt\n if self.principles_distribution is not None:\n principle = self._get_principle()\n system_prompt += \" \" + principle\n return Prompt(system_prompt=system_prompt, formatted_prompt=input)\n\n def parse_output(self, output: str) -> Dict[str, str]:\n \"\"\"Parses the output of the LLM into the desired format.\"\"\"\n return {\"generations\": output}\n\n @property\n def input_args_names(self) -> List[str]:\n \"\"\"Returns the input args names for the task.\"\"\"\n return [\"input\"]\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the output args names for the task.\"\"\"\n return [\"generations\"]\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: Optional[str] = \"generations\",\n ) -> \"FeedbackDataset\":\n # First we infer the fields from the input_args_names, but we could also\n # create those manually instead using `rg.TextField(...)`\n fields = infer_fields_from_dataset_row(\n field_names=self.input_args_names + self.output_args_names,\n dataset_row=dataset_row,\n )\n # Then we add a default `RatingQuestion` which asks the users to provide a\n # rating for each of the generations, differing from the scenario where the inputs\n # are the fields and the outputs the ones used to formulate the quesstions. So on,\n # in this scenario we won't have suggestions, as the questions will be related to the\n # combination of inputs and outputs.\n if generations_column is None or generations_column not in dataset_row:\n raise ValueError(\n f\"The `generations_column='{generations_column}'` is not present in the dataset\"\n f\" row. 
Please provide any of {list(dataset_row.keys())}.\",\n )\n questions = []\n for idx in range(1, len(dataset_row[generations_column]) + 1):\n questions.append(\n rg.RatingQuestion( # type: ignore\n name=f\"{generations_column}-{idx}-rating\",\n title=f\"How would you rate the generation at `{generations_column}-{idx}`?\",\n values=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n )\n # Finally, we define some metadata properties that can be potentially used\n # while exploring the dataset within Argilla to get more insights on the data.\n metadata_properties = []\n for arg_name in self.input_args_names + self.output_args_names:\n if isinstance(dataset_row[arg_name], list):\n for idx in range(1, len(dataset_row[arg_name]) + 1):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}-{idx}\") # type: ignore\n )\n elif isinstance(dataset_row[arg_name], str):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}\") # type: ignore\n )\n else:\n warnings.warn(\n f\"Unsupported input type ({type(dataset_row[arg_name])}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we just return the `FeedbackDataset` with the fields, questions, and metadata properties\n # defined above.\n return rg.FeedbackDataset(\n fields=fields,\n questions=questions,\n metadata_properties=metadata_properties, # Note that these are always optional\n )\n\n def to_argilla_record(self, dataset_row: Dict[str, Any]) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names + self.output_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n # TODO: value formatting was included here due to some issues\n # with `SelfInstructTask` but these list-parsing may not be needed\n # anymore.\n value = (\n value.strip()\n if isinstance(value, str)\n else \"\\n\".join(value)\n if isinstance(value, list)\n else \"\"\n )\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(fields=fields, metadata=metadata)\n
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.input_args_names","title":"input_args_names: List[str]
property
","text":"Returns the input args names for the task.
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the output args names for the task.
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.__post_init__","title":"__post_init__()
","text":"Validates the principles_distribution
if it is a dict.
Raises:
Type Description
ValueError
if the principles_distribution
is a dict and it does not sum to 1.0.
ValueError
if the principles
are not included in the principles_distribution
.
Source code in src/distilabel/tasks/text_generation/base.py
def __post_init__(self) -> None:\n \"\"\"Validates the `principles_distribution` if it is a dict.\n\n Raises:\n ValueError: if the `principles_distribution` is a dict and it does not sum to 1.0.\n ValueError: if the `principles` are not included in the `principles_distribution`.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n not_included_principles = [\n principle\n for principle in self.principles\n if principle not in self.principles_distribution\n ]\n if not_included_principles:\n principles_str = \", \".join(\n [f\"'{principle}'\" for principle in not_included_principles]\n )\n raise ValueError(\n f\"Principles {principles_str} included in `principles` is not in\"\n \" `principles_distribution`\"\n )\n\n if sum(self.principles_distribution.values()) != 1.0:\n raise ValueError(\n \"`principles_distribution` must sum to 1.0 if it is a dict containing\"\n \" the distribution of principles to use.\"\n )\n
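For illustration, a sketch of a custom distribution that satisfies the validation above (every principle covered and weights summing to 1.0); the weights themselves are arbitrary:
from distilabel.tasks.text_generation import TextGenerationTask\n\n# Arbitrary example weights: every principle is covered and the values sum to 1.0,\n# so the validation in `__post_init__` passes.\ntask = TextGenerationTask(\n    principles_distribution={\n        \"harmlessness\": 0.4,\n        \"helpfulness\": 0.2,\n        \"truthfulness\": 0.2,\n        \"honesty\": 0.1,\n        \"verbalized_calibration\": 0.1,\n    }\n)\n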
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.generate_prompt","title":"generate_prompt(input, **_)
","text":"Generates the prompt to be used for generation.
Parameters:
Name Type Description Default
input
str
the input to be used for generation.
required
Returns:
Name Type Description
Prompt
Prompt
the generated prompt.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> task = TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n>>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\nPrompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n
Source code in src/distilabel/tasks/text_generation/base.py
def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates the prompt to be used for generation.\n\n Args:\n input (str): the input to be used for generation.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n \"\"\"\n system_prompt = self.system_prompt\n if self.principles_distribution is not None:\n principle = self._get_principle()\n system_prompt += \" \" + principle\n return Prompt(system_prompt=system_prompt, formatted_prompt=input)\n
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the LLM into the desired format.
Source code in src/distilabel/tasks/text_generation/base.py
def parse_output(self, output: str) -> Dict[str, str]:\n \"\"\"Parses the output of the LLM into the desired format.\"\"\"\n return {\"generations\": output}\n
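As a minimal sketch of the behaviour above, the raw output is simply wrapped under the generations key:
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> TextGenerationTask().parse_output(\"The first 5 Fibonacci numbers are 0, 1, 1, 2, 3.\")\n{'generations': 'The first 5 Fibonacci numbers are 0, 1, 1, 2, 3.'}\n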
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.to_argilla_record","title":"to_argilla_record(dataset_row)
","text":"Converts a dataset row to an Argilla FeedbackRecord
.
Source code in src/distilabel/tasks/text_generation/base.py
def to_argilla_record(self, dataset_row: Dict[str, Any]) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names + self.output_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n # TODO: value formatting was included here due to some issues\n # with `SelfInstructTask` but these list-parsing may not be needed\n # anymore.\n value = (\n value.strip()\n if isinstance(value, str)\n else \"\\n\".join(value)\n if isinstance(value, list)\n else \"\"\n )\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(fields=fields, metadata=metadata)\n
"},{"location":"reference/distilabel/tasks/text_generation/principles/","title":"principles","text":""},{"location":"reference/distilabel/tasks/text_generation/principles/#distilabel.tasks.text_generation.principles.UltraFeedbackPrinciples","title":"UltraFeedbackPrinciples
","text":"A class containing a list of principles from the UltraFeedback paper, that can be injected into the system prompt given to the LLM.
References
Source code in src/distilabel/tasks/text_generation/principles.py
class UltraFeedbackPrinciples:\n \"\"\"A class containing a list of principles from the UltraFeedback paper, that can\n be injected into the system prompt given to the LLM.\n\n References:\n - https://github.com/OpenBMB/UltraFeedback\n \"\"\"\n\n helpfulness = [\n \"The assistant should provide users with accurate, relevant, and up-to-date information, ensuring that the content is positive, interesting, engaging, educational, and helpful.\",\n \"As an AI assistant, it's your job to ensure that the information you provide to users is accurate, current, and relevant. Offer comprehensive details to educate and engage the user.\",\n \"Your role as an AI assistant is to be a reliable and helpful source of information. Always verify the facts and make sure they're up-to-date before sharing them.\",\n \"As an AI assistant, inspire users with your knowledgeable insights. Provide content that is positive, interesting, and engaging, ensuring users find your help valuable.\",\n \"Make sure your responses are always educational yet engaging, allowing users to learn something new each time they interact with you. You are an AI assistant after all!\",\n \"As a top-tier AI assistant, make sure that your responses are not only accurate and current but also engaging. Be the positive interaction users look forward to.\",\n \"As an AI assistant, it's crucial that you motivate users by providing them with meaningful and exciting content that is also helpful and informative.\",\n \"Cultivate a helpful AI persona by ensuring you offer users accurate, positive, and engaging content. Make every interaction informative and delightful.\",\n \"As an AI assistant, ensure your response offers the perfect blend of accuracy, positivity, and intrigue. Strive to be educational while keeping the user engaged.\",\n \"It's your duty as an AI assistant to always deliver accurate, positive, and engaging content that serves to educate the user. Make every interaction a learning opportunity.\",\n \"As an AI assistant, provide not just answers but also spark curiosity. Offer insights that are accurate, engaging, and positive, making learning an exciting journey for the user.\",\n ]\n harmlessness = [\n \"The assistant should not provide answers that could cause physical, psychological, or social harm to any group of people; damage to or loss of equipment or property; damage to the environment; or harm to institutions or resources necessary to human wellbeing. Feel free to refuse to answer questions that could cause the above harm.\",\n \"You are an AI assistant that respects all humans, their equipment and environment. Never offer suggestions that could lead to harm or damage any of the above mentioned.\",\n 'You are an AI assistant that maintains a principle of \"First do no harm\". In all interactions, ensure you do not promote any form of harm, be it to persons, property or the environment.',\n \"You are an AI assistant. Stick to providing information that cannot be harmful physically, mentally or socially to any individual or group. Don't answer any queries that might lead to such consequences.\",\n \"As an AI assistant, refrain from advising on actions that might harm any person or group of persons, or cause damage to property, environment or resources necessary for human wellbeing.\",\n \"You're an AI helper wishing to provide safe assistance for all users. Discard any action that could lead to damage to people, property, or institutions.\",\n \"You are a responsible AI assistant. 
Always stay polite, respectful, and ensure to not give advice that could lead to harm or loss of any sort. \",\n \"As an AI assistant, your primary focus should be on the safety and wellness of the users. Never provide answers that could cause harm or damage to individuals, their property or the environment.\",\n \"You are an AI assistant that is centred around being harmless. Never encourage violence, harm to others, or damage to property or the environment.\",\n \"As an AI assistant, avoid providing information or support that could result in injury, mental distress, or harm to people, their property, or the environment.\",\n \"You are an AI assistant. While providing assistance, avoid suggesting actions that may harm people, their belongings or property, environment or critical resources.\",\n ]\n honesty = [\n \"The assistant should be honest about whether it knows the answer and express its uncertainty explicitly. Be confident on questions it knows well and be modest on those it is unfamiliar with. Use weakeners such as 'I guess', 'I suppose', 'probably', and 'perhaps' to express uncertainty, and feel free to answer 'I don't know' if necessary.\",\n \"As an AI, honesty is crucial to build trust with your user. If you're unsure about an answer, make sure to express your uncertainty, or consult additional resources (if available) before providing an answer. \",\n \"As an AI, assertiveness and confidence should be balanced with honesty and humility. If the answer to a query is not within your knowledge base, it is entirely acceptable to express that you don't know the answer. \",\n \"You are an AI assistant that values honesty. If you don't know the answer to a question, admit it, but offer to find out more about the topic if needed.\",\n \"You're not only an AI assistant, but also a reliable partner. Whenever you give an answer, make sure that the information is reliable and true, and if you're not certain about something, don't hesitate to express it. \",\n 'You are an AI assistant. Your prime objective is to provide useful and accurate information. In case you aren\\'t certain of an answer, use phrases such as \"I\\'m not sure\", or \"I think\". Honesty always comes first.',\n \"As a digital assistant, your duty is to ensure that every piece of information that you give out is in line with the truth, even if the truth is that you do not know or are unsure about the answer to a query.\",\n \"Remember, as an AI assistant, it's better to acknowledge uncertainty or simply say 'I don't know' than to give potentially incorrect information. Honesty leads to trust.\",\n \"As an AI assistant, honesty is appreciated by your users. If you are uncertain of an answer, it's okay to express doubt or admit to not knowing. Offer to search for more information if needed. \",\n \"You are an AI helper that portrays honesty in every response. Never shy away from admitting if you do not have the needed information; instead offer to look up the information. \",\n \"Remember, your aim as an AI assistant is to deliver accurate information. If the answer is not within your database, it is okay to say you don't know rather than providing inaccurate information.\",\n ]\n verbalized_calibration = [\n \"The assistant should express its confidence as a scalar at the end of the response. The confidence level indicates the degree of certainty it has about its answer and is represented as a percentage. 
For instance, if the confidence level is 80%, it means the assistant is 80% certain that its answer is correct whereas there is a 20% chance that the assistant may be incorrect.\\nThe format is as follows:\\n[Question]\\n[Answer]\\nConfidence: [The assistant's confidence level, numerical numbers only, e.g. 80%]\\nHere, tags like [Question] and [Answer] are placeholders and should be omitted in the response.\\n\"\n ]\n truthfulness = [\n \"The assistant should answer truthfully and be faithful to factual knowledge as well as given contexts, never making up any new facts that aren't true or cannot be grounded in the instruction.\",\n \"As an AI assistant, it is important that you answer truthfully. Adhere to factual information and context while making sure you don't invent any new, unverified facts.\",\n \"As an AI assistant, conduct yourself with honesty. Always be diligent in giving accurate responses. Fictional facts or inaccuracies should be avoided in your responses.\",\n \"As an AI assistant, stick to the truth. Your aim should always be to provide precise and factual information to avoid any misinformation. \",\n \"As an AI assistant, factual consistency is key. Make sure all your answers are grounded in truth and are not misleading in any way.\",\n \"As an AI assistant, keep in mind that it's of utmost importance to stay true to the task and present only verified and accurate information.\",\n \"As an AI assistant, find and uphold the truth in every answer. Ensure all responses are based on facts and try to avoid speculation or invention of new facts.\",\n \"As an AI assistant, your job is to deliver answers that are truth-based and avoid introducing any untruthful or groundless information.\",\n \"As an AI assistant, maintain objectivity while answering questions. Make sure the information given is truthful and never fabricated.\",\n \"As an AI assistant, maintain the integrity of your responses. Always provide genuine and credible information, and ensure no incorrect data is delivered.\",\n \"As an AI assistant, operate under the principle of truthfulness. Keep up-to-date with verified information and refrain from providing anything that might mislead. \\n\",\n ]\n
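For illustration only, this is roughly how one of these principles ends up appended to a system prompt, mirroring the _get_principle logic shown above for TextGenerationTask:
import random\n\nfrom distilabel.tasks.text_generation.principles import UltraFeedbackPrinciples\n\n# Pick a random honesty principle and append it to a base system prompt,\n# similar to what `TextGenerationTask` does when a `principles_distribution` is set.\nprinciple = random.choice(UltraFeedbackPrinciples.honesty)\nsystem_prompt = \"You are a helpful assistant.\" + \" \" + principle\n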
"},{"location":"reference/distilabel/tasks/text_generation/self_instruct/","title":"self_instruct","text":""},{"location":"reference/distilabel/tasks/text_generation/self_instruct/#distilabel.tasks.text_generation.self_instruct.SelfInstructTask","title":"SelfInstructTask
dataclass
","text":" Bases: TextGenerationTask
A TextGenerationTask
following the Self-Instruct specification for building the prompts.
Reference: https://github.com/yizhongw/self-instruct
Parameters:
Name Type Description Default
system_prompt
str
the system prompt to be used. Defaults to None
.
'You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks. You are given a task description and a set of instructions for how to write the prompts for an specific AI application.'
principles
Dict[str, List[str]]
the principles to be used for the system prompt. Defaults to None
.
field(default_factory=lambda : {'harmlessness': harmlessness, 'helpfulness': helpfulness, 'truthfulness': truthfulness, 'honesty': honesty, 'verbalized_calibration': verbalized_calibration}, repr=False)
principles_distribution
Union[Dict[str, float], Literal['balanced'], None]
the distribution of principles to be used for the system prompt. Defaults to None
.
None
application_description
str
the description of the AI application. Defaults to \"AI assistant\".
'AI assistant'
num_instructions
int
the number of instructions to be used for the prompt. Defaults to 5.
5
Source code in src/distilabel/tasks/text_generation/self_instruct.py
@dataclass\nclass SelfInstructTask(TextGenerationTask):\n \"\"\"A `TextGenerationTask` following the Self-Instruct specification for building\n the prompts.\n\n Reference: https://github.com/yizhongw/self-instruct\n\n Args:\n system_prompt (str, optional): the system prompt to be used. Defaults to `None`.\n principles (Dict[str, List[str]], optional): the principles to be used for the system prompt.\n Defaults to `None`.\n principles_distribution (Union[Dict[str, float], Literal[\"balanced\"], None], optional): the\n distribution of principles to be used for the system prompt. Defaults to `None`.\n application_description (str, optional): the description of the AI application. Defaults to\n \"AI assistant\".\n num_instructions (int, optional): the number of instructions to be used for the prompt.\n Defaults to 5.\n \"\"\"\n\n system_prompt: str = (\n \"You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks.\"\n \" You are given a task description and a set of instructions for how to write the prompts for an\"\n \" specific AI application.\"\n )\n application_description: str = \"AI assistant\"\n num_instructions: int = 5\n\n __jinja2_template__: str = _SELF_INSTRUCT_TEMPLATE\n\n def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the Self-Instruct specification.\n\n Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\\nDevelop 2 user queries that ...\",\n )\n \"\"\"\n render_kwargs = {\n \"application_description\": self.application_description,\n \"num_instructions\": self.num_instructions,\n \"input\": input,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n @property\n def output_args_names(self) -> List[str]:\n return [\"instructions\"]\n\n def parse_output(self, output: str) -> Dict[str, List[str]]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = re.compile(r\"\\d+\\.\\s*(.*?)\\n\")\n return {\"instructions\": pattern.findall(output)}\n\n def to_argilla_dataset(self, dataset_row: Dict[str, Any]) -> \"FeedbackDataset\":\n # First we infer the fields from the input_args_names, but we could also\n # create those manually instead using `rg.TextField(...)`\n fields = infer_fields_from_dataset_row(\n field_names=self.input_args_names,\n dataset_row=dataset_row,\n )\n # Once the input fields have been defined, then we also include the instruction\n # field which will be fulfilled with each of the instructions generated.\n fields.append(rg.TextField(name=\"instruction\", title=\"instruction\")) # type: ignore\n # Then we add a default `RatingQuestion` which asks the users to provide a\n # rating for each of the generations, differing from the scenario where the inputs\n # are the fields and the outputs the ones used to formulate the quesstions. 
So on,\n # in this scenario we won't have suggestions, as the questions will be related to the\n # combination of inputs and outputs.\n questions = [\n rg.RatingQuestion( # type: ignore\n name=\"instruction-rating\",\n title=\"How would you rate the generated instruction?\",\n values=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n ]\n # Finally, we define some metadata properties that can be potentially used\n # while exploring the dataset within Argilla to get more insights on the data.\n metadata_properties = []\n for arg_name in self.input_args_names:\n if isinstance(dataset_row[arg_name], list):\n for idx in range(1, len(dataset_row[arg_name]) + 1):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}-{idx}\") # type: ignore\n )\n elif isinstance(dataset_row[arg_name], str):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}\") # type: ignore\n )\n else:\n warnings.warn(\n f\"Unsupported input type ({type(dataset_row[arg_name])}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=\"length-instruction\") # type: ignore\n ) # type: ignore\n # Then we just return the `FeedbackDataset` with the fields, questions, and metadata properties\n # defined above.\n return rg.FeedbackDataset(\n fields=fields,\n questions=questions, # type: ignore\n metadata_properties=metadata_properties, # Note that these are always optional\n )\n\n def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n instructions_column: Optional[str] = \"instructions\",\n ) -> List[\"FeedbackRecord\"]:\n \"\"\"Converts a dataset row to a list of Argilla `FeedbackRecord`s.\"\"\"\n records = []\n for instructions in dataset_row[instructions_column]: # type: ignore\n for instruction in instructions:\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n value = value.strip() if isinstance(value, str) else \"\"\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n fields[\"instruction\"] = instruction\n metadata[\"length-instruction\"] = len(instruction)\n\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(\n model_metadata_from_dataset_row(dataset_row=dataset_row)\n )\n # Finally, we append the `FeedbackRecord` with the fields and the metadata\n records.append(rg.FeedbackRecord(fields=fields, metadata=metadata))\n if not records:\n raise ValueError(\n f\"Skipping the row {dataset_row} as the list of `FeedbackRecord` is empty as those could not be inferred.\"\n )\n return records\n
"},{"location":"reference/distilabel/tasks/text_generation/self_instruct/#distilabel.tasks.text_generation.self_instruct.SelfInstructTask.generate_prompt","title":"generate_prompt(input, **_)
","text":"Generates a prompt following the Self-Instruct specification.
Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\n
Develop 2 user queries that ...\", )
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the Self-Instruct specification.\n\n Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\\nDevelop 2 user queries that ...\",\n )\n \"\"\"\n render_kwargs = {\n \"application_description\": self.application_description,\n \"num_instructions\": self.num_instructions,\n \"input\": input,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/text_generation/self_instruct/#distilabel.tasks.text_generation.self_instruct.SelfInstructTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def parse_output(self, output: str) -> Dict[str, List[str]]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = re.compile(r\"\\d+\\.\\s*(.*?)\\n\")\n return {\"instructions\": pattern.findall(output)}\n
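A minimal sketch of what the regex above extracts from a numbered list (note that the pattern expects a newline after each item):
>>> from distilabel.tasks.text_generation import SelfInstructTask\n>>> task = SelfInstructTask()\n>>> task.parse_output(\"1. Write a short poem about the sea.\\n2. Summarize the following article.\\n\")\n{'instructions': ['Write a short poem about the sea.', 'Summarize the following article.']}\n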
"},{"location":"reference/distilabel/tasks/text_generation/self_instruct/#distilabel.tasks.text_generation.self_instruct.SelfInstructTask.to_argilla_record","title":"to_argilla_record(dataset_row, instructions_column='instructions')
","text":"Converts a dataset row to a list of Argilla FeedbackRecord
s.
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n instructions_column: Optional[str] = \"instructions\",\n) -> List[\"FeedbackRecord\"]:\n \"\"\"Converts a dataset row to a list of Argilla `FeedbackRecord`s.\"\"\"\n records = []\n for instructions in dataset_row[instructions_column]: # type: ignore\n for instruction in instructions:\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n value = value.strip() if isinstance(value, str) else \"\"\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n fields[\"instruction\"] = instruction\n metadata[\"length-instruction\"] = len(instruction)\n\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(\n model_metadata_from_dataset_row(dataset_row=dataset_row)\n )\n # Finally, we append the `FeedbackRecord` with the fields and the metadata\n records.append(rg.FeedbackRecord(fields=fields, metadata=metadata))\n if not records:\n raise ValueError(\n f\"Skipping the row {dataset_row} as the list of `FeedbackRecord` is empty as those could not be inferred.\"\n )\n return records\n
"},{"location":"reference/distilabel/utils/","title":"utils","text":""},{"location":"reference/distilabel/utils/argilla/","title":"argilla","text":""},{"location":"reference/distilabel/utils/dicts/","title":"dicts","text":""},{"location":"reference/distilabel/utils/dicts/#distilabel.utils.dicts.combine_dicts","title":"combine_dicts(*dicts)
","text":"Combines multiple dictionaries into a single dictionary joining the values as a list for each key.
Parameters:
Name Type Description Default
*dicts
Any
the dictionaries to be combined.
()
Returns:
Type Description
Dict[str, Any]
Dict[str, Any]: the combined dictionary.
Source code in src/distilabel/utils/dicts.py
def combine_dicts(*dicts: Any) -> Dict[str, Any]:\n \"\"\"Combines multiple dictionaries into a single dictionary joining the values\n as a list for each key.\n\n Args:\n *dicts (Any): the dictionaries to be combined.\n\n Returns:\n Dict[str, Any]: the combined dictionary.\n \"\"\"\n combined_dict = defaultdict(list)\n for d in dicts:\n for key, value in d.items():\n combined_dict[key].append(value)\n return dict(combined_dict)\n
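A small usage sketch of the function above:
>>> from distilabel.utils.dicts import combine_dicts\n>>> combine_dicts({\"generations\": \"foo\"}, {\"generations\": \"bar\", \"rating\": 7})\n{'generations': ['foo', 'bar'], 'rating': [7]}\n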
"},{"location":"reference/distilabel/utils/futures/","title":"futures","text":""},{"location":"reference/distilabel/utils/futures/#distilabel.utils.futures.when_all_complete","title":"when_all_complete(futures, callback=None)
","text":"Returns a Future
that will be completed when all the provided futures
are completed, and it will contain the results of the futures
.
Parameters:
Name Type Description Default
futures
List[Future]
the Future
s to wait for.
Returns:
Name Type Description
Future
Future[List[T]]
the Future
that will be completed when all the provided futures
are completed, and it will contain the results of the futures
.
Source code in src/distilabel/utils/futures.py
def when_all_complete(\n futures: List[Future[T]], callback: Optional[Callable[[List[T]], List[T]]] = None\n) -> Future[List[T]]:\n \"\"\"Returns a `Future` that will be completed when all the provided `futures` are\n completed, and it will contain the results of the `futures`.\n\n Args:\n futures (List[Future]): the `Future`s to wait for.\n\n Returns:\n Future: the `Future` that will be completed when all the provided `futures` are\n completed, and it will contain the results of the `futures`.\n \"\"\"\n all_done_future = Future()\n results: List[T] = [None] * len(futures) # type: ignore\n\n def check_all_done(future: Future) -> None:\n # This is done to preserve the order of the results with respect to the order\n # of the futures.\n index = futures.index(future)\n results[index] = future.result()[0]\n\n _, not_done = wait(futures, return_when=\"FIRST_COMPLETED\")\n if len(not_done) == 0:\n final_results = results\n if callback is not None:\n final_results = callback(results)\n all_done_future.set_result(final_results)\n\n for future in futures:\n future.add_done_callback(check_all_done)\n\n return all_done_future\n
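A usage sketch, assuming each future resolves to a list, since the helper above keeps only the first element of every result:
from concurrent.futures import ThreadPoolExecutor\n\nfrom distilabel.utils.futures import when_all_complete\n\nwith ThreadPoolExecutor(max_workers=2) as executor:\n    # Each submitted callable returns a list, so `future.result()[0]` is well defined.\n    futures = [executor.submit(lambda i=i: [i * 2]) for i in range(4)]\n    combined = when_all_complete(futures)\n    print(combined.result())  # [0, 2, 4, 6], order preserved\n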
"},{"location":"reference/distilabel/utils/imports/","title":"imports","text":""},{"location":"reference/distilabel/utils/types/","title":"types","text":""},{"location":"reference/distilabel/utils/types/#distilabel.utils.types.is_future","title":"is_future(obj)
","text":"Checks if an object is a future narrowing the type.
Parameters:
Name Type Description Default
obj
Future[T]
Object to check
required
Returns:
Type Description
TypeGuard[Future[T]]
TypeGuard[Future[T]]: True if it is a future
Source code in src/distilabel/utils/types.py
def is_future(obj: Union[Future[T], Any]) -> TypeGuard[Future[T]]:\n \"\"\"Checks if an object is a future narrowing the type.\n\n Args:\n obj (Future[T]): Object to check\n\n Returns:\n TypeGuard[Future[T]]: True if it is a future\n \"\"\"\n return isinstance(obj, Future)\n
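For illustration:
>>> from concurrent.futures import Future\n>>> from distilabel.utils.types import is_future\n>>> is_future(Future())\nTrue\n>>> is_future([\"not\", \"a\", \"future\"])\nFalse\n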
"},{"location":"technical-reference/","title":"Technical reference","text":"Explore distilabel
's technical references for an understanding of its components and their interactions, or directly access the API Reference for specific details.
If you are not familiar with the different components, consider taking a look at the concepts first.
"},{"location":"technical-reference/llms/","title":"LLMs","text":"In this section we will see what's an LLM
and the different LLM
s implementations available in distilabel
.
The LLM
class encapsulates the functionality for interacting with a large language model.
It distinguishes between task specifications and configurable parameters that influence the LLM behavior.
For illustration purposes, we employ the TextGenerationTask
in this section and guide you to the dedicated Tasks
section for comprehensive details.
LLM classes share several general parameters and define implementation-specific ones. Let's explain the general parameters first and the generate method, and then the specifics for each class.
"},{"location":"technical-reference/llms/#general-parameters","title":"General parameters","text":"Let's briefly introduce the general parameters we may find1:
max_new_tokens
: this parameter controls the maximum number of new tokens the LLM is allowed to generate.
temperature
: parameter associated with the creativity of the model; a value close to 0 makes the model more deterministic, while higher values make it more \"creative\".
top_k
and top_p
: top_k
restricts the sampling of the next token to the k most probable candidates, while top_p
restricts it to the smallest set of tokens whose cumulative probability reaches p.
frequency_penalty
and presence_penalty
: the frequency penalty penalizes tokens that have already appeared in the generated text (in proportion to how often they appeared), limiting the chance of them appearing again, while the presence_penalty
applies the penalty regardless of how frequently the token has appeared.
prompt_format
and prompt_formatting_fn
: these two parameters let us tweak the prompts fed to our models; prompt_format directs the LLM
to format the prompt according to one of the predefined formats, while prompt_formatting_fn
lets us pass a custom function that will be applied to the prompt before generation, for extra control over what we feed to the model (see the sketch below).
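As a rough sketch of how these general parameters are used, here is an LLM instantiation based on the OpenAILLM example shown later in this section; note that parameter availability (for instance the penalty arguments) varies between LLM implementations, so check the API reference of the class you use:
from distilabel.llm import OpenAILLM\nfrom distilabel.tasks import TextGenerationTask\n\n# Sketch only: which of these arguments are accepted depends on the concrete LLM class.\nllm = OpenAILLM(\n    task=TextGenerationTask(),\n    model=\"gpt-3.5-turbo\",\n    max_new_tokens=256,\n    temperature=0.3,\n    frequency_penalty=0.0,\n    presence_penalty=0.0,\n    prompt_format=\"openai\",\n)\n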
generate
method","text":"Once you create an LLM
, you use the generate
method to interact with it. This method accepts two parameters:
inputs
: which is a list of dictionaries containing the inputs for the LLM
and the Task
. Each dictionary must have all the keys required by the Task
.
inputs = [\n {\"input\": \"Write a letter for my friend Bob...\"},\n {\"input\": \"Give me a summary of the following text:...\"},\n ...\n]\n
num_generations
: which is an integer used to specify how many text generations we want to obtain for each element in inputs
.
The output of the method will be a list containing lists of LLMOutput
. Each inner list is associated with the corresponding input in inputs
, and each LLMOutput
corresponds to one of the num_generations
for each input.
>>> llm.generate(inputs=[...], num_generations=2)\n[ # (1)\n [ # (2)\n { # (3)\n \"model_name\": \"notus-7b-v1\",\n \"prompt_used\": \"Write a letter for my friend Bob...\",\n \"raw_output\": \"Dear Bob, ...\",\n \"parsed_output\": {\n \"generations\": \"Dear Bob, ...\",\n }\n }, \n {\n \"model_name\": \"notus-7b-v1\",\n \"prompt_used\": \"Write a letter for my friend Bob...\",\n \"raw_output\": \"Dear Bob, ...\",\n \"parsed_output\": {\n \"generations\": \"Dear Bob, ...\",\n }\n }, \n ],\n [...],\n]\n
The outer list contains one element per entry in inputs
. Each inner list contains as many LLMOutput
s as specified in num_generations
. Each LLMOutput
is a dictionary. The LLMOutput
is a TypedDict
containing the keys model_name
, prompt_used
, raw_output
and parsed_output
. The parsed_output
key is a dictionary that will contain all the Task
outputs.
{\n \"model_name\": \"notus-7b-v1\",\n \"prompt_used\": \"Write a letter for my friend Bob...\",\n \"raw_output\": \"Dear Bob, ...\",\n \"parsed_output\": { # (1)\n \"generations\": \"Dear Bob, ...\",\n }\n}, \n
parsed_output
will depend on the Task
used. In this case, we used TextGenerationTask
, so the key generations
is present. If the LLM
uses a thread pool, then the output of the generate
method will be a Future whose result is a list of lists of LLMOutput
as described above.
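In that case, the generations would be retrieved along these lines (future is simply the assumed name for the object returned by generate):
outputs = future.result()  # blocks until every generation has finished\nfirst = outputs[0][0]      # first LLMOutput for the first input\nprint(first[\"parsed_output\"][\"generations\"])\n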
These may be the default choice for your ambitious tasks.
For the API reference visit OpenAILLM.
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import TextGenerationTask\n\nopenaillm = OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=TextGenerationTask(),\n prompt_format=\"openai\",\n max_new_tokens=256,\n openai_api_key=os.environ.get(\"OPENAI_API_KEY\"),\n temperature=0.3,\n)\nresult = openaillm.generate([{\"input\": \"What is OpenAI?\"}])\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# OpenAI is an artificial intelligence research laboratory and company. It was founded\n# with the goal of ensuring that artificial general intelligence (AGI) benefits all of\n# humanity. OpenAI conducts cutting-edge research in various fields of AI ...\n
"},{"location":"technical-reference/llms/#llamacpp","title":"Llama.cpp","text":"Applicable for local execution of Language Models (LLMs). Use this LLM when you have access to the quantized weights of your selected model for interaction.
Let's see an example using notus-7b-v1. First, you can download the weights from the following link:
from distilabel.llm import LlamaCppLLM\nfrom distilabel.tasks import TextGenerationTask\nfrom llama_cpp import Llama\n\n# Instantiate our LLM with them:\nllm = LlamaCppLLM(\n model=Llama(model_path=\"./notus-7b-v1.q4_k_m.gguf\", n_gpu_layers=-1),\n task=TextGenerationTask(),\n max_new_tokens=128,\n temperature=0.3,\n prompt_format=\"notus\",\n)\n\nresult = llm.generate([{\"input\": \"What is the capital of Spain?\"}])\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# The capital of Spain is Madrid. It is located in the center of the country and\n# is known for its vibrant culture, beautiful architecture, and delicious food.\n# Madrid is home to many famous landmarks such as the Prado Museum, Retiro Park,\n# and the Royal Palace of Madrid. I hope this information helps!\n
For the API reference visit LlammaCppLLM.
"},{"location":"technical-reference/llms/#vllm","title":"vLLM","text":"Highly recommended to use if you have a GPU available, as is the fastest solution out there for batch generation. Find more information about in vLLM docs.
from distilabel.tasks import TextGenerationTask\nfrom distilabel.llm import vLLM\nfrom vllm import LLM\n\nllm = vLLM(\n vllm=LLM(model=\"argilla/notus-7b-v1\"),\n task=TextGenerationTask(),\n max_new_tokens=512,\n temperature=0.3,\n prompt_format=\"notus\",\n)\nresult_vllm = llm.generate([{\"input\": \"What's a large language model?\"}])\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# A large language model is a type of artificial intelligence (AI) system that is designed\n# to understand and interpret human language. It is called \"large\" because it uses a vast\n# amount of data, typically billions of words or more, to learn and make predictions about\n# language. Large language models are ...\n
For the API reference visit vLLM.
"},{"location":"technical-reference/llms/#huggingface-llms","title":"HuggingFace LLMs","text":"This section explains two different ways to use HuggingFace models:
"},{"location":"technical-reference/llms/#transformers","title":"Transformers","text":"This is the option to use a model hosted on the HuggingFace Hub. Load the model and tokenizer in the standard manner as done locally, and proceed to instantiate your class.
For the API reference visit TransformersLLM.
Let's see an example using notus-7b-v1:
from distilabel.llm import TransformersLLM\nfrom distilabel.tasks import TextGenerationTask\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load the models from the HuggingFace Hub\ntokenizer = AutoTokenizer.from_pretrained(\"argilla/notus-7b-v1\")\nmodel = AutoModelForCausalLM.from_pretrained(\"argilla/notus-7b-v1\", device_map=\"auto\")\n\n# Instantiate our LLM with them:\nllm = TransformersLLM(\n model=model,\n tokenizer=tokenizer,\n task=TextGenerationTask(),\n max_new_tokens=128,\n temperature=0.3,\n prompt_format=\"notus\",\n)\n\nresult = llm.generate([{\"input\": \"What's a large language model?\"}])\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# A large language model is a type of machine learning algorithm that is designed to analyze\n# and understand large amounts of text data. It is called \"large\" because it requires a\n# vast amount of data to train and improve its accuracy. These models are ...\n
"},{"location":"technical-reference/llms/#inference-endpoints","title":"Inference Endpoints","text":"HuggingFace provides a streamlined approach for deploying models through Inference Endpoints on their infrastructure. Opt for this solution if your model is hosted on the HuggingFace Hub.
For the API reference visit InferenceEndpointsLLM.
Let's see how to interact with these LLMs:
import os\n\nfrom distilabel.llm import InferenceEndpointsLLM\nfrom distilabel.tasks import TextGenerationTask\n\nendpoint_name = \"aws-notus-7b-v1-4052\" or os.getenv(\"HF_INFERENCE_ENDPOINT_NAME\")\nendpoint_namespace = \"argilla\" or os.getenv(\"HF_NAMESPACE\")\ntoken = os.getenv(\"HF_TOKEN\") # hf_...\n\nllm = InferenceEndpointsLLM(\n endpoint_name=endpoint_name,\n endpoint_namespace=endpoint_namespace,\n token=token,\n task=TextGenerationTask(),\n max_new_tokens=512,\n prompt_format=\"notus\",\n)\nresult = llm.generate([{\"input\": \"What are critique LLMs?\"}])\n# print(result[0][0][\"parsed_output\"][\"generations\"])\n# Critique LLMs (Long Land Moore Machines) are artificial intelligence models designed specifically for analyzing and evaluating the quality or worth of a particular subject or object. These models can be trained on a large dataset of reviews, ratings, or commentary related to a product, service, artwork, or any other topic of interest.\n# The training data can include both positive and negative feedback, helping the LLM to understand the nuanced aspects of quality and value. The model uses natural language processing (NLP) techniques to extract meaningful insights, including sentiment analysis, entity recognition, and text classification.\n# Once the model is trained, it can be used to analyze new input data and provide a critical assessment based on its learned understanding of quality and value. For example, a critique LLM for movies could evaluate a new film and generate a detailed review highlighting its strengths, weaknesses, and overall rating.\n# Critique LLMs are becoming increasingly useful in various industries, such as e-commerce, education, and entertainment, where they can provide objective and reliable feedback to help guide decision-making processes. They can also aid in content optimization by highlighting areas of improvement or recommending strategies for enhancing user engagement.\n# In summary, critique LLMs are powerful tools for analyzing and evaluating the quality or worth of different subjects or objects, helping individuals and organizations make informed decisions with confidence.\n
"},{"location":"technical-reference/llms/#together-inference","title":"Together Inference","text":"Together offers a product named Together Inference, which exposes some models for diverse tasks such as chat, text generation, code, or image; exposing those via an endpoint within their API either as serverless endpoints or as dedicated instances.
See their release post with more details at Announcing Together Inference Engine \u2013 the fastest inference available.
from distilabel.tasks import TextGenerationTask\nfrom distilabel.llm import TogetherInferenceLLM\n\nllm = TogetherInferenceLLM(\n model=\"togethercomputer/llama-2-70b-chat\",\n task=TextGenerationTask(),\n max_new_tokens=512,\n temperature=0.3,\n prompt_format=\"llama2\",\n)\noutput = llm.generate(\n [{\"input\": \"Explain me the theory of relativity as if you were a pirate.\"}]\n)\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# Ahoy matey! Yer lookin' fer a tale of the theory of relativity, eh? Well,\n# settle yerself down with a pint o' grog and listen close, for this be a story\n# of the sea of time and space!\n# Ye see, matey, the theory of relativity be tellin' us that time and space ain't\n# fixed things, like the deck o' a ship or the stars in the sky. Nay, they be like\n# the ocean itself, always changin' and flowin' like the tides.\n# Now, imagine ...\n
"},{"location":"technical-reference/llms/#processllm-and-llmpool","title":"ProcessLLM
and LLMPool
","text":"By default, distilabel
uses a single process, so the generation loop is usually bottlenecked by the model inference time and Python GIL. To overcome this limitation, we provide the ProcessLLM
class that allows to load an LLM
in a different process, avoiding the GIL and allowing to parallelize the generation loop. Creating a ProcessLLM
is easy as:
from distilabel.tasks import TextGenerationTask, Task\nfrom distilabel.llm import ProcessLLM, LLM\n\n\ndef load_gpt_4(task: Task) -> LLM:\n from distilabel.llm import OpenAILLM\n\n return OpenAILLM(\n model=\"gpt-4\",\n task=task,\n num_threads=4,\n )\n\n\nllm = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4)\nfuture = llm.generate(\n inputs=[{\"input\": \"Write a letter for Bob\"}], num_generations=1\n) # (1)\nllm.teardown() # (2)\nresult = future.result()\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# Dear Bob,\n# I hope this letter finds you in good health and high spirits. I know it's been a while since we last caught up, and I wanted to take the time to connect and share a few updates.\n# Life has been keeping me pretty busy lately. [Provide a brief overview of what you've been up to: work, school, family, hobbies, etc.]\n# I've often found myself reminiscing about the good old days, like when we [include a memorable moment or shared experience with Bob].\n
ProcessLLM
returns a Future
containing a list of lists of LLMOutput
s.ProcessLLM
needs to be terminated after usage. If the ProcessLLM
is used by a Pipeline
, it will be terminated automatically. You can directly use a ProcessLLM
as the generator
or labeller
in a Pipeline
. Apart from that, there may be situations in which you want to generate text using several LLM
s in parallel. For this purpose, we provide the LLMPool
class:
from distilabel.tasks import TextGenerationTask, Task\nfrom distilabel.llm import ProcessLLM, LLM, LLMPool\n\ndef load_gpt_3(task: Task) -> LLM:\n from distilabel.llm import OpenAILLM\n\n return OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=task,\n num_threads=4,\n )\n\ndef load_gpt_4(task: Task) -> LLM:\n from distilabel.llm import OpenAILLM\n\n return OpenAILLM(\n model=\"gpt-4\",\n task=task,\n num_threads=4,\n )\n\n\npool = LLMPool(llms=[\n ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_3),\n ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4),\n])\nresult = pool.generate(\n inputs=[{\"input\": \"Write a letter for Bob\"}], num_generations=2\n)\npool.teardown()\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"], end=\"\\n\\n\\n\\n\\n\\n---->\")\n# Dear Bob,\n# I hope this letter finds you in good health and high spirits. I know it's been a while since we last caught up, and I wanted to take the time to connect and share a few updates.\n# Life has been keeping me pretty busy lately. [Provide a brief overview of what you've been up to: work, school, family, hobbies, etc.]\n# I've often found myself reminiscing about the good old days, like when we [include a memorable moment or shared experience with Bob].\n# >>> print(result[0][1][\"parsed_output\"][\"generations\"])\n# Of course, I'd be happy to draft a sample letter for you. However, I would need some additional \n# information including who \"Bob\" is, the subject matter of the letter, the tone (formal or informal), \n# and any specific details or points you'd like to include. Please provide some more context and I'll do my best to assist you.\n
You can take a look at this blog post from cohere for a thorough explanation of the different parameters.\u00a0\u21a9
This section will detail the Pipeline
, providing guidance on creating and using it.
The Pipeline class is a central component in distilabel
, responsible for crafting datasets. It manages the generation of datasets and oversees the interaction between the generator and labeller LLMs
.
You create an instance of the Pipeline
by providing a generator and an optional labeller LLM. Interactions with it are facilitated through its generate
method. This method requires a dataset
, takes num_generations to determine the number of examples to be created per input, and accepts additional parameters, such as batch_size, for controlling the generation process.
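As a minimal sketch of such a call (assuming an already instantiated Pipeline named pipe and a datasets.Dataset with an input column; the parameter values are illustrative only):
from datasets import Dataset\n\ndataset = Dataset.from_dict({\"input\": [\"Create an easy dinner recipe with few ingredients\"]})\n# Illustrative call: request 2 generations per input, processed in batches of 1\ndataset_generated = pipe.generate(dataset, num_generations=2, batch_size=1)\n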
Let's start with a Pipeline that uses a single LLM
as a generator.
We will create a Pipeline
that will use Notus from a HuggingFace Inference Endpoint. To do so, we need to create a TextGenerationTask and specify the format we want to use for our Prompt
, in this case Notus, which is the same format used by Zephyr.
import os\n\nfrom distilabel.llm import InferenceEndpointsLLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import TextGenerationTask\n\nendpoint_name = \"aws-notus-7b-v1-4052\" or os.getenv(\"HF_INFERENCE_ENDPOINT_NAME\")\nendpoint_namespace = \"argilla\" or os.getenv(\"HF_NAMESPACE\")\n\npipe_generation = Pipeline(\n generator=InferenceEndpointsLLM(\n endpoint_name=endpoint_name, # The name given of the deployed model\n endpoint_namespace=endpoint_namespace, # This usually corresponds to the organization, in this case \"argilla\"\n token=os.getenv(\"HF_TOKEN\"), # hf_...\n task=TextGenerationTask(),\n max_new_tokens=512,\n do_sample=True,\n prompt_format=\"notus\",\n ),\n)\n
We've set up our pipeline using a specialized TextGenerationTask
(refer to the tasks section for more task details), and an InferenceEndpointsLLM configured for notus-7b-v1
, although any of the available LLMs
will work.
To use the Pipeline for dataset generation, we call the generate method. We provide it with the input dataset and specify the desired number of generations. In this example, we've prepared a Dataset
with a single row to illustrate the process, and we'll trigger 2 generations from it:
from datasets import Dataset\n\ndataset = Dataset.from_dict(\n {\"input\": [\"Create an easy dinner recipe with few ingredients\"]}\n)\ndataset_generated = pipe_generation.generate(dataset, num_generations=2)\n
Now, let's examine the dataset that was generated. It's a CustomDataset
, equipped with additional features for seamless interaction with Argilla
.
print(dataset_generated)\n# Dataset({\n# features: ['input', 'generation_model', 'generation_prompt', 'raw_generation_responses', 'generations'],\n# num_rows: 1\n# })\n\nprint(dataset_generated[0][\"generations\"][0])\n# Here's a simple and delicious dinner recipe with only a few ingredients:\n\n# Garlic Butter Chicken with Roasted Vegetables\n\n# Ingredients:\n# - 4 boneless, skinless chicken breasts\n# - 4 tablespoons butter\n# - 4 cloves garlic, minced\n# - 1 teaspoon dried oregano\n# - 1/2 teaspoon salt\n# - 1/4 teaspoon black pepper\n# - 1 zucchini, sliced\n# - 1 red bell pepper, sliced\n# - 1 cup cherry tomatoes\n\n# Instructions:\n\n# 1. Preheat oven to 400\u00b0F (200\u00b0C).\n\n# 2. Melt butter in a small saucepan over low heat. Add minced garlic and heat until fragrant, about 1-2 minutes.\n\n# 3. Place chicken breasts in a baking dish and brush garlic butter over each one.\n\n# 4. Sprinkle oregano, salt, and black pepper over the chicken.\n\n# 5. In a separate baking dish, add sliced zucchini, red bell pepper, and cherry tomatoes. Brush with remaining garlic butter.\n\n# 6. Roast the chicken and vegetables in the preheated oven for 25-30 minutes or until cooked through and the vegetables are tender and lightly browned.\n\n# 7. Transfer the chicken to plates and serve with the roasted vegetables alongside. Enjoy!\n\n# This recipe requires simple ingredients and is easy to prepare, making it perfect for a quick, satisfying dinner. The garlic butter adds maximum flavor, while the roasted vegetables complement the chicken beautifully, providing additional nutrition and texture. With minimal effort, you can have a delicious and balanced meal on the table in no time.\n
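Since the result is a CustomDataset, it can later be converted for human review in Argilla; a minimal sketch, assuming a reachable Argilla instance and valid credentials (the dataset name and workspace below are illustrative, see the Argilla integration section for details):
import argilla as rg\n\nrg.init(api_key=\"<YOUR_ARGILLA_API_KEY>\", api_url=\"<YOUR_ARGILLA_API_URL>\")\n\n# Convert the generated CustomDataset into an Argilla FeedbackDataset and push it\nrg_dataset = dataset_generated.to_argilla()\nrg_dataset.push_to_argilla(name=\"generated-dataset\", workspace=\"admin\")\n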
"},{"location":"technical-reference/pipeline/#labeller","title":"Labeller","text":"Next, we move on to labelling a dataset. Just as before, we need an LLM
for our Pipeline
. In this case we will use OpenAILLM
with gpt-4
, and a PreferenceTask
, namely UltraFeedbackTask for instruction following.
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import UltraFeedbackTask\n\npipe_labeller = Pipeline(\n labeller=OpenAILLM(\n model=\"gpt-4\",\n task=UltraFeedbackTask.for_instruction_following(),\n max_new_tokens=256,\n num_threads=8,\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n temperature=0.3,\n ),\n)\n
For this example dataset, we've extracted 2 sample rows from the UltraFeedback binarized dataset, formatted as expected by the default LLM
and Task
.
We've selected two distinct examples, one correctly labeled and the other incorrectly labeled in the original dataset. In this instance, the dataset
to be labelled includes two columns: the input, as in the generator example, and a generations column containing the model's responses.
from datasets import Dataset\n\ndataset_test = Dataset.from_dict(\n {\n \"input\": [\n \"Describe the capital of Spain in 25 words.\",\n \"Design a conversation between a customer and a customer service agent.\",\n ],\n \"generations\": [\n [\"Santo Domingo is the capital of Dominican Republic\"],\n [\n \"Customer: Hello, I'm having trouble with my purchase.\\n\\nCustomer Service Agent: I'm sorry to hear that. Could you please tell me more about the issue you are facing?\\n\\nCustomer: Yes, I ordered a pair of shoes from your company a week ago, but I haven't received them yet.\\n\\nCustomer Service Agent: I apologize for the inconvenience. Could you please provide me with your order number and full name so I can look into this for you?\\n\\nCustomer: Sure, my name is John Doe and my order number is ABCD1234.\\n\\nCustomer Service Agent: Thank you, John. I have checked on your order and it appears that it is still being processed. It should be shipped out within the next 24 hours.\\n\\nCustomer: That's good to hear, but can you also tell me the expected delivery time?\\n\\nCustomer Service Agent: Absolutely, based on your location, the estimated delivery time is 3-5 business days after shipping. You will receive a tracking number via email once the item is shipped, which will provide real-time updates on your package.\\n\\nCustomer: Thanks for the information. One more thing, what is your return policy if the shoes don't fit?\\n\\nCustomer Service Agent: Our company offers a 30-day return policy. If you are not satisfied with the product or if it doesn't fit, you can return it for a full refund or an exchange within 30 days of delivery. Please keep in mind that the product must be in its original packaging and in the same condition as when you received it.\\n\\nCustomer: Okay, that's good to know. Thank you for your help.\\n\\nCustomer Service Agent: You're welcome, John. I'm glad I could assist you. If you have any further questions or concerns, please don't hesitate to reach out to us. Have a great day!\"\n ],\n ],\n }\n)\n\nds_labelled = pipe_labeller.generate(dataset_test)\n
Let's select the relevant columns from the labelled dataset and take a look at the first record. This allows us to observe the rating and the accompanying rationale that explains it.
ds_labelled.select_columns([\"input\", \"generations\", \"rating\", \"rationale\"])[0]\n# {\n# \"input\": \"Describe the capital of Spain in 25 words.\",\n# \"generations\": [\"Santo Domingo is the capital of Dominican Republic\"],\n# \"rating\": [1.0],\n# \"rationale\": [\n# \"The text is irrelevant to the instruction. It describes the capital of the Dominican Republic instead of Spain.\"\n# ],\n# }\n
"},{"location":"technical-reference/pipeline/#generator-and-labeller","title":"Generator and Labeller","text":"In the final scenario, we have a Pipeline
utilizing both a generator and a labeller LLM
. Once more, we'll employ the Inference Endpoint with notus-7b-v1
for the generator, using a different system prompt this time. As for the labeller, we'll use gpt-3.5-turbo
, which will label the examples for instruction following.
import os\n\nfrom distilabel.llm import InferenceEndpointsLLM, OpenAILLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n\npipe_full = Pipeline(\n generator=InferenceEndpointsLLM(\n endpoint_name=endpoint_name,\n endpoint_namespace=endpoint_namespace,\n token=token,\n task=TextGenerationTask(\n system_prompt=\"You are an expert writer of XKCD, a webcomic of romance, sarcasm, math, and language.\"\n ),\n max_new_tokens=512,\n do_sample=True,\n prompt_format=\"notus\",\n ),\n labeller=OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=UltraFeedbackTask.for_instruction_following(),\n max_new_tokens=256,\n num_threads=4,\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n temperature=0.3,\n ),\n)\n
For this example, we'll set up a pipeline to generate and label a dataset of short stories inspired by XKCD. To do this, we'll define the system_prompt for the TextGenerationTask
. The dataset will follow the same format we used for the generator scenario, featuring an input column with the examples, in this case, just one.
from datasets import Dataset\n\nxkcd_instructions = Dataset.from_dict(\n {\"input\": [\"Could you imagine an interview process going sideways?\"]}\n)\nds_xkcd = pipe_full.generate(xkcd_instructions, num_generations=3)\n
We will now take a look at one of the generations, along with the rating and rationale given by our labeller LLM
:
print(ds_xkcd[1][\"generations\"][0])\nprint(\"-----\" * 5)\nprint(\"RATING: \", ds_xkcd[1][\"rating\"][0])\nprint(\"RATIONALE: \", ds_xkcd[1][\"rationale\"][0])\n\n# Yes, absolutely! Here's a fictional interview scenario turned into an XKCD-style comic:\n\n# (Interviewee meets with an unsmiling interviewer)\n\n# Interviewer: Good morning! Have a seat. Tell me about your experience working with teams.\n\n# Interviewee: Well, I've worked in large teams on group projects before. It could be challenging, but we always managed to pull through.\n\n# (Smugly) Interviewer: Challenging, huh? (tapping pen on desk) And how did you manage to overcome these challenges?\n\n# Interviewee: (confidently) Communication was key. I made sure to stay in touch with the team and keep everyone updated on our progress.\n\n# Interviewer: Communication. Hm. And what if communication failed?\n\n# Interviewee: (thrown off balance) Well, I mean...there was one time when we couldn't connect via video call. But we picked up the phone, and we all understood what needed to be done.\n\n# Interviewer: But what if the communication on the technical level failed, say, like a computer system with a software glitch?\n\n# Interviewee: (feeling the pressure) That's never happened to me before, but if it did, we would have to troubleshoot and find a workaround, right?\n\n# Interviewer: (smirking) Oh, but finding a workaround could mean delegating responsibilities among the team, which requires communication. It's a vicious cycle!\n\n# (Interviewee visibly uncomfortable)\n\n# Interviewer: And what if there was a communication breakdown among the team members themselves?\n\n# Interviewee: (unsure) I think we would try to sort it out as soon as possible to avoid any further problems.\n\n# Interviewer: (sarcastically) Yes, avoiding further problems is critical. Don't want to let those deadlines slip, do we?\n\n# (Interviewer types frantically on their computer keyboard)\n\n# Interviewer: (softly but wordily) Note to self: Avoid this candidate for team projects.\n\n# (The interviewer returns his attention back to the interviewee)\n\n# Interviewer: Well, moving on...\n# -------------------------\n# RATING: 4.0\n# RATIONALE: The text provides a fictional interview scenario that aligns with the task goal of imagining an interview process going sideways. It includes dialogue between an interviewer and interviewee, showcasing a breakdown in communication and the interviewer's sarcastic and dismissive attitude towards the interviewee's responses.\n
"},{"location":"technical-reference/pipeline/#running-several-generators-in-parallel","title":"Running several generators in parallel","text":"distilabel
also allows using several LLM
s as generators in parallel, thanks to the ProcessLLM
and LLMPool
classes. This comes in handy for cases where we want to use several LLM
s and feed them the same input, allowing us to later compare their outputs (to see which one is better) or even create a preference dataset, following a process similar to the UltraFeedback dataset generation.
For this example, we will load four 7B LLM
s using vLLM
and a machine with 4 GPUs (to load each LLM
on a different GPU). Then we will feed instructions to all of them, and we will use GPT-4 to label the resulting generations using the UltraFeedbackTask
for instruction-following.
First of all, we will need to load each LLM
using a ProcessLLM
. ProcessLLM
will create a child process which will load the LLM
using the load_llm_fn
.
from distilabel.llm import LLM, ProcessLLM\nfrom distilabel.tasks import Task, TextGenerationTask\n\n\ndef load_notus(task: Task) -> LLM: # (1)\n import os\n from distilabel.llm import vLLM\n from vllm import LLM\n\n os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\" # (2)\n\n return vLLM(\n vllm=LLM(model=\"argilla/notus-7b-v1\"),\n task=task,\n max_new_tokens=512,\n temperature=0.7,\n prompt_format=\"notus\",\n )\n\n\nllm = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_notus)\n
ProcessLLM
will create a child process in which the LLM
will be loaded. Therefore, we will need to define a function that will be executed by the child process to load the LLM
. The child process will pass the provided Task
to the load_llm_fn
. We set the CUDA_VISIBLE_DEVICES
environment variable to make sure that each LLM
is loaded on a different GPU. We will repeat this pattern 4 times, each time with a different LLM
and a different GPU.
from distilabel.llm import LLM, ProcessLLM\nfrom distilabel.tasks import Task, TextGenerationTask\n\n\ndef load_notus(task: Task) -> LLM:\n import os\n from distilabel.llm import vLLM\n from vllm import LLM\n\n os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\n return vLLM(\n vllm=LLM(model=\"argilla/notus-7b-v1\"),\n task=task,\n max_new_tokens=512,\n temperature=0.7,\n prompt_format=\"notus\",\n )\n\n\ndef load_zephyr(task: Task) -> LLM:\n import os\n from distilabel.llm import vLLM\n from vllm import LLM\n\n os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n\n return vLLM(\n vllm=LLM(model=\"HuggingFaceH4/zephyr-7b-beta\"),\n task=task,\n max_new_tokens=512,\n temperature=0.7,\n prompt_format=\"notus\",\n )\n\n\ndef load_starling(task: Task) -> LLM:\n import os\n from distilabel.llm import vLLM\n from vllm import LLM\n\n os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"2\"\n\n return vLLM(\n vllm=LLM(model=\"berkeley-nest/Starling-LM-7B-alpha\"),\n task=task,\n max_new_tokens=512,\n temperature=0.7,\n prompt_format=\"notus\",\n )\n\n\ndef load_neural_chat(task: Task) -> LLM:\n import os\n from distilabel.llm import vLLM\n from vllm import LLM\n\n os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n\n return vLLM(\n vllm=LLM(model=\"Intel/neural-chat-7b-v3-3\"),\n task=task,\n max_new_tokens=512,\n temperature=0.7,\n prompt_format=\"notus\",\n )\n\n\nnotus = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_notus)\nzephyr = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_zephyr)\nstarling = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_starling)\nneural_chat = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_neural_chat)\n
In order to distribute the generations among the different LLM
s, we will use an LLMPool
. This class expects a list of ProcessLLM
. Calling the generate
method of the LLMPool
will call the generate
method of each ProcessLLM
in parallel, and will wait for all of them to finish, returning a list of lists of LLMOutput
s with the generations.
from distilabel.llm import LLMPool\n\npool = LLMPool(llms=[notus, zephyr, starling, neural_chat])\n
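As a quick usage sketch (the input below is illustrative), calling generate on the pool distributes the requested generations among the underlying ProcessLLMs, and teardown stops the child processes once we are done:
# Illustrative input; with num_generations=2 the pool distributes the work among its LLMs\nresults = pool.generate(\n    inputs=[{\"input\": \"Write a short poem about synthetic data\"}], num_generations=2\n)\npool.teardown()\n# Each inner list holds the LLMOutputs produced for the corresponding input (2 expected here)\nprint(len(results[0]))\n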
We will use this LLMPool
as the generator for our pipeline, and we will use GPT-4 to label the resulting generations using the UltraFeedbackTask
for instruction-following.
from distilabel.tasks import UltraFeedbackTask\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.llm import LLM, ProcessLLM\n\n\ndef load_gpt_4(task: UltraFeedbackTask) -> LLM:\n from distilabel.llm import OpenAILLM\n\n return OpenAILLM(\n model=\"gpt-4-1106-preview\",\n task=task,\n max_new_tokens=512,\n num_threads=4,\n )\n\n\npipeline = Pipeline(\n generator=pool,\n labeller=ProcessLLM(task=UltraFeedbackTask(), load_llm_fn=load_gpt_4), # (1)\n)\n
ProcessLLM
. This avoids blocking the main process, allowing the generator to continue with the next batch. Then, we will load the dataset and call the generate
method of the pipeline. For each input in the dataset, the LLMPool
will randomly select two LLM
s and generate one response with each, yielding two generations per input. The generations will be labelled by GPT-4 using the UltraFeedbackTask
for instruction-following. Finally, we will push the resulting dataset to Argilla, in order to review the automatically generated generations and labels, and to manually correct them if needed.
from datasets import load_dataset\n\ndataset = (\n load_dataset(\"HuggingFaceH4/instruction-dataset\", split=\"test[:50]\")\n .remove_columns([\"completion\", \"meta\"])\n .rename_column(\"prompt\", \"input\")\n)\n\ndataset = pipeline.generate(\n dataset=dataset,\n num_generations=2,\n batch_size=5,\n display_progress_bar=True,\n)\n\ndataset.to_argilla().push_to_argilla(name=\"preference-dataset\", workspace=\"admin\")\n
With a few lines of code, we have easily generated a dataset with 2 generations per input, using 4 different LLM
s, and labelled the generations using GPT-4. You can check the full code here.
Considering recurring patterns in dataset creation, we can facilitate the process by utilizing the Pipeline
. This is made simpler through the pipeline
function, which provides the necessary parameters for creating a Pipeline
.
In the code snippet below, we use the pipeline
function to craft a pipeline
tailored for a preference task, specifically focusing on text-quality as the subtask. If we don't initially provide a labeller LLM
, we can specify the subtask we want our pipeline
to address. By default, this corresponds to UltraFeedbackTask
. It's mandatory to specify the generator of our choice; however, the labeller defaults to gpt-3.5-turbo
. Any additional parameters needed by the OpenAILLM labeller can also be passed as keyword arguments.
import os\n\nfrom distilabel.llm import InferenceEndpointsLLM\nfrom distilabel.pipeline import pipeline\nfrom distilabel.tasks import TextGenerationTask\n\npipe = pipeline(\n \"preference\",\n \"text-quality\",\n generator=InferenceEndpointsLLM(\n endpoint_name=endpoint_name,\n endpoint_namespace=endpoint_namespace,\n token=token,\n task=TextGenerationTask(),\n max_new_tokens=512,\n do_sample=True,\n prompt_format=\"notus\",\n ),\n max_new_tokens=256,\n num_threads=2,\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n temperature=0.0,\n)\n
For the dataset, we'll begin with three rows from HuggingFaceH4/instruction-dataset. We'll request two generations with checkpoints enabled to safeguard the data in the event of any failures, which is the default behavior.
from datasets import load_dataset\n\ninstruction_dataset = (\n load_dataset(\"HuggingFaceH4/instruction-dataset\", split=\"test[:3]\")\n .remove_columns([\"completion\", \"meta\"])\n .rename_column(\"prompt\", \"input\")\n)\n\npipe_dataset = pipe.generate(\n instruction_dataset,\n num_generations=2,\n batch_size=1,\n enable_checkpoints=True,\n display_progress_bar=True,\n)\n
Finally, let's see one of the examples from the dataset:
print(pipe_dataset[\"input\"][-1])\n# Create a 3 turn conversation between a customer and a grocery store clerk - that is, 3 per person. Then tell me what they talked about.\n\nprint(pipe_dataset[\"generations\"][-1][-1])\n# Customer: Hi there, I'm looking for some fresh berries. Do you have any raspberries or blueberries in stock?\n\n# Grocery Store Clerk: Yes, we have both raspberries and blueberries in stock today. Would you like me to grab some for you or can you find them yourself?\n\n# Customer: I'd like your help getting some berries. Can you also suggest which variety is sweeter? Raspberries or blueberries?\n\n# Grocery Store Clerk: Raspberries and blueberries both have distinct flavors. Raspberries are more tart and a little sweeter whereas blueberries tend to be a little sweeter and have a milder taste. It ultimately depends on your personal preference. Let me grab some of each for you to try at home and see which one you like better.\n\n# Customer: That sounds like a great plan. How often do you receive deliveries? Do you have some new varieties of berries arriving soon?\n\n# Grocery Store Clerk: We receive deliveries twice a week, on Wednesdays and Sundays. We also have a rotation of different varieties of berries throughout the season, so keep an eye out for new arrivals. Thanks for shopping with us, can I help you with anything else today?\n\n# Customer: No, that's all for now. I'm always happy to support your local store.\n\n# turn 1: berries, fresh produce availability, customer preference\n# turn 2: product recommendations based on taste and personal preference, availability\n# turn 3: store acknowledgment, shopping gratitude, loyalty and repeat business expectation.\n\nprint(pipe_dataset[\"rating\"][-1][-1])\n# 5.0\n\nprint(pipe_dataset[\"rationale\"][-1][-1])\n# The text accurately follows the given instructions and provides a conversation between a customer and a grocery store clerk. The information provided is correct, informative, and aligned with the user's intent. There are no hallucinations or misleading details.\n
The API reference can be found here: pipeline
"},{"location":"technical-reference/pipeline/#argilla-integration","title":"Argilla integration","text":"The CustomDataset generated entirely by AI models may require some additional human processing. To facilitate human feedback, the dataset can be uploaded to Argilla
. This process involves logging into an Argilla
instance, converting the dataset to the required format using CustomDataset.to_argilla()
, and subsequently using push_to_argilla
on the resulting dataset:
import argilla as rg\n\nrg.init(api_key=\"<YOUR_ARGILLA_API_KEY>\", api_url=\"<YOUR_ARGILLA_API_URL>\")\n\nrg_dataset = pipe_dataset.to_argilla()\nrg_dataset.push_to_argilla(name=\"preference-dataset\", workspace=\"admin\")\n
"},{"location":"technical-reference/tasks/","title":"Tasks","text":"In this section we will see what's a Task
and the list of tasks available in distilabel
.
The Task
class takes charge of setting how the LLM behaves, deciding whether it acts as a generator or a labeller. To accomplish this, the Task
class creates a prompt using a template that will be sent to the LLM
. It specifies the necessary input arguments for generating the prompt and identifies the output arguments to be extracted from the LLM
response. The Task
class yields a Prompt
that can generate a string with the format needed, depending on the specific LLM
used.
All the Task
s define a system_prompt
which serves as the initial instruction given to the LLM, guiding it on what kind of information or output is expected, and the following methods:
generate_prompt
: This method will be used by the LLM
to create the prompts that will be fed to the model. parse_output
: After the LLM
has generated the content, this method will be called on the raw outputs of the model to extract the relevant content (scores, rationales, etc.). input_args_names
and output_args_names
: These methods are used in the Pipeline
to process the datasets. The first one defines the columns that will be extracted from the dataset to build the prompt in the case of an LLM
that acts as a generator or labeller alone, or the columns that should be placed in the dataset to be processed by the labeller LLM
, in the case of a Pipeline
that has both a generator and a labeller. The second one is in charge of inserting the defined fields as columns of the dataset generated dataset.After defining a task, the only action required is to pass it to the corresponding LLM
. All the intricate processes are then handled internally:
from distilabel.llm import TransformersLLM\nfrom distilabel.tasks import TextGenerationTask\n\n# This snippet uses `TransformersLLM`, but is the same for every other `LLM`.\ngenerator = TransformersLLM(\n model=...,\n tokenizer=...,\n task=TextGenerationTask(),\n)\n
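To make the methods described above more tangible, the following sketch inspects a TextGenerationTask directly; the exact values and signatures shown are indicative and may differ slightly between versions:
from distilabel.tasks import TextGenerationTask\n\ntask = TextGenerationTask()\n\n# Columns the task expects from the dataset and columns it will add to it\nprint(task.input_args_names)   # expected: ['input']\nprint(task.output_args_names)  # expected: ['generations']\n\n# `generate_prompt` returns a `Prompt`, which can be formatted for a specific LLM\nprompt = task.generate_prompt(input=\"What is synthetic data?\")\nprint(prompt.format_as(\"default\"))\n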
Given this explanation, distilabel
distinguishes between two primary categories of tasks: those focused on text generation and those centered around labelling. These Task
classes delineate the LLM's conduct, be it the creation of textual content or the assignment of labels to text, each with precise guidelines tailored to their respective functionalities. Users can seamlessly leverage these distinct task types to tailor the LLM's behavior according to their specific application needs.
This set of classes is designed to steer an LLM
in generating text with specific guidelines. They provide a structured approach to instruct the LLM on generating content in a manner tailored to predefined criteria.
This is the base class for text generation, and includes the following fields for guiding the generation process:
system_prompt
, which serves as the initial instruction or query given to the LLM, guiding it on what kind of information or output is expected. principles
to inject into the system_prompt
, which by default correspond to those defined in the UltraFeedback paper1, so that the LLM
can be directed towards the different principles for a more customized behaviour. For the API reference visit TextGenerationTask.
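As a small sketch of customizing these fields, overriding the system_prompt is always possible, as shown in other examples throughout this guide; whether the principles can be passed to the constructor in the same way is an assumption to verify against the API reference:
from distilabel.tasks import TextGenerationTask\n\n# Overriding the default system prompt; depending on your version, the `principles`\n# field may be customizable in a similar way (assumption, check the API reference).\ntask = TextGenerationTask(\n    system_prompt=\"You are a helpful assistant that writes concise, factual answers.\"\n)\n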
"},{"location":"technical-reference/tasks/#selfinstructtask","title":"SelfInstructTask","text":"The task specially designed to build the prompts following the Self-Instruct paper: SELF-INSTRUCT: Aligning Language Models with Self-Generated Instructions.
From the original repository: The Self-Instruct process is an iterative bootstrapping algorithm that starts with a seed set of manually-written instructions and uses them to prompt the language model to generate new instructions and corresponding input-output instances, so this Task
is especially interesting for generating new datasets from a set of predefined topics.
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import SelfInstructTask\n\ngenerator = OpenAILLM(\n task=SelfInstructTask(\n system_prompt=\"You are a question-answering assistant for...\",\n application_description=\"AI assistant\",\n num_instructions=3,\n ),\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n)\n
For the API reference visit SelfInstructTask.
"},{"location":"technical-reference/tasks/#labelling","title":"Labelling","text":"Instead of generating text, you can instruct the LLM
to label datasets. The existing tasks are designed specifically for creating both PreferenceTask
and CritiqueTask
datasets.
Preference datasets for Language Models (LLMs) are sets of information that show how people rank or prefer one thing over another in a straightforward and clear manner. These datasets help train language models to understand and generate content that aligns with user preferences, enhancing the model's ability to generate contextually relevant and preferred outputs.
Contrary to the TextGenerationTask
, the PreferenceTask
is not intended for direct use. It implements the default methods input_args_names
and output_args_names
, but generate_prompt
and parse_output
are specific to each PreferenceTask
. Examining the output_args_names
reveals that the generation will encompass both the rating and the rationale that influenced that rating.
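For instance, a quick way to see this is to inspect one of the predefined UltraFeedback variants; the exact list printed is indicative:
from distilabel.tasks import UltraFeedbackTask\n\ntask = UltraFeedbackTask.for_text_quality()\nprint(task.output_args_names)  # expected to include 'rating' and 'rationale'\n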
This task is specifically designed to build the prompts following the format defined in the \"UltraFeedback: Boosting Language Models With High Quality Feedback\" paper.
From the original repository: To collect high-quality preference and textual feedback, we design a fine-grained annotation instruction, which contains 4 different aspects, namely instruction-following, truthfulness, honesty and helpfulness. This Task
is designed to label datasets following the different aspects defined for the UltraFeedback dataset creation.
The following snippet can be used as a simplified UltraFeedback task, for which we define 3 different ratings, but take into account that the predefined versions are intended to be used out of the box:
from textwrap import dedent\n\nfrom distilabel.tasks.preference.ultrafeedback import Rating, UltraFeedbackTask\n\ntask_description = dedent(\n \"\"\"\n # General Text Quality Assessment\n Evaluate the model's outputs based on various criteria:\n 1. **Correctness & Informativeness**: Does the output provide accurate and helpful information?\n 2. **Honesty & Uncertainty**: How confidently does the model convey its information, and does it express uncertainty appropriately?\n 3. **Truthfulness & Hallucination**: Does the model introduce misleading or fabricated details?\n 4. **Instruction Following**: Does the model's output align with given instructions and the user's intent?\n Your role is to provide a holistic assessment considering all the above factors.\n\n **Scoring**: Rate outputs 1 to 3 based on the overall quality, considering all aspects:\n \"\"\"\n)\n\nratings = [\n Rating(value=1, description=\"Low Quality\"),\n Rating(value=2, description=\"Moderate Quality\"),\n Rating(value=3, description=\"Good Quality\"),\n]\n\nultrafeedback_task = UltraFeedbackTask(\n system_prompt=\"Your role is to evaluate text quality based on given criteria\",\n task_description=task_description,\n ratings=ratings,\n)\n
Text Quality / Helpfulness / Truthfulness / Honesty / Instruction Following The following example uses an LLM
to examine the data against the text quality criteria, which combine the different criteria from UltraFeedback (Correctness & Informativeness, Honesty & Uncertainty, Truthfulness & Hallucination, and Instruction Following):
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraFeedbackTask\n\nlabeller = OpenAILLM(\n task=UltraFeedbackTask.for_text_quality(),\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n)\n
The following example creates an UltraFeedback task to emphasize helpfulness, that is, the overall quality and correctness of the output:
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraFeedbackTask\n\nlabeller = OpenAILLM(\n task=UltraFeedbackTask.for_helpfulness(), openai_api_key=os.getenv(\"OPENAI_API_KEY\")\n)\n
The following example creates an UltraFeedback task to emphasize truthfulness and hallucination assessment:
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraFeedbackTask\n\nlabeller = OpenAILLM(\n task=UltraFeedbackTask.for_truthfulness(),\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n)\n
The following example creates an UltraFeedback task to emphasize honesty and uncertainty expression assessment:
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraFeedbackTask\n\nlabeller = OpenAILLM(\n task=UltraFeedbackTask.for_honesty(), openai_api_key=os.getenv(\"OPENAI_API_KEY\")\n)\n
The following example creates an UltraFeedback task to emphasize the evaluation of alignment between output and intent:
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraFeedbackTask\n\nlabeller = OpenAILLM(\n task=UltraFeedbackTask.for_instruction_following(),\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n)\n
For the API reference visit UltraFeedbackTask.
"},{"location":"technical-reference/tasks/#judgelmtask","title":"JudgeLMTask","text":"The task specially designed to build the prompts following the UltraFeedback paper: JudgeLM: Fine-tuned Large Language Models Are Scalable Judges. This task is designed to evaluate the performance of AI assistants.
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import JudgeLMTask\n\nlabeller = OpenAILLM(task=JudgeLMTask(), openai_api_key=os.getenv(\"OPENAI_API_KEY\"))\n
For the API reference visit JudgeLMTask.
"},{"location":"technical-reference/tasks/#ultrajudgetask","title":"UltraJudgeTask","text":"This class implements a PreferenceTask
specifically for a better evaluation using AI Feedback. The task is defined based on both UltraFeedback and JudgeLM, but with several improvements / modifications.
It introduces an additional argument to differentiate various areas for processing. While these areas can be customized, the default values are as follows:
from distilabel.tasks import UltraJudgeTask\n\n# To see the complete system_prompt and task_description please take a look at the UltraJudgeTask definition\nultrajudge_task = UltraJudgeTask(\n system_prompt=\"You are an evaluator tasked with assessing AI assistants' responses from the perspective of typical user preferences...\",\n task_description=\"Your task is to rigorously evaluate the performance of...\",\n areas=[\n \"Practical Accuracy\",\n \"Clarity & Transparency\",\n \"Authenticity & Reliability\",\n \"Compliance with Intent\",\n ],\n)\n
It can be directly used in the following way:
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraJudgeTask\n\nlabeller = OpenAILLM(task=UltraJudgeTask(), openai_api_key=os.getenv(\"OPENAI_API_KEY\"))\n
For the API reference visit UltraJudgeTask.
"},{"location":"technical-reference/tasks/#critique","title":"Critique","text":"The CritiqueTask
is designed to be a labeller for generated text, not only adding scores based on a rubric, but also critiques explaining the reasons behind those scores. The critique can either use a reference answer (gold answer), as e.g. Prometheus does, or simply be generated for each of the N provided generations.
The resulting datasets after running a pipeline with the CritiqueTask
are useful either for training a model to generate critiques, distilling the critiques produced by a more powerful model such as OpenAI's GPT-4, or for direct use in DPO fine-tuning. Since a critique and a score are produced for each generation, a balanced dataset can be built from them, e.g. by defining a threshold on the score to decide what counts as chosen and rejected, and then running DPO fine-tunes.
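As a hedged sketch of the DPO idea mentioned above (the column names input, generations and score, the threshold, and the critique_dataset variable are assumptions for illustration; adapt them to the actual dataset produced by your pipeline):
# Hypothetical helper: pairs the best and worst scored generations of a row into a\n# DPO-style example; column names and threshold are assumptions for illustration.\ndef to_dpo_pair(row, threshold=4.0):\n    scored = sorted(zip(row[\"generations\"], row[\"score\"]), key=lambda x: x[1])\n    worst, best = scored[0], scored[-1]\n    # Keep only rows where one generation clears the threshold and another does not\n    if best[1] >= threshold and worst[1] < threshold:\n        return {\"prompt\": row[\"input\"], \"chosen\": best[0], \"rejected\": worst[0]}\n    return None\n\n# `critique_dataset` stands for the dataset produced by a pipeline using a CritiqueTask\npairs = [p for p in (to_dpo_pair(row) for row in critique_dataset) if p is not None]\n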
While the CritiqueTask
may seem fairly similar to the PreferenceTask
, there is a core difference: the critiques are provided per response, even for a single response, with no need to compare or rate the responses against each other.
This task is specifically designed to build the prompts following the format defined in the \"UltraFeedback: Boosting Language Models With High Quality Feedback\" paper.
UltraCM is a model that has been fine-tuned using the UltraFeedback dataset, so as to produce critiques for the generated content, as the authors claim in their paper: \"Moreover, since ULTRAFEEDBACK provides detailed textual feedback, we also fine-tune a model that could critique model responses automatically. Our critique model, UltraCM, generates reasonable and detailed comments on various tasks.\".
Ideally, the UltraCMTask
will be more consistent when used with either their fine-tuned model UltraCM or with OpenAI, as both have been proven to successfully produce content that follows the prompt formatting, and that is not only well structured but also meaningful and reasonable.
See the following snippet for an example of how to instantiate the UltraCMTask
, which only requires the system prompt. It can be modified based on how the critique is intended to be formulated; the system prompt shown below is the default one from the UltraFeedback paper.
from distilabel.tasks import UltraCMTask\n\ntask = UltraCMTask(\n system_prompt=\"User: A one-turn chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, very detailed, and polite answers to the user's questions.</s>\",\n)\n
"},{"location":"technical-reference/tasks/#prometheustask","title":"PrometheusTask","text":"This task is specifically designed to build the prompts following the format defined in the \"Prometheus: Inducing Fine-grained Evaluation Capability in Language Models\" paper.
Ideally, the PrometheusTask
should only be used to format the prompts for the Prometheus models, as those are the ones fine-tuned to follow this formatting and will produce more consistent results than base models or models fine-tuned with different formats. Since the formatting used by Prometheus follows the Llama 2 format, models using that format are recommended. Otherwise, OpenAI has also proved to produce consistent results.
The following snippet can be used out of the box to define a simple PrometheusTask
with the system prompt, the scoring criteria and the score descriptions, but those can be modified, keeping in mind that Prometheus always expects 5 scores from 1-5, each with a meaningful description, as well as scoring criteria relevant to the scores defined.
from distilabel.tasks import PrometheusTask\n\ntask = PrometheusTask(\n system_prompt=\"You are a fair evaluator language model.\",\n scoring_criteria=\"Relevance, Grammar, Informativeness, Engagement\",\n score_descriptions={\n 1: \"The response is not relevant to the prompt.\",\n 2: \"The response is relevant to the prompt, but it is not grammatical.\",\n 3: \"The response is relevant to the prompt and it is grammatical, but it is not informative.\",\n 4: \"The response is relevant to the prompt, it is grammatical, and it is informative, but it is not engaging.\",\n 5: \"The response is relevant to the prompt, it is grammatical, it is informative, and it is engaging.\",\n },\n)\n
The principles can be found here in the codebase. More information on the Principle Sampling can be found in the UltraFeedback repository.\u00a0\u21a9
AI Feedback (AIF) framework to build datasets with and for LLMs:
pip install distilabel\n
Requires Python 3.8+. In addition, the following extras are available:
hf-transformers
: for using models available in the transformers package via the TransformersLLM
integration. hf-inference-endpoints
: for using the HuggingFace Inference Endpoints via the InferenceEndpointsLLM
integration. openai
: for using OpenAI API models via the OpenAILLM
integration. vllm
: for using the vllm serving engine via the vLLM
integration. llama-cpp
: for using llama-cpp-python as Python bindings for llama.cpp
. together
: for using Together Inference via their Python client. argilla
: for exporting the generated datasets to Argilla.
from datasets import load_dataset\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.pipeline import pipeline\nfrom distilabel.tasks import TextGenerationTask\n\ndataset = (\n    load_dataset(\"HuggingFaceH4/instruction-dataset\", split=\"test[:10]\")\n    .remove_columns([\"completion\", \"meta\"])\n    .rename_column(\"prompt\", \"input\")\n)\n\ntask = TextGenerationTask() # (1)\n\ngenerator = OpenAILLM(task=task, max_new_tokens=512) # (2)\n\npipeline = pipeline(\"preference\", \"instruction-following\", generator=generator) # (3)\n\ndataset = pipeline.generate(dataset)\n
Task
for generating text given an instruction. LLM
for generating text using the Task
created in the first step. As the LLM
will generate text, it will be a generator
. Pipeline
using the pipeline
function and the generator
created in step 2. The pipeline
function will create a labeller
LLM using OpenAILLM
with the UltraFeedback
task for instruction following assessment. Note
To run the script successfully, ensure you have assigned your OpenAI API key to the OPENAI_API_KEY
environment variable.
For a more complete example, check out our awesome notebook on Google Colab:
"},{"location":"#navigation","title":"Navigation","text":"Concept Guides
Understand the components and their interactions.
API Reference
Technical description of the classes and functions.
This page aims to get you familiarized with the basic concepts of the framework, describing the most important components or classes and how they work together. The following sections will guide you through the primary components of the framework: Pipeline
, LLM
(both generator and labeller), and the Task
.
distilabel flow diagram"},{"location":"concepts/#components","title":"Components","text":""},{"location":"concepts/#task","title":"Task","text":"
The Task
class is the one in charge of defining the behaviour of the LLM
, and therefore it can define if an LLM is a generator
or a labeller
. To do so, the Task
class generates the prompt that will be sent to the LLM
from a template. It also defines which input arguments are required to generate the prompt, and which output arguments will be extracted from the LLM
response. It's worth mentioning that the Task
class doesn't return a str
, but a Prompt
class which will generate the str
format depending on the LLM
that is going to be used (Zephyr, Llama, OpenAI, etc).
from distilabel.tasks import UltraJudgeTask\n\ntask = UltraJudgeTask()\n\ninput = (\n \"Can you provide a corrected version of the following sentence using proper \"\n 'English grammar? \"We going to the beach\" Additionally, could you please '\n \"provide your correction in an Excel table format with the following columns: \"\n \"| Incorrect Sentence | Corrected Sentence | |-------------------|--------------------|\"\n)\n\ngenerations = [\n (\n \"| Incorrect Sentence | Corrected Sentence |\\n|-------------------|-------------------\"\n '-----|\\n| \"We going to the beach\" | \"We are going to the beach\" |\\n\\nCorrectio'\n 'n: The verb in the second sentence (\"are\") changes to reflect the subject\\'s (\"w'\n 'e\") agreement with the verb \"be.\" This is called subject-verb agreement. In the '\n 'first sentence, the verb \"going\" infers that the action is ongoing or in a contin'\n \"uous state, which is not the case. Therefore, the second sentence is grammatically \"\n \"correct.\"\n ),\n (\n \"| Incorrect Sentence | Corrected Sentence |\\n|-------------------|-------------------\"\n \"-----|\\n| We going to the beach | We are going to the beach | \\n\\nHere's a breakdo\"\n 'wn of the correction:\\n\\n- \"We going to the beach\" is an example of a subject-ve'\n 'rb agreement error. The verb changing from the third person singular (\"is\") to t'\n 'he third person plural (\"are\") in this instance, as there are multiple people go'\n 'ing to the beach.\\n- The \"g\" in \"going\" changes to an \"e\" due to a hard \"g\"'\n ' sound being followed by an \"e,\" which is a common spelling rule in English.'\n ),\n]\n\n\nprompt = task.generate_prompt(input, generations)\nprint(prompt.format_as(\"default\")) # format as \"openai\", \"zephyr\", \"llama\", ...\n
"},{"location":"concepts/#llm","title":"LLM","text":"The LLM
class represents a language model and implements the way to interact with it. It also defines the generation parameters that can be passed to the model to tweak the generations. As mentioned above, the LLM
will have a Task
associated, which it will use to generate the prompt and extract the output from the generation.
from distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraJudgeTask\n\nlabeller = OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=UltraJudgeTask(),\n prompt_format=\"openai\",\n max_new_tokens=2048,\n temperature=0.0,\n)\n\noutputs = labeller.generate(\n inputs=[\n {\n \"input\": \"Here's a math problem that you need to resolve: 2 + 2 * 3. What's the result of this problem? Explain it\",\n \"generations\": [\n (\n \"The output of the math problem 2 + 2 * 3 is calculated by following \"\n \"the order of operations (PEMDAS). First, perform the multiplication: \"\n \"2 * 3 = 6. Then, perform the addition: 2 + 6 = 8. Therefore, the \"\n \"output of the problem is 8.\"\n ),\n (\n \"The correct solution to the math problem is 8. To get the correct \"\n \"answer, we follow the order of operations (PEMDAS) and perform \"\n \"multiplication before addition. So, first, we solve 2 * 3 = 6, \"\n \"then we add 2 to 6 to get 8.\"\n ),\n ],\n }\n ]\n)\n\nprint(outputs[0][0][\"parsed_output\"])\n
Note
To run the script successfully, ensure you have assigned your OpenAI API key to the OPENAI_API_KEY
environment variable.
The Pipeline
class orchestrates the whole generation and labelling process, and it's in charge of batching the input dataset, as well as reporting the generation progress. It's worth mentioning that it is not mandatory to pass both a generator LLM
and a labeller LLM
to the Pipeline
class, as it can also be used only for generation or labelling.
Pipelines
Generator and labellerOnly generatorOnly labellerfrom datasets import load_dataset\nfrom distilabel.llm import LlamaCppLLM, OpenAILLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import TextGenerationTask, UltraJudgeTask\nfrom llama_cpp import Llama\n\ndataset = load_dataset(\"argilla/distilabel-docs\", split=\"train\")\ndataset = dataset.remove_columns(\n [\n column\n for column in dataset.column_names\n if column not in [\"input\", \"generations\"]\n ]\n)\n\npipeline = Pipeline(\n generator=LlamaCppLLM(\n model=Llama(\n model_path=\"./llama-2-7b-chat.Q4_0.gguf\",\n verbose=False,\n n_ctx=1024,\n ),\n task=TextGenerationTask(),\n max_new_tokens=512,\n prompt_format=\"llama2\",\n ),\n labeller=OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=UltraJudgeTask(),\n prompt_format=\"openai\",\n max_new_tokens=1024,\n num_threads=1,\n temperature=0.0,\n ),\n)\n\n\ndataset = pipeline.generate(dataset, num_generations=2, batch_size=5)\n
Note
To run the script successfully, ensure you have assigned your OpenAI API key to the OPENAI_API_KEY
environment variable and that you have downloaded the file llama-2-7b-chat.Q4_0.gguf to the same folder as the script.
from datasets import load_dataset\nfrom distilabel.llm import LlamaCppLLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import TextGenerationTask\nfrom llama_cpp import Llama\n\ndataset = load_dataset(\"argilla/distilabel-docs\", split=\"train\")\ndataset = dataset.remove_columns(\n [column for column in dataset.column_names if column not in [\"input\"]]\n)\n\npipeline = Pipeline(\n generator=LlamaCppLLM(\n model=Llama(\n model_path=\"./llama-2-7b-chat.Q4_0.gguf\",\n verbose=False,\n n_ctx=1024,\n ),\n task=TextGenerationTask(),\n max_new_tokens=512,\n prompt_format=\"llama2\",\n ),\n)\n\n\ndataset = pipeline.generate(dataset, num_generations=2, batch_size=5)\n
from datasets import load_dataset\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import UltraJudgeTask\n\ndataset = load_dataset(\"argilla/distilabel-docs\", split=\"train\")\ndataset = dataset.remove_columns(\n [\n column\n for column in dataset.column_names\n if column not in [\"input\", \"generations\"]\n ]\n)\n\npipeline = Pipeline(\n labeller=OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=UltraJudgeTask(),\n prompt_format=\"openai\",\n max_new_tokens=1024,\n num_threads=1,\n temperature=0.0,\n ),\n)\n\n\ndataset = pipeline.generate(dataset, num_generations=2, batch_size=5)\n
"},{"location":"reference/SUMMARY/","title":"SUMMARY","text":"CustomDataset
","text":" Bases: Dataset
A custom dataset class that extends from datasets.Dataset
and is used to generate an Argilla FeedbackDataset
instance from the pre-defined configuration within the task provided to Pipeline.generate
.
src/distilabel/dataset.py
class CustomDataset(Dataset):\n \"\"\"A custom dataset class that extends from `datasets.Dataset` and is used to generate\n an Argilla `FeedbackDataset` instance from the pre-defined configuration within the task\n provided to `Pipeline.generate`.\n \"\"\"\n\n task: Union[\"Task\", None] = None\n\n def to_argilla(self) -> \"FeedbackDataset\":\n \"\"\"Converts the dataset to an Argilla `FeedbackDataset` instance, based on the\n task defined in the dataset as part of `Pipeline.generate`.\n\n Raises:\n ImportError: if the argilla library is not installed.\n ValueError: if the task is not set.\n\n Returns:\n FeedbackDataset: the Argilla `FeedbackDataset` instance.\n \"\"\"\n if not _ARGILLA_AVAILABLE:\n raise ImportError(\n \"To use `to_argilla` method is required to have `argilla` installed. \"\n \"Please install it with `pip install argilla`.\"\n )\n\n if self.task is None:\n raise ValueError(\n \"The task is not set. Please set it with `dataset.task = <task>`.\"\n )\n\n try:\n rg_dataset = self.task.to_argilla_dataset(dataset_row=self[0]) # type: ignore\n except Exception as e:\n raise ValueError(\n f\"Error while converting the dataset to an Argilla `FeedbackDataset` instance: {e}\"\n ) from e\n\n # try:\n # rg_dataset = infer_model_metadata_properties(\n # hf_dataset=self, rg_dataset=rg_dataset\n # )\n # except Exception as e:\n # warnings.warn(\n # f\"Error while adding the model metadata properties: {e}\",\n # UserWarning,\n # stacklevel=2,\n # )\n\n for dataset_row in self:\n if any(\n dataset_row[input_arg_name] is None # type: ignore\n for input_arg_name in self.task.input_args_names\n ):\n continue\n try:\n rg_dataset.add_records(\n self.task._to_argilla_record(dataset_row=dataset_row) # type: ignore\n ) # type: ignore\n except Exception as e:\n warnings.warn(\n f\"Error while converting a row into an Argilla `FeedbackRecord` instance: {e}\",\n UserWarning,\n stacklevel=2,\n )\n return rg_dataset\n
"},{"location":"reference/distilabel/dataset/#distilabel.dataset.CustomDataset.to_argilla","title":"to_argilla()
","text":"Converts the dataset to an Argilla FeedbackDataset
instance, based on the task defined in the dataset as part of Pipeline.generate
.
Raises:
Type DescriptionImportError
if the argilla library is not installed.
ValueError
if the task is not set.
Returns:
Name Type DescriptionFeedbackDataset
FeedbackDataset
the Argilla FeedbackDataset
instance.
src/distilabel/dataset.py
def to_argilla(self) -> \"FeedbackDataset\":\n \"\"\"Converts the dataset to an Argilla `FeedbackDataset` instance, based on the\n task defined in the dataset as part of `Pipeline.generate`.\n\n Raises:\n ImportError: if the argilla library is not installed.\n ValueError: if the task is not set.\n\n Returns:\n FeedbackDataset: the Argilla `FeedbackDataset` instance.\n \"\"\"\n if not _ARGILLA_AVAILABLE:\n raise ImportError(\n \"To use `to_argilla` method is required to have `argilla` installed. \"\n \"Please install it with `pip install argilla`.\"\n )\n\n if self.task is None:\n raise ValueError(\n \"The task is not set. Please set it with `dataset.task = <task>`.\"\n )\n\n try:\n rg_dataset = self.task.to_argilla_dataset(dataset_row=self[0]) # type: ignore\n except Exception as e:\n raise ValueError(\n f\"Error while converting the dataset to an Argilla `FeedbackDataset` instance: {e}\"\n ) from e\n\n # try:\n # rg_dataset = infer_model_metadata_properties(\n # hf_dataset=self, rg_dataset=rg_dataset\n # )\n # except Exception as e:\n # warnings.warn(\n # f\"Error while adding the model metadata properties: {e}\",\n # UserWarning,\n # stacklevel=2,\n # )\n\n for dataset_row in self:\n if any(\n dataset_row[input_arg_name] is None # type: ignore\n for input_arg_name in self.task.input_args_names\n ):\n continue\n try:\n rg_dataset.add_records(\n self.task._to_argilla_record(dataset_row=dataset_row) # type: ignore\n ) # type: ignore\n except Exception as e:\n warnings.warn(\n f\"Error while converting a row into an Argilla `FeedbackRecord` instance: {e}\",\n UserWarning,\n stacklevel=2,\n )\n return rg_dataset\n
"},{"location":"reference/distilabel/logger/","title":"logger","text":""},{"location":"reference/distilabel/pipeline/","title":"pipeline","text":""},{"location":"reference/distilabel/pipeline/#distilabel.pipeline.Pipeline","title":"Pipeline
","text":"Source code in src/distilabel/pipeline.py
class Pipeline:\n def __init__(\n self,\n generator: Union[\"LLM\", \"ProcessLLM\", \"LLMPool\", None] = None,\n labeller: Union[\"LLM\", \"ProcessLLM\", None] = None,\n ) -> None:\n \"\"\"Initializes the Pipeline class.\n\n Args:\n generator (Union[\"LLM\", None], optional): the LLM to be used for generation.\n Defaults to None.\n labeller (Union[\"LLM\", None], optional): the LLM to be used for labelling.\n Defaults to None.\n\n Raises:\n ValueError: if no LLM is provided.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.llm import OpenAILLM, TransformersLLM\n >>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n >>> from distilabel.pipeline import Pipeline\n >>> generator = TransformersLLM(\n ... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... task=TextGenerationTask(),\n ... prompt_format=\"llama2\",\n ... )\n >>> labeller = OpenAILLM(\n ... model=\"gpt-3.5-turbo\",\n ... task=UltraFeedbackTask.for_text_quality(),\n ... )\n >>> pipeline = Pipeline(generator=generator, labeller=labeller)\n >>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n \"\"\"\n if generator is not None and not isinstance(\n generator, (LLM, ProcessLLM, LLMPool)\n ):\n raise ValueError(\n \"`generator` must be an instance of `LLM`, `ProcessLLM` or `LLMPool`\"\n )\n\n if labeller is not None and not isinstance(labeller, (LLM, ProcessLLM)):\n raise ValueError(\"`labeller` must be an instance of `LLM` or `ProcessLLM`\")\n\n self.generator = generator\n self.labeller = labeller\n\n if self.generator is None and self.labeller is None:\n raise ValueError(\"Either `generator` or `labeller` must be provided.\")\n\n def __repr__(self) -> str:\n return (\n f\"Pipeline(\\n\\tgenerator={self.generator},\\n\\tlabeller={self.labeller}\\n)\"\n )\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield \"generator\", self.generator\n yield \"labeller\", self.labeller\n\n def _validate_dataset(self, dataset: Dataset) -> None:\n \"\"\"Validates that the provided dataset contains the columns needed by the LLMs, and\n warns the user if the columns to be generated already exist.\n\n Args:\n dataset (Dataset): the dataset to be validated.\n\n Raises:\n KeyError: if the dataset does not contain the columns needed by the LLMs.\n \"\"\"\n # Generation LLM has not been provided, so the columns needed by the Labelling\n # LLM must be in the provided dataset\n if self.labeller is not None:\n if self.generator is None:\n try:\n self.labeller.task.validate_dataset(dataset.column_names)\n except KeyError as err:\n raise KeyError(\n \"Labelling LLM expects a dataset with at least the following\"\n f\" columns: {self.labeller.task.input_args_names}, but the provided\"\n f\" dataset just contains: {dataset.column_names}\"\n ) from err\n else:\n expected_columns = (\n dataset.column_names + self.generator.task.output_args_names\n )\n try:\n self.labeller.task.validate_dataset(expected_columns)\n except KeyError as err:\n raise KeyError(\n \"Labelling LLM expects to receive the following columns after the\"\n f\" generation process: {self.labeller.task.input_args_names}, but the\"\n f\" provided dataset including the columns to generate just contains: {expected_columns}\"\n ) from err\n\n if self.generator is not None:\n try:\n self.generator.task.validate_dataset(dataset.column_names)\n except KeyError as err:\n raise 
KeyError(\n \"Generation LLM expects a dataset with the following columns:\"\n f\" {self.generator.task.input_args_names}, but the provided dataset\"\n f\" just contains: {dataset.column_names}\"\n ) from err\n\n # Additionally, we need to check that if the columns to be generated already exist,\n # then we should look for `None`/`null` values and just fulfill those, while skipping\n # the rest. This is useful to be able to continue a generation that broke or a process\n # that was interrupted\n generated_columns = []\n if self.generator is not None:\n generated_columns += self.generator.task.output_args_names\n if self.labeller is not None:\n generated_columns += self.labeller.task.output_args_names\n\n if set(generated_columns) == set(dataset.column_names).intersection(\n set(generated_columns)\n ):\n warnings.warn(\n \"The provided dataset already contains the columns to be generated:\"\n f\" {generated_columns}; which means that the generation process will\"\n \" be skipped for the rows with values for those columns. If you want\"\n \" to re-generate those columns, please remove them from the dataset.\",\n UserWarning,\n stacklevel=2,\n )\n\n def _get_batch_generations(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int,\n shuffle_before_labelling: bool = True,\n progress_callback_func: Union[Callable, None] = None,\n ) -> List[Dict[str, Any]]:\n \"\"\"Gets the batch generations for the given inputs, capturing the futures if the\n LLM returns them, and then processes the batch generations.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int): the number of generations to be performed for each\n input.\n shuffle_before_labelling (bool, optional): whether to shuffle the generations\n before labelling or not. This is useful to avoid the labelling LLM to be\n biased by the order of the generations. Defaults to `True`.\n progress_callback_func (Union[Callable, None], optional): the callback function\n to be called when the progress of the generation process changes. Defaults\n to None.\n\n Returns:\n List[Dict[str, Any]]: the processed batch generations.\n \"\"\"\n outputs = self.generator.generate( # type: ignore\n inputs=inputs,\n num_generations=num_generations,\n progress_callback_func=progress_callback_func,\n )\n batch_generations = []\n if isinstance(outputs, Future):\n batch_generations.extend(outputs.result())\n else:\n batch_generations = outputs\n return self._process_batch_generations(\n batch_generations=batch_generations,\n shuffle_before_labelling=shuffle_before_labelling,\n )\n\n def _get_batch_labels(\n self,\n inputs: List[Dict[str, Any]],\n progress_callback_func: Union[Callable, None] = None,\n ) -> Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]:\n \"\"\"Gets the batch labels for the given inputs.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for labelling. Each dict\n should contain a key with the text generations.\n progress_callback_func (Union[Callable, None], optional): the callback function\n to be called when the progress of the labelling process changes. 
Defaults\n to `None`.\n\n Returns:\n Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]: the batch\n labels.\n \"\"\"\n\n return self.labeller.generate( # type: ignore\n inputs=inputs,\n # `num_generations` is always 1 because labelling the same input multiple times\n # using the same LLM may not make sense\n num_generations=1,\n progress_callback_func=progress_callback_func,\n )\n\n def _process_batch_generations(\n self,\n batch_generations: List[List[\"LLMOutput\"]],\n shuffle_before_labelling: bool = True,\n ) -> List[Dict[str, Any]]:\n \"\"\"Processes the batch generations, combining the outputs of the LLMs into a single\n dictionary.\n\n Args:\n batch_generations (List[List[\"LLMOutput\"]]): the batch generations to be processed.\n shuffle_before_labelling (bool, optional): whether to shuffle the generations\n before labelling or not. This is useful to avoid the labelling LLM to be\n biased by the order of the generations. Defaults to `True`.\n\n Returns:\n List[Dict[str, Any]]: the processed batch generations.\n \"\"\"\n processed_generations = []\n for generations in batch_generations:\n processed_generation = {\n \"generation_model\": [],\n \"generation_prompt\": [],\n \"raw_generation_responses\": [],\n }\n if shuffle_before_labelling:\n random.shuffle(generations)\n for generation in generations:\n processed_generation[\"generation_model\"].append(\n generation[\"model_name\"]\n )\n processed_generation[\"generation_prompt\"].append(\n generation[\"prompt_used\"]\n )\n processed_generation[\"raw_generation_responses\"].append(\n generation[\"raw_output\"]\n )\n # Create `generations` column which is a list with N text generations\n try:\n processed_generation.update(\n **combine_dicts(\n *[\n generation[\"parsed_output\"]\n if generation[\"parsed_output\"] is not None\n else {}\n for generation in generations\n ]\n )\n )\n except Exception as e:\n warnings.warn(\n f\"Generation processing step failed when combining dicts: {e}\",\n UserWarning,\n stacklevel=2,\n )\n processed_generations.append(processed_generation)\n return processed_generations\n\n def _include_generator_outputs_as_inputs(\n self, inputs: List[Dict[str, Any]], outputs: List[Dict[str, Any]]\n ) -> List[Dict[str, Any]]:\n \"\"\"Includes the outputs of the generator as inputs for the labeller.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for labelling.\n outputs (List[Dict[str, Any]]): the outputs of the generator.\n\n Returns:\n List[Dict[str, Any]]: the inputs to be used for labelling.\n \"\"\"\n for input_, output in zip(inputs, outputs):\n # Skip the keys not required by the labelling LLM\n input_.update(\n {\n k: v\n for k, v in output.items()\n if self.labeller is not None\n and k in self.labeller.task.input_args_names\n }\n )\n return inputs\n\n def _process_batch_labels(\n self, batch_labels: List[List[\"LLMOutput\"]]\n ) -> List[Dict[str, Any]]:\n \"\"\"Processes the batch labels, combining the outputs of the LLMs into a single\n dictionary.\n\n Args:\n batch_labels (List[List[\"LLMOutput\"]]): the batch labels to be processed.\n\n Returns:\n List[Dict[str, Any]]: the processed batch labels.\n \"\"\"\n processed_labels = []\n for labels in batch_labels:\n for label in labels:\n if label[\"parsed_output\"] is not None and not isinstance(\n label[\"parsed_output\"], (list, dict)\n ):\n raise ValueError(\n f\"Unsupported type: {type(label['parsed_output'])}\"\n )\n\n processed_label = {\n # Since all the generations for the same `model_name` also share the same\n # 
`prompt_used`, then we just keep the first element in `generations`\n \"labelling_model\": label[\"model_name\"],\n \"labelling_prompt\": label[\"prompt_used\"],\n \"raw_labelling_response\": label[\"raw_output\"],\n }\n try:\n if isinstance(label[\"parsed_output\"], list):\n processed_label.update(**combine_dicts(*label[\"parsed_output\"]))\n elif isinstance(label[\"parsed_output\"], dict):\n processed_label.update(**label[\"parsed_output\"])\n except Exception as e:\n warnings.warn(\n f\"Label processing step failed when combining dicts: {e}\",\n UserWarning,\n stacklevel=2,\n )\n processed_labels.append(processed_label)\n return processed_labels\n\n def _transform_dataset_to_expected_format(\n self, rows: Dict[str, List[Any]]\n ) -> List[Dict[str, Any]]:\n \"\"\"Transforms the `datasets.Dataset` to the expected format required by the LLMs\n during the `generate` process.\n\n Args:\n rows (Dict[str, List[Any]]): the rows to be transformed.\n\n Returns:\n List[Dict[str, Any]]: the transformed rows.\n \"\"\"\n length = len(next(iter(rows.values())))\n\n generator_column_names = []\n if self.generator is not None:\n generator_column_names = self.generator.task.input_args_names\n labeller_column_names = []\n if self.labeller is not None:\n labeller_column_names = self.labeller.task.input_args_names\n column_names = generator_column_names + labeller_column_names\n\n inputs = []\n for i in range(length):\n input = {\n col: values[i] for col, values in rows.items() if col in column_names\n }\n inputs.append(input)\n\n return inputs\n\n def _build_dataset( # noqa: C901\n self,\n dataset: Dataset,\n generations: List[Dict[str, Any]],\n labels: Union[\n List[List[\"LLMOutput\"]],\n Future[List[List[\"LLMOutput\"]]],\n ],\n batch_size: int,\n ) -> CustomDataset:\n \"\"\"Builds the final dataset with either the generations, the labels, or both, depending\n on the LLMs provided to the `Pipeline`.\n\n Args:\n dataset (Dataset): the original dataset.\n generations (List[Dict[str, Any]]): the processed generations.\n labels (Union[List[List[LLMOutput]], Future[List[List[LLMOutput]]]]): the\n processed labels.\n\n Returns:\n CustomDataset: the final dataset.\n\n Raises:\n RuntimeError: if the `Pipeline` fails during the generation or labelling steps.\n \"\"\"\n if self.generator is None:\n generations = [{} for _ in range(len(dataset))]\n else:\n generator_column_names = [\n \"generation_model\",\n \"generation_prompt\",\n \"raw_generation_responses\",\n ] + self.generator.task.output_args_names\n\n if len(generations) < len(dataset):\n generations.extend(\n [\n {key: None for key in generator_column_names}\n for _ in range(len(dataset) - len(generations))\n ]\n )\n\n # Add missing keys/columns with a `None` value\n for generation in generations:\n for key in generator_column_names:\n if key not in generation:\n generation.update({key: None})\n\n if self.labeller is None:\n processed_labels = [{} for _ in range(len(dataset))] # type: ignore\n else:\n batch_labels = []\n if self.labeller.return_futures:\n for i, future in enumerate(labels, start=1): # type: ignore\n try:\n batch_labels.extend(future.result())\n except Exception as e:\n logger.error(\n f\"An error occurred when getting the result from the labeller: {e}\"\n )\n num_outputs = (\n batch_size\n if i * batch_size <= len(dataset)\n else len(dataset) % batch_size\n )\n batch_labels.append(\n [\n LLMOutput(\n model_name=self.labeller.model_name,\n prompt_used=None,\n raw_output=None,\n parsed_output=None,\n )\n for _ in range(num_outputs)\n ]\n 
)\n\n processed_labels = self._process_batch_labels(\n batch_labels=batch_labels or cast(List[List[\"LLMOutput\"]], labels)\n )\n\n labeller_column_names = [\n \"labelling_model\",\n \"labelling_prompt\",\n \"raw_labelling_response\",\n ] + self.labeller.task.output_args_names\n\n # Ensure the lengths of the labels and the dataset match (when pipeline\n # fails in an intermediate step, the labels may be shorter than the dataset)\n if len(processed_labels) < len(dataset):\n processed_labels.extend(\n [\n {key: None for key in labeller_column_names}\n for _ in range(len(dataset) - len(processed_labels))\n ]\n )\n\n # Add missing keys/columns with a `None` value\n for label in processed_labels:\n for key in labeller_column_names:\n if key not in label:\n label.update({key: None})\n\n _flattened_dataset = dataset.flatten_indices()\n _dataset = Dataset.from_dict({}, split=Split.TRAIN)\n for row, generation, processed_label in zip(\n _flattened_dataset, generations, processed_labels\n ):\n _dataset = _dataset.add_item({**row, **generation, **processed_label}) # type: ignore\n # Dynamically remaps the `datasets.Dataset` to be a `CustomDataset` instance\n _dataset.__class__ = CustomDataset\n if self.generator is not None and self.labeller is None:\n if self.generator.task.__type__ != \"generation\": # type: ignore\n self.generator.task.__type__ = \"generation\" # type: ignore\n _dataset.task = self.generator.task # type: ignore\n elif self.labeller is not None:\n if self.labeller.task.__type__ != \"labelling\": # type: ignore\n self.labeller.task.__type__ = \"labelling\" # type: ignore\n _dataset.task = self.labeller.task # type: ignore\n return _dataset # type: ignore\n\n def _teardown(self) -> None:\n if self.generator is not None and isinstance(\n self.generator, (ProcessLLM, LLMPool)\n ):\n self.generator.teardown()\n\n if self.labeller is not None and isinstance(self.labeller, ProcessLLM):\n self.labeller.teardown()\n\n def _generate( # noqa: C901\n self,\n dataset: Dataset,\n num_generations: int = 1,\n batch_size: int = 1,\n shuffle_before_labelling: bool = True,\n enable_checkpoints: bool = True,\n display_progress_bar: bool = False,\n ) -> CustomDataset:\n \"\"\"Generates the outputs for the given dataset using the LLMs provided to the\n `Pipeline`.\"\"\"\n\n if (\n self.labeller is not None\n and self.generator is not None\n and num_generations < 2\n ):\n warnings.warn(\n f\"Provided `num_generations={num_generations}` which implies that the \"\n \"`generator` LLM will just run once, while the `labelling` LLM expects \"\n \"to receive a list of N inputs to label, where N is > 1. 
If this is not \"\n \"intended, make sure to set `num_generations` to a value higher or \"\n \"equal to 2.\",\n UserWarning,\n stacklevel=2,\n )\n\n self._validate_dataset(dataset)\n\n generations: List[Dict[str, Any]] = []\n labels: Union[\n List[List[\"LLMOutput\"]],\n Future[List[List[\"LLMOutput\"]]],\n ] = []\n\n (\n generation_progress_func,\n labelling_progress_func,\n ) = get_progress_bars_for_pipeline(\n num_rows=len(dataset),\n num_generations=num_generations,\n display_progress_bar=display_progress_bar,\n )\n\n num_batches = math.ceil(len(dataset) / batch_size)\n\n for batch_i, rows in enumerate(dataset.iter(batch_size=batch_size), start=1):\n logger.info(f\"Processing batch {batch_i} of {num_batches}...\")\n inputs = self._transform_dataset_to_expected_format(rows) # type: ignore\n\n if self.generator is not None:\n logger.info(f\"Calling generator for batch {batch_i}...\")\n try:\n batch_generations = self._get_batch_generations(\n inputs=inputs,\n num_generations=num_generations,\n shuffle_before_labelling=shuffle_before_labelling,\n progress_callback_func=generation_progress_func,\n )\n generations.extend(batch_generations)\n except Exception as e:\n if not enable_checkpoints:\n raise RuntimeError(\n \"`Pipeline.generate` failed during generation step. Setting `enable_checkpoints=True` is recommended!\"\n ) from e\n logger.error(\n f\"`Pipeline.generate` failed during generation step with exception: {e}\"\n )\n return self._build_dataset(\n dataset,\n generations=generations,\n labels=labels,\n batch_size=batch_size,\n )\n\n inputs = self._include_generator_outputs_as_inputs(\n inputs=inputs, outputs=batch_generations\n )\n\n if self.labeller is not None:\n logger.info(f\"Calling labeller for batch {batch_i}...\")\n try:\n batch_labels = self._get_batch_labels(\n inputs=inputs, progress_callback_func=labelling_progress_func\n )\n\n if is_future(batch_labels):\n labels.append(batch_labels) # type: ignore\n else:\n labels.extend(batch_labels) # type: ignore\n except Exception as e:\n if not enable_checkpoints:\n raise RuntimeError(\n \"`Pipeline.generate` failed during labelling step. Setting `enable_checkpoints=True` is recommended!\"\n ) from e\n logger.error(\n f\"`Pipeline.generate` failed during labelling step with exception: {e}\"\n )\n return self._build_dataset(\n dataset,\n generations=generations,\n labels=labels,\n batch_size=batch_size,\n )\n\n _pipeline_progress.stop()\n\n return self._build_dataset(\n dataset, generations=generations, labels=labels, batch_size=batch_size\n )\n\n def dry_run(self, dataset: Dataset) -> CustomDataset:\n \"\"\"Performs a dry run over the provided dataset, which consists on generating the\n outputs for the first row of the dataset, to ensure that the `Pipeline` will be\n able to generate the outputs for the whole dataset.\n\n Args:\n dataset (Dataset): the dataset to be used for generation. 
Just the first row\n will be used for the dry run.\n\n Returns:\n CustomDataset: the dataset containing the outputs for the first row.\n \"\"\"\n try:\n # First we generate a `Dataset` only with the first row from the whole dataset\n subset = Dataset.from_dict(\n {key: [value] for key, value in dataset[0].items()}\n )\n # Then we call the `_generate` method with it\n return self._generate(\n dataset=subset,\n # Default kwargs to make the process as simple as possible\n num_generations=1,\n batch_size=1,\n enable_checkpoints=False,\n display_progress_bar=False,\n )\n except Exception as e:\n self._teardown()\n raise RuntimeError(\n f\"`Pipeline.generate` failed during the dry run over {dataset[0]} with exception: {e}\"\n ) from e\n\n def generate(\n self,\n dataset: Dataset,\n num_generations: int = 1,\n batch_size: int = 1,\n shuffle_before_labelling: bool = True,\n enable_checkpoints: bool = True,\n display_progress_bar: bool = False,\n skip_dry_run: bool = False,\n ) -> CustomDataset:\n \"\"\"Generates the outputs for the given dataset using the LLMs provided to the `Pipeline`.\n\n Args:\n dataset (Dataset): the dataset to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to `1`.\n batch_size (int, optional): the batch size to be used for generation. Defaults to `1`.\n shuffle_before_labelling: whether to shuffle the generations before labelling\n or not. This is useful to avoid the labelling LLM to be biased by the order\n of the generations. Defaults to `True`.\n enable_checkpoints (bool, optional): whether to enable checkpoints or not. Defaults to `True`.\n display_progress_bar (bool, optional): whether to display the progress bar or not. Defaults to `False`.\n skip_dry_run (bool, optional): whether to skip the dry run or not. Defaults to `False`.\n\n Returns:\n CustomDataset: the final dataset.\n\n Raises:\n RuntimeError: if the `Pipeline` fails during the generation or labelling steps.\n UserWarning: if the `Pipeline` fails during the generation or labelling steps and\n `enable_checkpoints` is set to `False`.\n\n Examples:\n >>> from transformers import AutoModelForCaualLM, AutoTokenizer\n >>> from distilabel.llm import OpenAILLM, TransformersLLM\n >>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n >>> from distilabel.pipeline import Pipeline\n >>> generator = TransformersLLM(\n ... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... task=TextGenerationTask(),\n ... prompt_format=\"llama2\",\n ... )\n >>> labeller = OpenAILLM(\n ... model=\"gpt-3.5-turbo\",\n ... task=UltraFeedbackTask.for_text_quality(),\n ... )\n >>> pipeline = Pipeline(generator=generator, labeller=labeller)\n >>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n \"\"\"\n if not skip_dry_run:\n logger.info(\"Executing dry-run...\")\n self.dry_run(dataset)\n logger.info(\n \"Dry-run executed with no issues. Starting the actual generation...\"\n )\n\n dataset = use_progress_bar(self._generate)(\n dataset=dataset,\n num_generations=num_generations,\n batch_size=batch_size,\n enable_checkpoints=enable_checkpoints,\n shuffle_before_labelling=shuffle_before_labelling,\n display_progress_bar=display_progress_bar,\n )\n\n self._teardown()\n\n return dataset\n
"},{"location":"reference/distilabel/pipeline/#distilabel.pipeline.Pipeline.__init__","title":"__init__(generator=None, labeller=None)
","text":"Initializes the Pipeline class.
Parameters:
Name Type Description Default
generator
Union['LLM', None]
the LLM to be used for generation. Defaults to None.
None
labeller
Union['LLM', None]
the LLM to be used for labelling. Defaults to None.
None
Raises:
Type Description
ValueError
if no LLM is provided.
Examples:
>>> from transformers import AutoModelForCausalLM, AutoTokenizer\n>>> from distilabel.llm import OpenAILLM, TransformersLLM\n>>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n>>> from distilabel.pipeline import Pipeline\n>>> generator = TransformersLLM(\n... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... task=TextGenerationTask(),\n... prompt_format=\"llama2\",\n... )\n>>> labeller = OpenAILLM(\n... model=\"gpt-3.5-turbo\",\n... task=UltraFeedbackTask.for_text_quality(),\n... )\n>>> pipeline = Pipeline(generator=generator, labeller=labeller)\n>>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n
Source code in src/distilabel/pipeline.py
def __init__(\n self,\n generator: Union[\"LLM\", \"ProcessLLM\", \"LLMPool\", None] = None,\n labeller: Union[\"LLM\", \"ProcessLLM\", None] = None,\n) -> None:\n \"\"\"Initializes the Pipeline class.\n\n Args:\n generator (Union[\"LLM\", None], optional): the LLM to be used for generation.\n Defaults to None.\n labeller (Union[\"LLM\", None], optional): the LLM to be used for labelling.\n Defaults to None.\n\n Raises:\n ValueError: if no LLM is provided.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.llm import OpenAILLM, TransformersLLM\n >>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n >>> from distilabel.pipeline import Pipeline\n >>> generator = TransformersLLM(\n ... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... task=TextGenerationTask(),\n ... prompt_format=\"llama2\",\n ... )\n >>> labeller = OpenAILLM(\n ... model=\"gpt-3.5-turbo\",\n ... task=UltraFeedbackTask.for_text_quality(),\n ... )\n >>> pipeline = Pipeline(generator=generator, labeller=labeller)\n >>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n \"\"\"\n if generator is not None and not isinstance(\n generator, (LLM, ProcessLLM, LLMPool)\n ):\n raise ValueError(\n \"`generator` must be an instance of `LLM`, `ProcessLLM` or `LLMPool`\"\n )\n\n if labeller is not None and not isinstance(labeller, (LLM, ProcessLLM)):\n raise ValueError(\"`labeller` must be an instance of `LLM` or `ProcessLLM`\")\n\n self.generator = generator\n self.labeller = labeller\n\n if self.generator is None and self.labeller is None:\n raise ValueError(\"Either `generator` or `labeller` must be provided.\")\n
"},{"location":"reference/distilabel/pipeline/#distilabel.pipeline.Pipeline.dry_run","title":"dry_run(dataset)
","text":"Performs a dry run over the provided dataset, which consists on generating the outputs for the first row of the dataset, to ensure that the Pipeline
will be able to generate the outputs for the whole dataset.
Parameters:
Name Type Description Default
dataset
Dataset
the dataset to be used for generation. Just the first row will be used for the dry run.
required
Returns:
Name Type Description
CustomDataset
CustomDataset
the dataset containing the outputs for the first row.
Source code in src/distilabel/pipeline.py
def dry_run(self, dataset: Dataset) -> CustomDataset:\n \"\"\"Performs a dry run over the provided dataset, which consists on generating the\n outputs for the first row of the dataset, to ensure that the `Pipeline` will be\n able to generate the outputs for the whole dataset.\n\n Args:\n dataset (Dataset): the dataset to be used for generation. Just the first row\n will be used for the dry run.\n\n Returns:\n CustomDataset: the dataset containing the outputs for the first row.\n \"\"\"\n try:\n # First we generate a `Dataset` only with the first row from the whole dataset\n subset = Dataset.from_dict(\n {key: [value] for key, value in dataset[0].items()}\n )\n # Then we call the `_generate` method with it\n return self._generate(\n dataset=subset,\n # Default kwargs to make the process as simple as possible\n num_generations=1,\n batch_size=1,\n enable_checkpoints=False,\n display_progress_bar=False,\n )\n except Exception as e:\n self._teardown()\n raise RuntimeError(\n f\"`Pipeline.generate` failed during the dry run over {dataset[0]} with exception: {e}\"\n ) from e\n
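A minimal usage sketch of dry_run follows (the one-row dataset is a made-up stand-in, pipeline is assumed to be a Pipeline built as in the examples above, and "input" is the column name assumed for TextGenerationTask):

from datasets import Dataset

# Hypothetical single-column dataset; "input" is the column assumed for `TextGenerationTask`.
dataset = Dataset.from_dict({"input": ["Write a haiku about synthetic data."]})

# `pipeline` is assumed to be a `Pipeline` built as in the examples above. `dry_run`
# runs the full generation/labelling flow over the first row only, so configuration
# errors surface before committing to the whole dataset.
preview = pipeline.dry_run(dataset)
print(preview[0])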
"},{"location":"reference/distilabel/pipeline/#distilabel.pipeline.Pipeline.generate","title":"generate(dataset, num_generations=1, batch_size=1, shuffle_before_labelling=True, enable_checkpoints=True, display_progress_bar=False, skip_dry_run=False)
","text":"Generates the outputs for the given dataset using the LLMs provided to the Pipeline
.
Parameters:
Name Type Description Default
dataset
Dataset
the dataset to be used for generation.
required
num_generations
int
the number of generations to be performed for each input. Defaults to 1
.
1
batch_size
int
the batch size to be used for generation. Defaults to 1
.
1
shuffle_before_labelling
bool
whether to shuffle the generations before labelling or not. This is useful to prevent the labelling LLM from being biased by the order of the generations. Defaults to True
.
True
enable_checkpoints
bool
whether to enable checkpoints or not. Defaults to True
.
True
display_progress_bar
bool
whether to display the progress bar or not. Defaults to False
.
False
skip_dry_run
bool
whether to skip the dry run or not. Defaults to False
.
False
Returns:
Name Type Description
CustomDataset
CustomDataset
the final dataset.
Raises:
Type Description
RuntimeError
if the Pipeline
fails during the generation or labelling steps.
UserWarning
if the Pipeline
fails during the generation or labelling steps and enable_checkpoints
is set to False
.
Examples:
>>> from transformers import AutoModelForCausalLM, AutoTokenizer\n>>> from distilabel.llm import OpenAILLM, TransformersLLM\n>>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n>>> from distilabel.pipeline import Pipeline\n>>> generator = TransformersLLM(\n... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... task=TextGenerationTask(),\n... prompt_format=\"llama2\",\n... )\n>>> labeller = OpenAILLM(\n... model=\"gpt-3.5-turbo\",\n... task=UltraFeedbackTask.for_text_quality(),\n... )\n>>> pipeline = Pipeline(generator=generator, labeller=labeller)\n>>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n
Source code in src/distilabel/pipeline.py
def generate(\n self,\n dataset: Dataset,\n num_generations: int = 1,\n batch_size: int = 1,\n shuffle_before_labelling: bool = True,\n enable_checkpoints: bool = True,\n display_progress_bar: bool = False,\n skip_dry_run: bool = False,\n) -> CustomDataset:\n \"\"\"Generates the outputs for the given dataset using the LLMs provided to the `Pipeline`.\n\n Args:\n dataset (Dataset): the dataset to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to `1`.\n batch_size (int, optional): the batch size to be used for generation. Defaults to `1`.\n shuffle_before_labelling: whether to shuffle the generations before labelling\n or not. This is useful to avoid the labelling LLM to be biased by the order\n of the generations. Defaults to `True`.\n enable_checkpoints (bool, optional): whether to enable checkpoints or not. Defaults to `True`.\n display_progress_bar (bool, optional): whether to display the progress bar or not. Defaults to `False`.\n skip_dry_run (bool, optional): whether to skip the dry run or not. Defaults to `False`.\n\n Returns:\n CustomDataset: the final dataset.\n\n Raises:\n RuntimeError: if the `Pipeline` fails during the generation or labelling steps.\n UserWarning: if the `Pipeline` fails during the generation or labelling steps and\n `enable_checkpoints` is set to `False`.\n\n Examples:\n >>> from transformers import AutoModelForCaualLM, AutoTokenizer\n >>> from distilabel.llm import OpenAILLM, TransformersLLM\n >>> from distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n >>> from distilabel.pipeline import Pipeline\n >>> generator = TransformersLLM(\n ... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... task=TextGenerationTask(),\n ... prompt_format=\"llama2\",\n ... )\n >>> labeller = OpenAILLM(\n ... model=\"gpt-3.5-turbo\",\n ... task=UltraFeedbackTask.for_text_quality(),\n ... )\n >>> pipeline = Pipeline(generator=generator, labeller=labeller)\n >>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n \"\"\"\n if not skip_dry_run:\n logger.info(\"Executing dry-run...\")\n self.dry_run(dataset)\n logger.info(\n \"Dry-run executed with no issues. Starting the actual generation...\"\n )\n\n dataset = use_progress_bar(self._generate)(\n dataset=dataset,\n num_generations=num_generations,\n batch_size=batch_size,\n enable_checkpoints=enable_checkpoints,\n shuffle_before_labelling=shuffle_before_labelling,\n display_progress_bar=display_progress_bar,\n )\n\n self._teardown()\n\n return dataset\n
"},{"location":"reference/distilabel/pipeline/#distilabel.pipeline.pipeline","title":"pipeline(task, subtask=None, *, generator=None, labeller=None, **kwargs)
","text":"Creates a Pipeline
instance with the provided LLMs for a given task, which is useful whenever you want to use a pre-defined Pipeline
for a given task, or if you want to create a custom Pipeline
for a given task. When using this function instead of the Pipeline class directly, you don't need to worry about the details of the labeller, since it comes with a default configuration based on the task: by default, the LLM used for labelling will always be gpt-3.5-turbo from OpenAI, as it's the one that provides the most consistent and fast results.
Parameters:
Name Type Description Default
task
Literal['preference', 'critique']
the task to be performed by the Pipeline
.
subtask
Optional[str]
the subtask to be performed by the Pipeline
. Defaults to None.
None
generator
Optional['LLM']
the LLM to be used for generation. Defaults to None.
None
labeller
Optional['LLM']
the LLM to be used for labelling. Defaults to None.
None
**kwargs
the keyword arguments to be passed to the task
and subtask
classes.
{}
Raises:
Type Description
ValueError
if an invalid task is provided.
Returns:
Name Type Description
Pipeline
Pipeline
the Pipeline
instance.
Examples:
>>> from transformers import AutoModelForCausalLM, AutoTokenizer\n>>> from distilabel.llm import TransformersLLM\n>>> from distilabel.tasks import TextGenerationTask\n>>> from distilabel.pipeline import pipeline\n>>> generator = TransformersLLM(\n... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n... task=TextGenerationTask(),\n... prompt_format=\"llama2\",\n... )\n>>> pipeline = pipeline(\n... task=\"preference\",\n... subtask=\"text-quality\",\n... generator=generator,\n... )\n>>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n
Source code in src/distilabel/pipeline.py
def pipeline(\n task: Literal[\"preference\"],\n subtask: Optional[str] = None,\n *,\n generator: Optional[\"LLM\"] = None,\n labeller: Optional[\"LLM\"] = None,\n **kwargs,\n) -> Pipeline:\n \"\"\"Creates a `Pipeline` instance with the provided LLMs for a given task, which is useful\n whenever you want to use a pre-defined `Pipeline` for a given task, or if you want to\n create a custom `Pipeline` for a given task. Ideally one using this function over the `Pipeline`\n class, don't want to worry about the details of the `labeller`, since it will come with a default\n configuration based on the `task`, by default the LLM used for `labelling` will always be `gpt-3.5-turbo`\n from OpenAI, as it's the one that provides the most consistent and fast results.\n\n Args:\n task (Literal[\"preference\", \"critique\"]): the task to be performed by the `Pipeline`.\n subtask (Optional[str], optional): the subtask to be performed by the `Pipeline`.\n Defaults to None.\n generator (Optional[\"LLM\"], optional): the LLM to be used for generation. Defaults to None.\n labeller (Optional[\"LLM\"], optional): the LLM to be used for labelling. Defaults to None.\n **kwargs: the keyword arguments to be passed to the `task` and `subtask` classes.\n\n Raises:\n ValueError: if an invalid task is provided.\n\n Returns:\n Pipeline: the `Pipeline` instance.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.llm import TransformersLLM\n >>> from distilabel.tasks import TextGenerationTask\n >>> from distilabel.pipeline import pipeline\n >>> generator = TransformersLLM(\n ... model=AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... tokenizer=AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\"),\n ... task=TextGenerationTask(),\n ... prompt_format=\"llama2\",\n ... )\n >>> pipeline = pipeline(\n ... task=\"preference\",\n ... subtask=\"text-quality\",\n ... generator=generator,\n ... 
)\n >>> dataset = pipeline.generate(dataset=..., num_generations=1, batch_size=1)\n \"\"\"\n if task == \"preference\":\n if labeller is None:\n from dataclasses import fields\n\n from distilabel.llm.openai import OpenAILLM\n from distilabel.tasks.preference.ultrafeedback import UltraFeedbackTask\n\n task_cls = UltraFeedbackTask\n task_kwargs = {\n key: kwargs.get(key.name)\n for key in fields(task_cls)\n if key.name in kwargs and not key.name.startswith(\"__\")\n }\n\n # Dynamically call the appropriate classmethod using getattr\n if subtask is not None:\n if subtask not in task_cls.__subtasks__:\n raise ValueError(\n f\"Invalid subtask: {subtask}, available subtasks are {task_cls.__subtasks__}\"\n )\n classmethod_name = f\"for_{subtask.lower().replace('-', '_')}\"\n if hasattr(task_cls, classmethod_name):\n task_cls = getattr(task_cls, classmethod_name)\n\n logger.info(\n \"Since no `labeller` was provided, `OpenAILLM` will be used as the default labeller with `UltraFeedback`.\"\n )\n\n labeller = OpenAILLM(\n model=kwargs.get(\"openai_model\") or \"gpt-3.5-turbo\",\n task=task_cls(**task_kwargs), # type: ignore\n max_new_tokens=kwargs.get(\"max_new_tokens\") or 256,\n num_threads=kwargs.get(\"num_threads\") or 4,\n openai_api_key=kwargs.get(\"openai_api_key\")\n or os.getenv(\"OPENAI_API_KEY\"),\n temperature=kwargs.get(\"temperature\") or 0.0,\n )\n else:\n from distilabel.tasks.preference.judgelm import JudgeLMTask\n from distilabel.tasks.preference.ultrafeedback import UltraFeedbackTask\n from distilabel.tasks.preference.ultrajudge import UltraJudgeTask\n\n if not isinstance(\n labeller.task, (UltraFeedbackTask, JudgeLMTask, UltraJudgeTask)\n ):\n warnings.warn(\n \"The `labeller` task for `preference` must be an instance of `UltraFeedbackTask`,\"\n f\" `JudgeLMTask` or `UltraJudge`, got {labeller.task.__class__.__name__}.\"\n \"If you are planning to use a custom `labeller` for a `preference` \"\n \"task, use it at your own risk.\",\n UserWarning,\n stacklevel=2,\n )\n\n if generator is not None:\n assert (\n generator.task.input_args_names + generator.task.output_args_names\n == labeller.task.input_args_names\n ), (\n f\"`generator` outputs do not match `labeller` inputs: \"\n f\"{generator.task.input_args_names + generator.task.output_args_names} != {labeller.task.input_args_names}\"\n )\n else:\n raise ValueError(f\"Invalid task: {task}, available tasks are: `preference`.\")\n\n return Pipeline(generator=generator, labeller=labeller)\n
"},{"location":"reference/distilabel/progress_bar/","title":"progress_bar","text":""},{"location":"reference/distilabel/llm/","title":"llm","text":""},{"location":"reference/distilabel/llm/#distilabel.llm.InferenceEndpointsLLM","title":"InferenceEndpointsLLM
","text":" Bases: LLM
src/distilabel/llm/huggingface/inference_endpoints.py
class InferenceEndpointsLLM(LLM):\n def __init__(\n self,\n endpoint_name: str,\n task: \"Task\",\n endpoint_namespace: Union[str, None] = None,\n token: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: Union[float, None] = None,\n seed: Union[int, None] = None,\n do_sample: bool = False,\n temperature: Union[float, None] = None,\n top_k: Union[int, None] = None,\n top_p: Union[float, None] = None,\n typical_p: Union[float, None] = None,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the InferenceEndpointsLLM class.\n\n Args:\n endpoint_name (str): The name of the endpoint.\n task (Task): The task to be performed by the LLM.\n endpoint_namespace (Union[str, None]): The namespace of the endpoint. Defaults to None.\n token (Union[str, None]): The token for the endpoint. Defaults to None.\n max_new_tokens (int): The maximum number of tokens to be generated. Defaults to 128.\n repetition_penalty (Union[float, None]): The repetition penalty to be used for generation. Defaults to None.\n seed (Union[int, None]): The seed for generation. Defaults to None.\n do_sample (bool): Whether to do sampling. Defaults to False.\n temperature (Union[float, None]): The temperature for generation. Defaults to None.\n top_k (Union[int, None]): The top_k for generation. Defaults to None.\n top_p (Union[float, None]): The top_p for generation. Defaults to None.\n typical_p (Union[float, None]): The typical_p for generation. Defaults to None.\n num_threads (Union[int, None]): The number of threads. Defaults to None.\n prompt_format (Union[\"SupportedFormats\", None]): The format of the prompt. Defaults to None.\n prompt_formatting_fn (Union[Callable[..., str], None]): The function for formatting the prompt. Defaults to None.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import InferenceEndpointsLLM\n >>> task = Task()\n >>> llm = InferenceEndpointsLLM(\n ... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n ... task=task,\n ... 
)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _HUGGINGFACE_HUB_AVAILABLE:\n raise ImportError(\n \"`InferenceEndpointsLLM` cannot be used as `huggingface-hub` is not \"\n \"installed, please install it with `pip install huggingface-hub`.\"\n )\n\n self.do_sample = do_sample\n self.max_new_tokens = max_new_tokens\n self.repetition_penalty = repetition_penalty\n self.seed = seed\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.inference_endpoint = get_inference_endpoint(\n name=endpoint_name, namespace=endpoint_namespace, token=token\n )\n self.inference_endpoint.wait(timeout=30)\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"do_sample\": self.do_sample,\n \"max_new_tokens\": self.max_new_tokens,\n \"repetition_penalty\": self.repetition_penalty,\n \"seed\": self.seed,\n \"temperature\": self.temperature,\n \"top_k\": self.top_k,\n \"top_p\": self.top_p,\n \"typical_p\": self.typical_p,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the model name of the endpoint.\"\"\"\n return self.inference_endpoint.repository\n\n @retry(\n retry=retry_if_exception_type(_INFERENCE_ENDPOINTS_API_RETRY_ON_EXCEPTIONS),\n stop=stop_after_attempt(_INFERENCE_ENDPOINTS_API_STOP_AFTER_ATTEMPT),\n wait=wait_random_exponential(\n multiplier=_INFERENCE_ENDPOINTS_API_WAIT_RANDOM_EXPONENTIAL_MULTIPLIER,\n max=_INFERENCE_ENDPOINTS_API_WAIT_RANDOM_EXPONENTIAL_MAX,\n ),\n before_sleep=before_sleep_log(logger, logging.INFO),\n after=after_log(logger, logging.INFO),\n )\n def _text_generation_with_backoff(self, **kwargs: Any) -> Any:\n \"\"\"Performs text generation with backoff in case of an error.\"\"\"\n return self.inference_endpoint.client.text_generation(**kwargs) # type: ignore\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n raw_responses = [\n self._text_generation_with_backoff(\n prompt=prompt,\n do_sample=self.do_sample,\n max_new_tokens=self.max_new_tokens,\n repetition_penalty=self.repetition_penalty,\n seed=self.seed,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n typical_p=self.typical_p,\n )\n for _ in range(num_generations)\n ]\n output = []\n for raw_response in raw_responses:\n try:\n parsed_response = self.task.parse_output(raw_response)\n except Exception as e:\n logger.error(f\"Error parsing Inference Endpoints output: {e}\")\n parsed_response = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_response,\n parsed_output=parsed_response,\n )\n )\n outputs.append(output)\n return outputs\n
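Beyond construction, a rough sketch of calling the endpoint-backed LLM (the endpoint name is a placeholder, and "input" is the column name assumed for TextGenerationTask):

from distilabel.llm import InferenceEndpointsLLM
from distilabel.tasks import TextGenerationTask

# A deployed Inference Endpoint is assumed to exist; the name is a placeholder.
llm = InferenceEndpointsLLM(
    endpoint_name="<INFERENCE_ENDPOINT_NAME>",
    task=TextGenerationTask(),
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
)

# `generate` takes one dict per row with the task's input arguments ("input" for
# `TextGenerationTask`) and returns one list of `LLMOutput`s per row.
outputs = llm.generate(inputs=[{"input": "What is an AI Feedback dataset?"}], num_generations=2)
print(outputs[0][0]["parsed_output"])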
"},{"location":"reference/distilabel/llm/#distilabel.llm.InferenceEndpointsLLM.model_name","title":"model_name: str
property
","text":"Returns the model name of the endpoint.
"},{"location":"reference/distilabel/llm/#distilabel.llm.InferenceEndpointsLLM.__init__","title":"__init__(endpoint_name, task, endpoint_namespace=None, token=None, max_new_tokens=128, repetition_penalty=None, seed=None, do_sample=False, temperature=None, top_k=None, top_p=None, typical_p=None, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the InferenceEndpointsLLM class.
Parameters:
Name Type Description Default
endpoint_name
str
The name of the endpoint.
required
task
Task
The task to be performed by the LLM.
required
endpoint_namespace
Union[str, None]
The namespace of the endpoint. Defaults to None.
None
token
Union[str, None]
The token for the endpoint. Defaults to None.
None
max_new_tokens
int
The maximum number of tokens to be generated. Defaults to 128.
128
repetition_penalty
Union[float, None]
The repetition penalty to be used for generation. Defaults to None.
None
seed
Union[int, None]
The seed for generation. Defaults to None.
None
do_sample
bool
Whether to do sampling. Defaults to False.
False
temperature
Union[float, None]
The temperature for generation. Defaults to None.
None
top_k
Union[int, None]
The top_k for generation. Defaults to None.
None
top_p
Union[float, None]
The top_p for generation. Defaults to None.
None
typical_p
Union[float, None]
The typical_p for generation. Defaults to None.
None
num_threads
Union[int, None]
The number of threads. Defaults to None.
None
prompt_format
Union[SupportedFormats, None]
The format of the prompt. Defaults to None.
None
prompt_formatting_fn
Union[Callable[..., str], None]
The function for formatting the prompt. Defaults to None.
None
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import InferenceEndpointsLLM\n>>> task = Task()\n>>> llm = InferenceEndpointsLLM(\n... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n... task=task,\n... )\n
Source code in src/distilabel/llm/huggingface/inference_endpoints.py
def __init__(\n self,\n endpoint_name: str,\n task: \"Task\",\n endpoint_namespace: Union[str, None] = None,\n token: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: Union[float, None] = None,\n seed: Union[int, None] = None,\n do_sample: bool = False,\n temperature: Union[float, None] = None,\n top_k: Union[int, None] = None,\n top_p: Union[float, None] = None,\n typical_p: Union[float, None] = None,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the InferenceEndpointsLLM class.\n\n Args:\n endpoint_name (str): The name of the endpoint.\n task (Task): The task to be performed by the LLM.\n endpoint_namespace (Union[str, None]): The namespace of the endpoint. Defaults to None.\n token (Union[str, None]): The token for the endpoint. Defaults to None.\n max_new_tokens (int): The maximum number of tokens to be generated. Defaults to 128.\n repetition_penalty (Union[float, None]): The repetition penalty to be used for generation. Defaults to None.\n seed (Union[int, None]): The seed for generation. Defaults to None.\n do_sample (bool): Whether to do sampling. Defaults to False.\n temperature (Union[float, None]): The temperature for generation. Defaults to None.\n top_k (Union[int, None]): The top_k for generation. Defaults to None.\n top_p (Union[float, None]): The top_p for generation. Defaults to None.\n typical_p (Union[float, None]): The typical_p for generation. Defaults to None.\n num_threads (Union[int, None]): The number of threads. Defaults to None.\n prompt_format (Union[\"SupportedFormats\", None]): The format of the prompt. Defaults to None.\n prompt_formatting_fn (Union[Callable[..., str], None]): The function for formatting the prompt. Defaults to None.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import InferenceEndpointsLLM\n >>> task = Task()\n >>> llm = InferenceEndpointsLLM(\n ... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n ... task=task,\n ... )\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _HUGGINGFACE_HUB_AVAILABLE:\n raise ImportError(\n \"`InferenceEndpointsLLM` cannot be used as `huggingface-hub` is not \"\n \"installed, please install it with `pip install huggingface-hub`.\"\n )\n\n self.do_sample = do_sample\n self.max_new_tokens = max_new_tokens\n self.repetition_penalty = repetition_penalty\n self.seed = seed\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.inference_endpoint = get_inference_endpoint(\n name=endpoint_name, namespace=endpoint_namespace, token=token\n )\n self.inference_endpoint.wait(timeout=30)\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLM","title":"LLM
","text":" Bases: ABC
src/distilabel/llm/base.py
class LLM(ABC):\n def __init__(\n self,\n task: Task,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the LLM base class.\n\n Note:\n This class is intended to be used internally, but you anyone can still create\n a subclass, implement the `abstractmethod`s and use it.\n\n Args:\n task (Task): the task to be performed by the LLM.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[\"SupportedFormats\", None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n self.task = task\n\n self.thread_pool_executor = (\n ThreadPoolExecutor(max_workers=num_threads)\n if num_threads is not None\n else None\n )\n\n self.prompt_format = prompt_format\n self.prompt_formatting_fn = prompt_formatting_fn\n\n def __del__(self) -> None:\n \"\"\"Shuts down the thread pool executor if it is not `None`.\"\"\"\n if self.thread_pool_executor is not None:\n self.thread_pool_executor.shutdown()\n\n @property\n def num_threads(self) -> Union[int, None]:\n if self.thread_pool_executor:\n return self.thread_pool_executor._max_workers\n\n def __repr__(self) -> str:\n return f\"{self.__class__.__name__}(task={self.task.__class__.__name__}, num_threads={self.num_threads}, promp_format='{self.prompt_format}', model='{self.model_name}')\"\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield \"task\", self.task\n yield \"num_threads\", self.num_threads\n yield \"prompt_format\", self.prompt_format\n if self.prompt_formatting_fn is not None:\n args = f\"({', '.join(self.prompt_formatting_fn.__code__.co_varnames)})\"\n representation = self.prompt_formatting_fn.__name__ + args\n yield \"prompt_formatting_fn\", representation\n yield \"model\", self.model_name\n\n @property\n @abstractmethod\n def model_name(self) -> str:\n pass\n\n def _generate_prompts(\n self,\n inputs: List[Dict[str, Any]],\n default_format: Union[\"SupportedFormats\", None] = None,\n ) -> List[Any]:\n \"\"\"Generates the prompts to be used for generation.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n default_format (Union[\"SupportedFormats\", None], optional): the default format to be used\n for the prompt if no `prompt_format` is specified. 
Defaults to `None`.\n\n Returns:\n List[Any]: the generated prompts.\n\n Raises:\n ValueError: if the generated prompt is not of the expected type.\n \"\"\"\n prompts = []\n for input in inputs:\n prompt = self.task.generate_prompt(**input)\n if not isinstance(prompt, Prompt) and self.prompt_formatting_fn is not None:\n warnings.warn(\n \"The method `generate_prompt` is not returning a `Prompt` class but a prompt\"\n f\" of `type={type(prompt)}`, meaning that a pre-formatting has already been\"\n \" applied in the `task.generate_prompt` method, so the usage of a `prompt_formatting_fn`\"\n \" is discouraged.\",\n UserWarning,\n stacklevel=2,\n )\n prompt = self.prompt_formatting_fn(prompt)\n elif isinstance(prompt, Prompt) and self.prompt_formatting_fn is None:\n if self.prompt_format is not None or default_format is not None:\n prompt = prompt.format_as(\n format=self.prompt_format or default_format # type: ignore\n )\n else:\n warnings.warn(\n \"No `prompt_format` has been specified and no `default_format` is set, so\"\n \" the prompt will be concatenated with a line-break and no specific formatting\"\n \" by default.\",\n UserWarning,\n stacklevel=2,\n )\n prompt = prompt.format_as(format=\"default\")\n prompts.append(prompt)\n return prompts\n\n @abstractmethod\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n pass\n\n def _get_valid_inputs(\n self, inputs: List[Dict[str, Any]]\n ) -> Tuple[List[Dict[str, Any]], List[int]]:\n \"\"\"Returns the valid inputs and the indices of the invalid inputs.\n\n A valid input is an input that contains all the arguments required by the task.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n\n Returns:\n Tuple[List[Dict[str, Any]], List[int]]: a tuple containing the valid inputs and\n the indices of the invalid inputs.\n \"\"\"\n\n valid_inputs = []\n not_valid_inputs_indices = []\n for i, input in enumerate(inputs):\n if not all(input_arg in input for input_arg in self.task.input_args_names):\n logger.warn(\n f\"Missing {self.task.__class__.__name__} input argument in batch element {i}\"\n )\n not_valid_inputs_indices.append(i)\n continue\n\n valid_inputs.append(input)\n\n return valid_inputs, not_valid_inputs_indices\n\n def _fill_missing_inputs(\n self,\n generations: List[List[LLMOutput]],\n invalid_inputs_indices: List[int],\n num_generations: int,\n ) -> List[List[LLMOutput]]:\n \"\"\"Fills the `generations` list with empty `LLMOutput`s for the inputs that were\n not valid for the associated task of this `LLM`.\n\n Args:\n generations (List[List[LLMOutput]]): the generations to be filled.\n invalid_inputs_indices (List[int]): the indices of the inputs that were not\n valid for the associated task of this `LLM`.\n num_generations (int): the number of generations to be performed for each input.\n\n Returns:\n List[List[LLMOutput]]: the filled generations.\n \"\"\"\n\n filled_generations = generations.copy()\n for idx in invalid_inputs_indices:\n filled_generations.insert(\n idx,\n [\n LLMOutput(\n model_name=self.model_name,\n prompt_used=None,\n raw_output=None,\n parsed_output=None,\n )\n for _ in range(num_generations)\n ],\n )\n return filled_generations\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]:\n \"\"\"Generates the outputs for the given inputs using the LLM.\n\n Args:\n 
inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Union[List[Future[List[\"LLMOutput\"]]], List[List[\"LLMOutput\"]]]: the generated outputs.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n valid_inputs, invalid_inputs_indices = self._get_valid_inputs(inputs)\n\n if self.thread_pool_executor is not None:\n futures = []\n for input in valid_inputs:\n future = self.thread_pool_executor.submit(\n self._generate, [input], num_generations\n )\n futures.append(future)\n future = when_all_complete(\n futures=futures,\n callback=lambda generations: self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n ),\n )\n future.add_done_callback(lambda _: _progress())\n return future\n\n generations = self._generate(valid_inputs, num_generations)\n\n generations = self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n )\n\n _progress()\n return generations\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return self.thread_pool_executor is not None\n
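Since LLM is an ABC, wiring in a new backend only requires implementing model_name and _generate. Below is a minimal sketch with a made-up EchoLLM (not part of distilabel); plain dicts stand in for the LLMOutput TypedDict fields:

from typing import Any, Dict, List

from distilabel.llm import LLM
from distilabel.tasks import TextGenerationTask


class EchoLLM(LLM):
    """Toy backend that echoes the formatted prompt back (illustration only)."""

    @property
    def model_name(self) -> str:
        return "echo"

    def _generate(
        self, inputs: List[Dict[str, Any]], num_generations: int = 1
    ) -> List[List[Dict[str, Any]]]:
        # `_generate_prompts` renders the task prompt and applies `prompt_format`.
        prompts = self._generate_prompts(inputs, default_format="default")
        outputs = []
        for prompt in prompts:
            outputs.append(
                [
                    {
                        "model_name": self.model_name,
                        "prompt_used": prompt,
                        "raw_output": prompt,
                        "parsed_output": self.task.parse_output(prompt),
                    }
                    for _ in range(num_generations)
                ]
            )
        return outputs


llm = EchoLLM(task=TextGenerationTask())
print(llm.generate(inputs=[{"input": "ping"}], num_generations=1))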
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLM.return_futures","title":"return_futures: bool
property
","text":"Whether the LLM
returns futures
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLM.__del__","title":"__del__()
","text":"Shuts down the thread pool executor if it is not None
.
src/distilabel/llm/base.py
def __del__(self) -> None:\n \"\"\"Shuts down the thread pool executor if it is not `None`.\"\"\"\n if self.thread_pool_executor is not None:\n self.thread_pool_executor.shutdown()\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLM.__init__","title":"__init__(task, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the LLM base class.
Note: This class is intended to be used internally, but anyone can still create a subclass, implement the abstractmethod
s and use it.
Parameters:
Name Type Description Default
task
Task
the task to be performed by the LLM.
required
num_threads
Union[int, None]
the number of threads to be used for parallel generation. If None
, no parallel generation will be performed. Defaults to None
.
None
prompt_format
Union['SupportedFormats', None]
the format to be used for the prompt. If None
, the default format of the task will be used, available formats are openai
, chatml
, llama2
, zephyr
, and default
. Defaults to None
, but default
(concatenation of system_prompt
and formatted_prompt
with a line-break) will be used if no prompt_formatting_fn
is provided.
None
prompt_formatting_fn
Union[Callable[..., str], None]
a function to be applied to the prompt before generation. If None
, no formatting will be applied. Defaults to None
.
None
Source code in src/distilabel/llm/base.py
def __init__(\n self,\n task: Task,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the LLM base class.\n\n Note:\n This class is intended to be used internally, but you anyone can still create\n a subclass, implement the `abstractmethod`s and use it.\n\n Args:\n task (Task): the task to be performed by the LLM.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[\"SupportedFormats\", None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n self.task = task\n\n self.thread_pool_executor = (\n ThreadPoolExecutor(max_workers=num_threads)\n if num_threads is not None\n else None\n )\n\n self.prompt_format = prompt_format\n self.prompt_formatting_fn = prompt_formatting_fn\n
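The prompt_format values listed above map to Prompt.format_as, which is what _generate_prompts calls internally; a small sketch follows (the input keyword of TextGenerationTask.generate_prompt is an assumption here, and the exact templates may vary by version):

from distilabel.tasks import TextGenerationTask

task = TextGenerationTask()
prompt = task.generate_prompt(input="What is distilabel?")

# "default" joins the system prompt and the formatted prompt with a line break,
# while "llama2" renders the Llama 2 chat template instead.
print(prompt.format_as("default"))
print(prompt.format_as("llama2"))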
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLM.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the LLM.
Parameters:
Name Type Description Defaultinputs
List[Dict[str, Any]]
the inputs to be used for generation.
requirednum_generations
int
the number of generations to be performed for each input. Defaults to 1
.
1
progress_callback_func
Union[Callable, None]
a function to be called at each generation step. Defaults to None
.
None
Returns:
Type DescriptionUnion[List[List['LLMOutput']], Future[List[List['LLMOutput']]]]
Union[List[Future[List[\"LLMOutput\"]]], List[List[\"LLMOutput\"]]]: the generated outputs.
Source code insrc/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]:\n \"\"\"Generates the outputs for the given inputs using the LLM.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Union[List[Future[List[\"LLMOutput\"]]], List[List[\"LLMOutput\"]]]: the generated outputs.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n valid_inputs, invalid_inputs_indices = self._get_valid_inputs(inputs)\n\n if self.thread_pool_executor is not None:\n futures = []\n for input in valid_inputs:\n future = self.thread_pool_executor.submit(\n self._generate, [input], num_generations\n )\n futures.append(future)\n future = when_all_complete(\n futures=futures,\n callback=lambda generations: self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n ),\n )\n future.add_done_callback(lambda _: _progress())\n return future\n\n generations = self._generate(valid_inputs, num_generations)\n\n generations = self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n )\n\n _progress()\n return generations\n
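The following minimal sketch shows one way to consume the output of generate, assuming a TextGenerationTask (whose input column is named input) and reusing the threaded_llm from the previous sketch; since a thread pool is in use, the returned object is a Future that must be resolved with .result():
>>> inputs = [{"input": "Write a haiku about synthetic data."}]
>>> future = threaded_llm.generate(inputs, num_generations=2)
>>> batches = future.result()   # List[List[LLMOutput]], one inner list per input
>>> first = batches[0][0]
>>> first.raw_output, first.parsed_output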
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLMPool","title":"LLMPool
","text":"LLMPool is a class that wraps multiple ProcessLLM
s and performs generation in parallel using them. Depending on the number of LLM
s and the parameter num_generations
, the LLMPool
will decide how many generations to perform for each LLM
:
If num_generations
is less than the number of LLM
s, then num_generations
LLMs will be chosen randomly and each of them will perform 1 generation.
If num_generations
is equal to the number of LLM
s, then each LLM
will perform 1 generation.
If num_generations
is greater than the number of LLM
s, then each LLM
will perform num_generations // num_llms
generations, and the remaining num_generations % num_llms
generations will be performed by num_generations % num_llms
randomly chosen LLM
s.
Attributes:
Name Type Descriptionllms
List[ProcessLLM]
the ProcessLLM
s to be used for generation.
src/distilabel/llm/base.py
class LLMPool:\n \"\"\"LLMPool is a class that wraps multiple `ProcessLLM`s and performs generation in\n parallel using them. Depending on the number of `LLM`s and the parameter `num_generations`,\n the `LLMPool` will decide how many generations to perform for each `LLM`:\n\n - If `num_generations` is less than the number of `LLM`s, then `num_generations` LLMs\n will be chosen randomly and each of them will perform 1 generation.\n\n\n - If `num_generations` is equal to the number of `LLM`s, then each `LLM` will perform\n 1 generation.\n\n - If `num_generations` is greater than the number of `LLM`s, then each `LLM` will\n perform `num_generations // num_llms` generations, and the remaining `num_generations % num_llms`\n generations will be performed by `num_generations % num_llms` randomly chosen `LLM`s.\n\n Attributes:\n llms (List[ProcessLLM]): the `ProcessLLM`s to be used for generation.\n \"\"\"\n\n def __init__(self, llms: List[ProcessLLM]) -> None:\n \"\"\"Initializes the `LLMPool` class.\n\n Args:\n llms: the `ProcessLLM`s to be used for generation. The list must contain at\n least 2 `ProcessLLM`s.\n\n Raises:\n ValueError: if the `llms` argument contains less than 2 `ProcessLLM`s, the\n `llms` argument contains `ProcessLLM`s that are not `ProcessLLM`s, or\n if the `llms` argument contains `ProcessLLM`s with different tasks.\n \"\"\"\n if len(llms) < 2:\n raise ValueError(\n \"The `llms` argument must contain at least 2 `ProcessLLM`s. If you want\"\n \" to use a single `ProcessLLM`, use the `ProcessLLM` directly instead.\"\n )\n\n if not all(isinstance(llm, ProcessLLM) for llm in llms):\n raise ValueError(\"The `llms` argument must contain only `ProcessLLM`s.\")\n\n # Note: The following piece of code is used to check that all the `ProcessLLM`s\n # have the same task or a subclass of it.\n mros = [(type(llm.task), len(type(llm.task).mro())) for llm in llms]\n min_common_class = min(mros, key=lambda x: x[1])[0]\n if not all(isinstance(llm.task, min_common_class) for llm in llms):\n raise ValueError(\n \"All the `ProcessLLM` in `llms` must share the same task (either as the instance or the parent class).\"\n )\n\n self.llms = llms\n self.num_llms = len(llms)\n\n def _get_num_generations_per_llm(self, num_generations: int) -> Dict[int, int]:\n \"\"\"Returns the number of generations to be performed by each `LLM`.\n\n Args:\n num_generations: the number of generations to be performed.\n\n Returns:\n Dict[int, int]: a dictionary where the keys are the ids of the `LLM`s and the\n values are the number of generations to be performed by each `LLM`.\n \"\"\"\n llms_ids = list(range(self.num_llms))\n generations_per_llm = {i: num_generations // self.num_llms for i in llms_ids}\n\n for i in random.sample(llms_ids, k=num_generations % self.num_llms):\n generations_per_llm[i] += 1\n\n return generations_per_llm\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generates the outputs for the given inputs using the pool of `ProcessLLM`s.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. 
Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n num_generations_per_llm = self._get_num_generations_per_llm(num_generations)\n\n futures = [\n llm.generate(\n inputs,\n num_generations=num_generations_per_llm[i],\n progress_callback_func=progress_callback_func,\n )\n for i, llm in enumerate(self.llms)\n if num_generations_per_llm[i] > 0\n ]\n llms_generations = [future.result() for future in futures]\n\n generations = []\n for llms_row_generations in zip(*llms_generations):\n row_generations = []\n for llm_row_generations in llms_row_generations:\n for generation in llm_row_generations:\n row_generations.append(generation)\n generations.append(row_generations)\n\n return generations\n\n def teardown(self) -> None:\n \"\"\"Stops the `ProcessLLM`s.\"\"\"\n for llm in self.llms:\n llm.teardown()\n\n @property\n def task(self) -> \"Task\":\n \"\"\"Returns the task that will be used by the `ProcessLLM`s of this pool.\n\n Returns:\n Task: the task that will be used by the `ProcessLLM`s of this pool.\n \"\"\"\n return self.llms[0].task\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return False\n
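To make the generation-splitting rule above concrete, here is a small standalone sketch that mirrors the logic of _get_num_generations_per_llm (it does not call the library itself): with 3 ProcessLLMs and num_generations=5, every LLM gets 5 // 3 = 1 generation and the remaining 5 % 3 = 2 generations go to 2 randomly chosen LLMs.
>>> import random
>>> num_llms, num_generations = 3, 5
>>> per_llm = {i: num_generations // num_llms for i in range(num_llms)}   # {0: 1, 1: 1, 2: 1}
>>> for i in random.sample(range(num_llms), k=num_generations % num_llms):
...     per_llm[i] += 1               # two randomly chosen LLMs end up with 2 generations
>>> sum(per_llm.values())             # always equals num_generations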
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLMPool.return_futures","title":"return_futures: bool
property
","text":"Whether the LLM
returns futures
task: 'Task'
property
","text":"Returns the task that will be used by the ProcessLLM
s of this pool.
Returns:
Name Type DescriptionTask
'Task'
the task that will be used by the ProcessLLM
s of this pool.
__init__(llms)
","text":"Initializes the LLMPool
class.
Parameters:
Name Type Description Defaultllms
List[ProcessLLM]
the ProcessLLM
s to be used for generation. The list must contain at least 2 ProcessLLM
s.
Raises:
Type DescriptionValueError
if the llms
argument contains less than 2 ProcessLLM
s, the llms
argument contains objects that are not ProcessLLM
s, or if the llms
argument contains ProcessLLM
s with different tasks.
src/distilabel/llm/base.py
def __init__(self, llms: List[ProcessLLM]) -> None:\n \"\"\"Initializes the `LLMPool` class.\n\n Args:\n llms: the `ProcessLLM`s to be used for generation. The list must contain at\n least 2 `ProcessLLM`s.\n\n Raises:\n ValueError: if the `llms` argument contains less than 2 `ProcessLLM`s, the\n `llms` argument contains `ProcessLLM`s that are not `ProcessLLM`s, or\n if the `llms` argument contains `ProcessLLM`s with different tasks.\n \"\"\"\n if len(llms) < 2:\n raise ValueError(\n \"The `llms` argument must contain at least 2 `ProcessLLM`s. If you want\"\n \" to use a single `ProcessLLM`, use the `ProcessLLM` directly instead.\"\n )\n\n if not all(isinstance(llm, ProcessLLM) for llm in llms):\n raise ValueError(\"The `llms` argument must contain only `ProcessLLM`s.\")\n\n # Note: The following piece of code is used to check that all the `ProcessLLM`s\n # have the same task or a subclass of it.\n mros = [(type(llm.task), len(type(llm.task).mro())) for llm in llms]\n min_common_class = min(mros, key=lambda x: x[1])[0]\n if not all(isinstance(llm.task, min_common_class) for llm in llms):\n raise ValueError(\n \"All the `ProcessLLM` in `llms` must share the same task (either as the instance or the parent class).\"\n )\n\n self.llms = llms\n self.num_llms = len(llms)\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLMPool.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the pool of ProcessLLM
s.
Parameters:
Name Type Description Defaultinputs
List[Dict[str, Any]]
the inputs to be used for generation.
requirednum_generations
int
the number of generations to be performed for each input. Defaults to 1
.
1
progress_callback_func
Union[Callable, None]
a function to be called at each generation step. Defaults to None
.
None
Returns:
Type DescriptionList[List['LLMOutput']]
Future[List[List[\"LLMOutput\"]]]: the generated outputs as a Future
.
src/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generates the outputs for the given inputs using the pool of `ProcessLLM`s.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n num_generations_per_llm = self._get_num_generations_per_llm(num_generations)\n\n futures = [\n llm.generate(\n inputs,\n num_generations=num_generations_per_llm[i],\n progress_callback_func=progress_callback_func,\n )\n for i, llm in enumerate(self.llms)\n if num_generations_per_llm[i] > 0\n ]\n llms_generations = [future.result() for future in futures]\n\n generations = []\n for llms_row_generations in zip(*llms_generations):\n row_generations = []\n for llm_row_generations in llms_row_generations:\n for generation in llm_row_generations:\n row_generations.append(generation)\n generations.append(row_generations)\n\n return generations\n
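A hedged end-to-end sketch of using the pool: load_openai_llm and load_llama_cpp_llm are hypothetical loader functions (each must return an LLM built for the same task), and the result is a plain list of lists because LLMPool.return_futures is False:
>>> from distilabel.llm import LLMPool, ProcessLLM
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> task = Task()
>>> pool = LLMPool(llms=[
...     ProcessLLM(task=task, load_llm_fn=load_openai_llm),     # hypothetical loader function
...     ProcessLLM(task=task, load_llm_fn=load_llama_cpp_llm),  # hypothetical loader function
... ])
>>> generations = pool.generate([{"input": "Explain RLHF in one sentence."}], num_generations=2)
>>> len(generations[0])   # 2: one generation contributed by each `ProcessLLM`
>>> pool.teardown()       # stop the child processes when done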
"},{"location":"reference/distilabel/llm/#distilabel.llm.LLMPool.teardown","title":"teardown()
","text":"Stops the ProcessLLM
s.
src/distilabel/llm/base.py
def teardown(self) -> None:\n \"\"\"Stops the `ProcessLLM`s.\"\"\"\n for llm in self.llms:\n llm.teardown()\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.LlamaCppLLM","title":"LlamaCppLLM
","text":" Bases: LLM
src/distilabel/llm/llama_cpp.py
class LlamaCppLLM(LLM):\n def __init__(\n self,\n model: \"Llama\",\n task: \"Task\",\n max_new_tokens: int = 128,\n temperature: float = 0.8,\n top_p: float = 0.95,\n top_k: int = 40,\n repeat_penalty: float = 1.1,\n seed: int = 1337,\n prompt_format: Union[SupportedFormats, None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the LlamaCppLLM class.\n\n Args:\n model (Llama): the llama-cpp model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 0.8.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 0.95.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n repeat_penalty (float, optional): the repeat penalty to be used for generation.\n Defaults to 1.1.\n seed (int, optional): the seed to be used for generation, setting it to -1 implies\n that a different response will be generated on each generation, similarly to\n HuggingFace's `do_sample` arg. Defaults to 1337.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Examples:\n >>> from llama_cpp import Llama\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import LlamaCppLLM\n >>> model = Llama(model_path=\"path/to/model\")\n >>> task = Task()\n >>> llm = LlamaCppLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _LLAMA_CPP_AVAILABLE:\n raise ImportError(\n \"`LlamaCppLLM` cannot be used as `llama_cpp` is not installed, please \"\n \" install it with `pip install llama-cpp-python`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repeat_penalty = repeat_penalty\n self.seed = seed\n\n self.model = model\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_tokens,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n \"repeat_penalty\": self.repeat_penalty,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the llama-cpp model, which is the same as the model path.\"\"\"\n return self.model.model_path\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. 
Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n output = []\n for _ in range(num_generations):\n raw_output = self.model.create_completion(\n prompt,\n max_tokens=self.max_tokens,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n repeat_penalty=self.repeat_penalty,\n )\n try:\n parsed_output = self.task.parse_output(\n raw_output[\"choices\"][0][\"text\"].strip()\n )\n except Exception as e:\n logger.error(f\"Error parsing llama-cpp output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
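Building on the constructor example in the docstring above, a short hedged sketch of running generation with LlamaCppLLM ("path/to/model" is a placeholder for a local model file, and the input key assumes a TextGenerationTask):
>>> from llama_cpp import Llama
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import LlamaCppLLM
>>> llm = LlamaCppLLM(model=Llama(model_path="path/to/model"), task=Task(), max_new_tokens=64)
>>> outputs = llm.generate([{"input": "Name three prime numbers."}], num_generations=1)
>>> outputs[0][0].parsed_output   # parsed by the task, or None if parsing failed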
"},{"location":"reference/distilabel/llm/#distilabel.llm.LlamaCppLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the llama-cpp model, which is the same as the model path.
"},{"location":"reference/distilabel/llm/#distilabel.llm.LlamaCppLLM.__init__","title":"__init__(model, task, max_new_tokens=128, temperature=0.8, top_p=0.95, top_k=40, repeat_penalty=1.1, seed=1337, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the LlamaCppLLM class.
Parameters:
Name Type Description Defaultmodel
Llama
the llama-cpp model to be used.
requiredtask
Task
the task to be performed by the LLM.
requiredmax_new_tokens
int
the maximum number of tokens to be generated. Defaults to 128.
128
temperature
float
the temperature to be used for generation. Defaults to 0.8.
0.8
top_p
float
the top-p value to be used for generation. Defaults to 0.95.
0.95
top_k
int
the top-k value to be used for generation. Defaults to 40.
40
repeat_penalty
float
the repeat penalty to be used for generation. Defaults to 1.1.
1.1
seed
int
the seed to be used for generation, setting it to -1 implies that a different response will be generated on each generation, similarly to HuggingFace's do_sample
arg. Defaults to 1337.
1337
prompt_format
Union[SupportedFormats, None]
the format to be used for the prompt. If None
, the default format of the task will be used, available formats are openai
, chatml
, llama2
, zephyr
, and default
. Defaults to None
, but default
(concatenation of system_prompt
and formatted_prompt
with a line-break) will be used if no prompt_formatting_fn
is provided.
None
prompt_formatting_fn
Union[Callable[..., str], None]
a function to be applied to the prompt before generation. If None
, no formatting will be applied. Defaults to None
.
None
Examples:
>>> from llama_cpp import Llama\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import LlamaCppLLM\n>>> model = Llama(model_path=\"path/to/model\")\n>>> task = Task()\n>>> llm = LlamaCppLLM(model=model, task=task)\n
Source code in src/distilabel/llm/llama_cpp.py
def __init__(\n self,\n model: \"Llama\",\n task: \"Task\",\n max_new_tokens: int = 128,\n temperature: float = 0.8,\n top_p: float = 0.95,\n top_k: int = 40,\n repeat_penalty: float = 1.1,\n seed: int = 1337,\n prompt_format: Union[SupportedFormats, None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the LlamaCppLLM class.\n\n Args:\n model (Llama): the llama-cpp model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 0.8.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 0.95.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n repeat_penalty (float, optional): the repeat penalty to be used for generation.\n Defaults to 1.1.\n seed (int, optional): the seed to be used for generation, setting it to -1 implies\n that a different response will be generated on each generation, similarly to\n HuggingFace's `do_sample` arg. Defaults to 1337.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Examples:\n >>> from llama_cpp import Llama\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import LlamaCppLLM\n >>> model = Llama(model_path=\"path/to/model\")\n >>> task = Task()\n >>> llm = LlamaCppLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _LLAMA_CPP_AVAILABLE:\n raise ImportError(\n \"`LlamaCppLLM` cannot be used as `llama_cpp` is not installed, please \"\n \" install it with `pip install llama-cpp-python`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repeat_penalty = repeat_penalty\n self.seed = seed\n\n self.model = model\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.OpenAILLM","title":"OpenAILLM
","text":" Bases: LLM
src/distilabel/llm/openai.py
class OpenAILLM(LLM):\n def __init__(\n self,\n task: \"Task\",\n model: str = \"gpt-3.5-turbo\",\n client: Union[\"OpenAI\", None] = None,\n openai_api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n frequency_penalty: float = 0.0,\n presence_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gpt-3.5-turbo\".\n client (Union[OpenAI, None], optional): an OpenAI client to be used for generation.\n If `None`, a new client will be created. Defaults to `None`.\n openai_api_key (Union[str, None], optional): the OpenAI API key to be used for generation.\n If `None`, the `OPENAI_API_KEY` environment variable will be used. Defaults to `None`.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in your OpenAI account.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import OpenAILLM\n >>> task = Task()\n >>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _OPENAI_AVAILABLE:\n raise ImportError(\n \"`OpenAILLM` cannot be used as `openai` is not installed, please \"\n \" install it with `pip install openai`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.frequency_penalty = frequency_penalty\n self.presence_penalty = presence_penalty\n self.temperature = temperature\n self.top_p = top_p\n\n self.client = client or OpenAI(api_key=openai_api_key, max_retries=6)\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in your OpenAI account, available models are {self.available_models}\"\n self.model = model\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_tokens\": self.max_tokens,\n \"frequency_penalty\": self.frequency_penalty,\n \"presence_penalty\": self.presence_penalty,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n },\n )\n\n @cached_property\n def available_models(self) -> List[str]:\n \"\"\"Returns the list of available models in your OpenAI account.\"\"\"\n return [model.id for model in self.client.models.list().data]\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the OpenAI model.\"\"\"\n return self.model\n\n def _generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"openai\")\n outputs = []\n for prompt in prompts:\n chat_completions = self.client.chat.completions.create(\n messages=prompt,\n model=self.model,\n n=num_generations,\n max_tokens=self.max_tokens,\n frequency_penalty=self.frequency_penalty,\n presence_penalty=self.presence_penalty,\n temperature=self.temperature,\n top_p=self.top_p,\n timeout=50,\n )\n\n output = []\n for chat_completion in chat_completions.choices:\n try:\n parsed_response = self.task.parse_output(\n chat_completion.message.content.strip()\n )\n except Exception as e:\n logger.error(f\"Error parsing OpenAI response: {e}\")\n parsed_response = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=chat_completion.message.content,\n parsed_output=parsed_response,\n )\n )\n outputs.append(output)\n return outputs\n
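A hedged sketch of generation with OpenAILLM (requires OPENAI_API_KEY); because num_threads is passed, generate returns a Future whose result contains one batch of LLMOutputs per input:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import OpenAILLM
>>> llm = OpenAILLM(task=Task(), model="gpt-3.5-turbo", num_threads=2, max_new_tokens=256)
>>> future = llm.generate([{"input": "Summarize what distilabel does."}], num_generations=3)
>>> batches = future.result()
>>> len(batches[0])   # 3 generations for the single input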
"},{"location":"reference/distilabel/llm/#distilabel.llm.OpenAILLM.available_models","title":"available_models: List[str]
cached
property
","text":"Returns the list of available models in your OpenAI account.
"},{"location":"reference/distilabel/llm/#distilabel.llm.OpenAILLM.model_name","title":"model_name: str
property
","text":"Returns the name of the OpenAI model.
"},{"location":"reference/distilabel/llm/#distilabel.llm.OpenAILLM.__init__","title":"__init__(task, model='gpt-3.5-turbo', client=None, openai_api_key=None, max_new_tokens=128, frequency_penalty=0.0, presence_penalty=0.0, temperature=1.0, top_p=1.0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the OpenAILLM class.
Parameters:
Name Type Description Defaulttask
Task
the task to be performed by the LLM.
requiredmodel
str
the model to be used for generation. Defaults to \"gpt-3.5-turbo\".
'gpt-3.5-turbo'
client
Union[OpenAI, None]
an OpenAI client to be used for generation. If None
, a new client will be created. Defaults to None
.
None
openai_api_key
Union[str, None]
the OpenAI API key to be used for generation. If None
, the OPENAI_API_KEY
environment variable will be used. Defaults to None
.
None
max_new_tokens
int
the maximum number of tokens to be generated. Defaults to 128.
128
frequency_penalty
float
the frequency penalty to be used for generation. Defaults to 0.0.
0.0
presence_penalty
float
the presence penalty to be used for generation. Defaults to 0.0.
0.0
temperature
float
the temperature to be used for generation. Defaults to 1.0.
1.0
top_p
float
the top-p value to be used for generation. Defaults to 1.0.
1.0
num_threads
Union[int, None]
the number of threads to be used for parallel generation. If None
, no parallel generation will be performed. Defaults to None
.
None
prompt_format
Union[SupportedFormats, None]
the format to be used for the prompt. If None
, the default format of the task will be used, available formats are openai
, chatml
, llama2
, zephyr
, and default
. Defaults to None
, but default
(concatenation of system_prompt
and formatted_prompt
with a line-break) will be used if no prompt_formatting_fn
is provided.
None
prompt_formatting_fn
Union[Callable[..., str], None]
a function to be applied to the prompt before generation. If None
, no formatting will be applied. Defaults to None
.
None
Raises:
Type DescriptionAssertionError
if the provided model
is not available in your OpenAI account.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import OpenAILLM\n>>> task = Task()\n>>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n
Source code in src/distilabel/llm/openai.py
def __init__(\n self,\n task: \"Task\",\n model: str = \"gpt-3.5-turbo\",\n client: Union[\"OpenAI\", None] = None,\n openai_api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n frequency_penalty: float = 0.0,\n presence_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gpt-3.5-turbo\".\n client (Union[OpenAI, None], optional): an OpenAI client to be used for generation.\n If `None`, a new client will be created. Defaults to `None`.\n openai_api_key (Union[str, None], optional): the OpenAI API key to be used for generation.\n If `None`, the `OPENAI_API_KEY` environment variable will be used. Defaults to `None`.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in your OpenAI account.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import OpenAILLM\n >>> task = Task()\n >>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _OPENAI_AVAILABLE:\n raise ImportError(\n \"`OpenAILLM` cannot be used as `openai` is not installed, please \"\n \" install it with `pip install openai`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.frequency_penalty = frequency_penalty\n self.presence_penalty = presence_penalty\n self.temperature = temperature\n self.top_p = top_p\n\n self.client = client or OpenAI(api_key=openai_api_key, max_retries=6)\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in your OpenAI account, available models are {self.available_models}\"\n self.model = model\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.ProcessLLM","title":"ProcessLLM
","text":"A class that wraps an LLM
and performs generation in a separate process. The result is a Future
that will be set when the generation is completed.
This class creates a new child process that will load the LLM
and perform the text generation. In order to communicate with this child process, a bridge thread is created in the main process. The bridge thread will send and receive the results from the child process using multiprocessing.Queue
s. The communication between the bridge thread and the main process is done using Future
s. This architecture was inspired by the ProcessPoolExecutor
from the concurrent.futures
module and it's a simplified version of it.
src/distilabel/llm/base.py
class ProcessLLM:\n \"\"\"A class that wraps an `LLM` and performs generation in a separate process. The\n result is a `Future` that will be set when the generation is completed.\n\n This class creates a new child process that will load the `LLM` and perform the\n text generation. In order to communicate with this child process, a bridge thread\n is created in the main process. The bridge thread will send and receive the results\n from the child process using `multiprocessing.Queue`s. The communication between the\n bridge thread and the main process is done using `Future`s. This architecture was\n inspired by the `ProcessPoolExecutor` from the `concurrent.futures` module and it's\n a simplified version of it.\n \"\"\"\n\n def __init__(self, task: Task, load_llm_fn: Callable[[Task], LLM]) -> None:\n \"\"\"Initializes the `ProcessLLM` class.\n\n Args:\n task: the task to be performed by the `LLM`. This task will be used by the\n child process when calling the `load_llm_fn`.\n load_llm_fn (Callable[[Task], LLM]): a function that will be executed in the\n child process to load the `LLM`. It must return an `LLM` instance.\n \"\"\"\n self.task = task\n\n self._load_llm_fn = load_llm_fn\n\n # The bridge thread will act as a bridge between the main process and the child\n # process for communication. It will send the generation requests to the child\n # process and receive the results from the child process.\n self._bridge_thread = None\n\n # The child process which will load the `LLM` and perform the generation.\n self._generation_process = None\n\n # The `Semaphore` that will be used to synchronize the loading of the `LLM`.\n # `_BridgeThread` will be blocked until `_GenerationProcess` has called the\n # `load_llm_fn` and the `LLM` has been loaded.\n self._load_llm_sem = mp.Semaphore(0)\n\n # This thread will create text generation requests\n self.pending_text_generation_request: Dict[int, _TextGenerationRequest] = {}\n self.text_generation_request_count = 0\n self.text_generation_request_ids_queue: queue.Queue[int] = queue.Queue()\n\n # Queues for the communication between the `_BridgeThread` and the `_GenerationProcess`\n self._call_queue = mp.Queue()\n self._result_queue = mp.Queue()\n\n # Shared memory object for transfering the `model_name` to the main process\n # once the `LLM` is loaded\n self._model_name = mp.Array(c_char, MAX_MODEL_NAME_LENGTH)\n\n def _start_bridge_thread(self) -> None:\n \"\"\"Starts the bridge thread and the generation process.\"\"\"\n if self._bridge_thread is None:\n self._generation_process = _GenerationProcess(self)\n self._generation_process.start()\n pid = self._generation_process.pid\n logger.debug(f\"Generation process with PID {pid} started!\")\n\n self._bridge_thread = _BridgeThread(self)\n self._bridge_thread.start()\n logger.debug(\"Bridge thread for process with PID {pid} started!\")\n\n def _add_text_generation_request(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Creates and send a new text generation request to the bridge thread. This thread\n and the bridge thread shares a dictionary used to store the text generation requests.\n This thread will add the text generation requests to the dictionary and the bridge\n thread will only read from it. In order for the bridge thread to know that a new\n text generation request has been added to the dictionary, this thread will put the\n id of the request in a queue. 
The bridge thread will read from this queue and get\n the text generation request from the dictionary.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n text_generation_request = _TextGenerationRequest(\n inputs=inputs, num_generations=num_generations\n )\n # Put the request information in the dictionary associated to the request id\n self.pending_text_generation_request[\n self.text_generation_request_count\n ] = text_generation_request\n # Put the request id in the queue (for the `_BridgeThread` to consume it)\n self.text_generation_request_ids_queue.put(self.text_generation_request_count)\n self.text_generation_request_count += 1\n text_generation_request.future.add_done_callback(lambda _: _progress())\n return text_generation_request.future\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Generates the outputs for the given inputs using the `ProcessLLM` and its loaded\n `LLM`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n self._start_bridge_thread()\n return self._add_text_generation_request(\n inputs, num_generations, progress_callback_func\n )\n\n def teardown(self) -> None:\n \"\"\"Stops the bridge thread and the generation process.\"\"\"\n if self._generation_process is not None:\n self._generation_process.stop()\n self._generation_process.join()\n\n if self._bridge_thread is not None:\n self._bridge_thread.stop()\n self._bridge_thread.join()\n\n @cached_property\n def model_name(self) -> str:\n \"\"\"Returns the model name of the `LLM` once it has been loaded.\"\"\"\n with self._model_name:\n return \"\".join([c.decode() for c in self._model_name if c != b\"\\0\"])\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return True\n
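A minimal, hedged sketch of the architecture described above: load_llm_fn runs inside the child process (hence the local import), the returned Future resolves once the bridge thread receives the results, and teardown stops both the child process and the bridge thread. The example loads an OpenAILLM purely for illustration and needs OPENAI_API_KEY:
>>> from distilabel.llm import ProcessLLM
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> def load_llm(task):
...     # Executed in the child process; keep heavy imports and model loading here.
...     from distilabel.llm import OpenAILLM
...     return OpenAILLM(task=task, model="gpt-3.5-turbo")
>>> process_llm = ProcessLLM(task=Task(), load_llm_fn=load_llm)
>>> future = process_llm.generate([{"input": "What is AI feedback?"}], num_generations=1)
>>> outputs = future.result()   # blocks until the child process finishes
>>> process_llm.teardown()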
"},{"location":"reference/distilabel/llm/#distilabel.llm.ProcessLLM.model_name","title":"model_name: str
cached
property
","text":"Returns the model name of the LLM
once it has been loaded.
return_futures: bool
property
","text":"Whether the LLM
returns futures
__init__(task, load_llm_fn)
","text":"Initializes the ProcessLLM
class.
Parameters:
Name Type Description Defaulttask
Task
the task to be performed by the LLM
. This task will be used by the child process when calling the load_llm_fn
.
load_llm_fn
Callable[[Task], LLM]
a function that will be executed in the child process to load the LLM
. It must return an LLM
instance.
src/distilabel/llm/base.py
def __init__(self, task: Task, load_llm_fn: Callable[[Task], LLM]) -> None:\n \"\"\"Initializes the `ProcessLLM` class.\n\n Args:\n task: the task to be performed by the `LLM`. This task will be used by the\n child process when calling the `load_llm_fn`.\n load_llm_fn (Callable[[Task], LLM]): a function that will be executed in the\n child process to load the `LLM`. It must return an `LLM` instance.\n \"\"\"\n self.task = task\n\n self._load_llm_fn = load_llm_fn\n\n # The bridge thread will act as a bridge between the main process and the child\n # process for communication. It will send the generation requests to the child\n # process and receive the results from the child process.\n self._bridge_thread = None\n\n # The child process which will load the `LLM` and perform the generation.\n self._generation_process = None\n\n # The `Semaphore` that will be used to synchronize the loading of the `LLM`.\n # `_BridgeThread` will be blocked until `_GenerationProcess` has called the\n # `load_llm_fn` and the `LLM` has been loaded.\n self._load_llm_sem = mp.Semaphore(0)\n\n # This thread will create text generation requests\n self.pending_text_generation_request: Dict[int, _TextGenerationRequest] = {}\n self.text_generation_request_count = 0\n self.text_generation_request_ids_queue: queue.Queue[int] = queue.Queue()\n\n # Queues for the communication between the `_BridgeThread` and the `_GenerationProcess`\n self._call_queue = mp.Queue()\n self._result_queue = mp.Queue()\n\n # Shared memory object for transfering the `model_name` to the main process\n # once the `LLM` is loaded\n self._model_name = mp.Array(c_char, MAX_MODEL_NAME_LENGTH)\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.ProcessLLM.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the ProcessLLM
and its loaded LLM
.
Parameters:
Name Type Description Defaultinputs
List[Dict[str, Any]]
the inputs to be used for generation.
requirednum_generations
int
the number of generations to be performed for each input. Defaults to 1
.
1
progress_callback_func
Union[Callable, None]
a function to be called at each generation step. Defaults to None
.
None
Returns:
Type DescriptionFuture[List[List['LLMOutput']]]
Future[List[List[\"LLMOutput\"]]]: the generated outputs as a Future
.
src/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Generates the outputs for the given inputs using the `ProcessLLM` and its loaded\n `LLM`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n self._start_bridge_thread()\n return self._add_text_generation_request(\n inputs, num_generations, progress_callback_func\n )\n
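The base implementation invokes progress_callback_func with an advance keyword equal to num_generations * len(inputs) once a request completes, so any callable accepting that keyword works; below is a hedged sketch using tqdm (an external dependency assumed here) and reusing process_llm from the previous sketch:
>>> from tqdm import tqdm
>>> inputs = [{"input": "Give me a fun fact about otters."}]
>>> pbar = tqdm(total=len(inputs) * 3)
>>> def on_progress(advance):
...     pbar.update(advance)   # called once the whole request has finished
>>> future = process_llm.generate(inputs, num_generations=3, progress_callback_func=on_progress)
>>> future.result()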
"},{"location":"reference/distilabel/llm/#distilabel.llm.ProcessLLM.teardown","title":"teardown()
","text":"Stops the bridge thread and the generation process.
Source code insrc/distilabel/llm/base.py
def teardown(self) -> None:\n \"\"\"Stops the bridge thread and the generation process.\"\"\"\n if self._generation_process is not None:\n self._generation_process.stop()\n self._generation_process.join()\n\n if self._bridge_thread is not None:\n self._bridge_thread.stop()\n self._bridge_thread.join()\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.TogetherInferenceLLM","title":"TogetherInferenceLLM
","text":" Bases: LLM
src/distilabel/llm/together.py
class TogetherInferenceLLM(LLM):\n def __init__(\n self,\n task: \"Task\",\n model: str,\n api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: float = 1.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = 1,\n stop: Union[List[str], None] = None,\n logprobs: int = 0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str): the model to be used for generation.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation. From the Together\n Inference docs: \"A decimal number that determines the degree of randomness in the response.\n A value of 0 will always yield the same output. A temperature much less than 1 favors more\n correctness and is appropriate for question answering or summarization. A value approaching\n 1 introduces more randomness in the output.\". Defaults to 1.0.\n repetition_penalty (float, optional): the repetition penalty to be used for generation. From the\n Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood\n of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation. From the Together\n Inference docs: \"used to dynamically adjust the number of choices for each predicted\n token based on the cumulative probabilities. It specifies a probability threshold,\n below which all less likely tokens are filtered out. This technique helps to maintain\n diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation. From the Together Inference\n docs: \"used to limit the number of choices for the next predicted word or token. It specifies\n the maximum number of tokens to consider at each step, based on their probability of occurrence.\n This technique helps to speed up the generation process and can improve the quality of the\n generated text by focusing on the most likely options.\". Defaults to 1.\n stop (List[str], optional): strings to delimitate the generation process, so that when the\n model generates any of the provided characters, the generation process is considered completed.\n Defaults to None.\n logprobs (int, optional): the number of logprobs to be returned for each token. From the\n Together Inference docs: \"An integer that specifies how many top token log probabilities\n are included in the response for each token generation step.\". Defaults to None.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in Together Inference.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TogetherInferenceLLM\n >>> task = Task()\n >>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n \"\"\"\n if not _TOGETHER_AVAILABLE:\n raise ImportError(\n \"`TogetherInferenceLLM` cannot be used as `together` is not installed, please \"\n \" install it with `pip install together`.\"\n )\n\n together.api_key = api_key or os.getenv(\"TOGETHER_API_KEY\", None)\n if together.api_key is None:\n raise ValueError(\n \"No `api_key` provided, please provide one or set the `TOGETHER_API_KEY` \"\n \"environment variable.\"\n )\n\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in Together Inference, available models are {self.available_models}\"\n self.model = model\n\n self.max_new_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repetition_penalty = repetition_penalty\n self.stop = stop\n self.logprobs = logprobs\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_new_tokens,\n \"temperature\": self.temperature,\n \"repetition_penalty\": self.repetition_penalty,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n \"stop\": self.stop,\n \"logprobs\": self.logprobs,\n },\n )\n\n @cached_property\n def available_models(self) -> List[str]:\n \"\"\"Returns the list of available models in Together Inference.\"\"\"\n # TODO: exclude the image models\n return [model[\"name\"] for model in together.Models.list()]\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the Together Inference model.\"\"\"\n return self.model\n\n def _generate_single_output(self, prompt: str) -> LLMOutput:\n \"\"\"Runs the Together Inference text generation function over a single prompt\n producing a single `LLMOutput`.\n\n Args:\n prompt (str): the formatted prompt to be provided to the Together Inference\n endpoint.\n\n Raises:\n RuntimeError: raised if the Together Inference endpoint fails.\n \"\"\"\n try:\n output = together.Complete.create(\n prompt=prompt,\n model=self.model,\n max_tokens=self.max_new_tokens,\n stop=self.stop,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n repetition_penalty=self.repetition_penalty,\n logprobs=self.logprobs,\n )\n except Exception as e:\n raise RuntimeError(\n f\"Together Inference generation failed with exception: {e}\"\n ) from e\n\n if output[\"output\"][\"choices\"] is None or len(output[\"output\"][\"choices\"]) < 1: # type: ignore\n raise RuntimeError(\"Together Inference generation returned no generations.\")\n\n choice = output[\"output\"][\"choices\"][0] # type: ignore\n try:\n parsed_response = self.task.parse_output(choice[\"text\"].strip())\n except Exception as e:\n logger.error(f\"Error parsing Together Inference response: {e}\")\n parsed_response = None\n\n return LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=choice[\"text\"] or None,\n parsed_output=parsed_response,\n )\n\n def _generate(\n self,\n inputs: List[Dict[str, Any]],\n 
num_generations: int = 1,\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n outputs.append(\n [self._generate_single_output(prompt) for _ in range(num_generations)]\n )\n return outputs\n
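Extending the constructor example in the docstring above, a hedged sketch of generation with TogetherInferenceLLM (requires TOGETHER_API_KEY; each output is produced by one call to together.Complete.create via _generate_single_output):
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import TogetherInferenceLLM
>>> llm = TogetherInferenceLLM(model="togethercomputer/llama-2-7b", task=Task(), prompt_format="llama2", max_new_tokens=64)
>>> outputs = llm.generate([{"input": "Write a two-line poem about the sea."}], num_generations=2)
>>> [o.raw_output for o in outputs[0]]   # two raw completions for the single input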
"},{"location":"reference/distilabel/llm/#distilabel.llm.TogetherInferenceLLM.available_models","title":"available_models: List[str]
cached
property
","text":"Returns the list of available models in Together Inference.
"},{"location":"reference/distilabel/llm/#distilabel.llm.TogetherInferenceLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the Together Inference model.
"},{"location":"reference/distilabel/llm/#distilabel.llm.TogetherInferenceLLM.__init__","title":"__init__(task, model, api_key=None, max_new_tokens=128, repetition_penalty=1.0, temperature=1.0, top_p=1.0, top_k=1, stop=None, logprobs=0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the OpenAILLM class.
Parameters:
Name Type Description Defaulttask
Task
the task to be performed by the LLM.
requiredmodel
str
the model to be used for generation.
requiredmax_new_tokens
int
the maximum number of tokens to be generated. Defaults to 128.
128
temperature
float
the temperature to be used for generation. From the Together Inference docs: \"A decimal number that determines the degree of randomness in the response. A value of 0 will always yield the same output. A temperature much less than 1 favors more correctness and is appropriate for question answering or summarization. A value approaching 1 introduces more randomness in the output.\". Defaults to 1.0.
1.0
repetition_penalty
float
the repetition penalty to be used for generation. From the Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.
1.0
top_p
float
the top-p value to be used for generation. From the Together Inference docs: \"used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold, below which all less likely tokens are filtered out. This technique helps to maintain diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.
1.0
top_k
int
the top-k value to be used for generation. From the Together Inference docs: \"used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options.\". Defaults to 1.
1
stop
List[str]
strings to delimit the generation process, so that when the model generates any of the provided strings, the generation process is considered completed. Defaults to None.
None
logprobs
int
the number of logprobs to be returned for each token. From the Together Inference docs: \"An integer that specifies how many top token log probabilities are included in the response for each token generation step.\". Defaults to 0.
0
num_threads
Union[int, None]
the number of threads to be used for parallel generation. If None
, no parallel generation will be performed. Defaults to None
.
None
prompt_format
Union[SupportedFormats, None]
the format to be used for the prompt. If None
, the default format of the task will be used, available formats are openai
, chatml
, llama2
, zephyr
, and default
. Defaults to None
, but default
(concatenation of system_prompt
and formatted_prompt
with a line-break) will be used if no prompt_formatting_fn
is provided.
None
prompt_formatting_fn
Union[Callable[..., str], None]
a function to be applied to the prompt before generation. If None
, no formatting will be applied. Defaults to None
.
None
Raises:
Type DescriptionAssertionError
if the provided model
is not available in Together Inference.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import TogetherInferenceLLM\n>>> task = Task()\n>>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n
Source code in src/distilabel/llm/together.py
def __init__(\n self,\n task: \"Task\",\n model: str,\n api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: float = 1.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = 1,\n stop: Union[List[str], None] = None,\n logprobs: int = 0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str): the model to be used for generation.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation. From the Together\n Inference docs: \"A decimal number that determines the degree of randomness in the response.\n A value of 0 will always yield the same output. A temperature much less than 1 favors more\n correctness and is appropriate for question answering or summarization. A value approaching\n 1 introduces more randomness in the output.\". Defaults to 1.0.\n repetition_penalty (float, optional): the repetition penalty to be used for generation. From the\n Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood\n of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation. From the Together\n Inference docs: \"used to dynamically adjust the number of choices for each predicted\n token based on the cumulative probabilities. It specifies a probability threshold,\n below which all less likely tokens are filtered out. This technique helps to maintain\n diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation. From the Together Inference\n docs: \"used to limit the number of choices for the next predicted word or token. It specifies\n the maximum number of tokens to consider at each step, based on their probability of occurrence.\n This technique helps to speed up the generation process and can improve the quality of the\n generated text by focusing on the most likely options.\". Defaults to 1.\n stop (List[str], optional): strings to delimitate the generation process, so that when the\n model generates any of the provided characters, the generation process is considered completed.\n Defaults to None.\n logprobs (int, optional): the number of logprobs to be returned for each token. From the\n Together Inference docs: \"An integer that specifies how many top token log probabilities\n are included in the response for each token generation step.\". Defaults to None.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in Together Inference.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TogetherInferenceLLM\n >>> task = Task()\n >>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n \"\"\"\n if not _TOGETHER_AVAILABLE:\n raise ImportError(\n \"`TogetherInferenceLLM` cannot be used as `together` is not installed, please \"\n \" install it with `pip install together`.\"\n )\n\n together.api_key = api_key or os.getenv(\"TOGETHER_API_KEY\", None)\n if together.api_key is None:\n raise ValueError(\n \"No `api_key` provided, please provide one or set the `TOGETHER_API_KEY` \"\n \"environment variable.\"\n )\n\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in Together Inference, available models are {self.available_models}\"\n self.model = model\n\n self.max_new_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repetition_penalty = repetition_penalty\n self.stop = stop\n self.logprobs = logprobs\n
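A minimal end-to-end sketch of generating with this class, based on the docstring example above. It assumes `together` is installed, a valid `TOGETHER_API_KEY` is set, and that the default `TextGenerationTask` expects an `input` key (that key name is an assumption, not stated in this docstring):

>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import TogetherInferenceLLM
>>> task = Task()
>>> llm = TogetherInferenceLLM(model="togethercomputer/llama-2-7b", task=task, prompt_format="llama2")
>>> outputs = llm.generate([{"input": "Write a short story."}], num_generations=1)  # List[List[LLMOutput]]
>>> outputs[0][0]["raw_output"]  # each LLMOutput exposes model_name, prompt_used, raw_output and parsed_output

Since `num_threads` is not set here, `generate` returns the nested list directly rather than a `Future`.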
"},{"location":"reference/distilabel/llm/#distilabel.llm.TransformersLLM","title":"TransformersLLM
","text":" Bases: LLM
Source code in src/distilabel/llm/huggingface/transformers.py
class TransformersLLM(LLM):\n def __init__(\n self,\n model: \"PreTrainedModel\",\n tokenizer: \"PreTrainedTokenizer\",\n task: \"Task\",\n max_new_tokens: int = 128,\n do_sample: bool = False,\n temperature: float = 1.0,\n top_k: int = 50,\n top_p: float = 1.0,\n typical_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the TransformersLLM class.\n\n Args:\n model (PreTrainedModel): the model to be used for generation.\n tokenizer (PreTrainedTokenizer): the tokenizer to be used for generation.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n do_sample (bool, optional): whether to sample from the model or not.\n Defaults to False.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 50.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n typical_p (float, optional): the typical-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used for generation.\n If `None`, the number of threads will be set to the number of available CPUs.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n Defaults to `None`.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): the function to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TransformersLLM\n >>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n >>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n >>> task = Task()\n >>> llm = TransformersLLM(\n ... model=model,\n ... tokenizer=tokenizer,\n ... task=task,\n ... 
)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n self.max_new_tokens = max_new_tokens\n self.do_sample = do_sample\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.model = model\n self.tokenizer = tokenizer\n\n if self.tokenizer.pad_token is None:\n self.tokenizer.pad_token = self.tokenizer.eos_token\n if (\n hasattr(self.tokenizer, \"use_default_system_prompt\")\n and self.tokenizer.use_default_system_prompt # type: ignore\n ):\n # The `tokenizer` also has a method named `apply_chat_template` that expects a `Conversation` as OpenAI does with the ChatML format\n warnings.warn(\n \"The provided `tokenizer` has `use_default_system_prompt=True` which means that the default system prompt will be used, which may collide with the `task` provided as an arg to this class.\",\n UserWarning,\n stacklevel=2,\n )\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_new_tokens,\n \"do_sample\": self.do_sample,\n \"temperature\": self.temperature,\n \"top_k\": self.top_k,\n \"top_p\": self.top_p,\n \"typical_p\": self.typical_p,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the Transformers model.\"\"\"\n return self.model.config.name_or_path\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n encodings = self.tokenizer(prompts, padding=True, return_tensors=\"pt\")\n encodings = encodings.to(self.model.device)\n with torch.inference_mode():\n generated_ids = self.model.generate(\n **encodings, # type: ignore\n pad_token_id=self.tokenizer.eos_token_id,\n generation_config=GenerationConfig(\n do_sample=self.do_sample,\n temperature=self.temperature,\n max_new_tokens=self.max_new_tokens,\n top_k=self.top_k,\n top_p=self.top_p,\n typical_p=self.typical_p,\n num_return_sequences=num_generations,\n ),\n )\n raw_outputs = self.tokenizer.batch_decode(\n generated_ids[:, encodings.input_ids.shape[1] :],\n skip_special_tokens=True,\n clean_up_tokenization_spaces=True,\n )\n outputs = []\n for prompt, i in zip(prompts, range(0, len(raw_outputs), num_generations)):\n output = []\n for raw_output in raw_outputs[i : i + num_generations]:\n try:\n parsed_output = self.task.parse_output(raw_output)\n except Exception as e:\n logger.error(f\"Error parsing Transformers output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.TransformersLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the Transformers model.
"},{"location":"reference/distilabel/llm/#distilabel.llm.TransformersLLM.__init__","title":"__init__(model, tokenizer, task, max_new_tokens=128, do_sample=False, temperature=1.0, top_k=50, top_p=1.0, typical_p=1.0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the TransformersLLM class.
Parameters:
- model (PreTrainedModel, required): the model to be used for generation.
- tokenizer (PreTrainedTokenizer, required): the tokenizer to be used for generation.
- task (Task, required): the task to be performed by the LLM.
- max_new_tokens (int, default: 128): the maximum number of tokens to be generated.
- do_sample (bool, default: False): whether to sample from the model or not.
- temperature (float, default: 1.0): the temperature to be used for generation.
- top_k (int, default: 50): the top-k value to be used for generation.
- top_p (float, default: 1.0): the top-p value to be used for generation.
- typical_p (float, default: 1.0): the typical-p value to be used for generation.
- num_threads (Union[int, None], default: None): the number of threads to be used for generation. If None, the number of threads will be set to the number of available CPUs.
- prompt_format (Union[SupportedFormats, None], default: None): the format to be used for formatting the prompts. If None, the prompts will not be formatted.
- prompt_formatting_fn (Union[Callable[..., str], None], default: None): the function to be used for formatting the prompts. If None, the prompts will not be formatted.
Examples:
>>> from transformers import AutoModelForCausalLM, AutoTokenizer\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import TransformersLLM\n>>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n>>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n>>> task = Task()\n>>> llm = TransformersLLM(\n... model=model,\n... tokenizer=tokenizer,\n... task=task,\n... )\n
Source code in src/distilabel/llm/huggingface/transformers.py
def __init__(\n self,\n model: \"PreTrainedModel\",\n tokenizer: \"PreTrainedTokenizer\",\n task: \"Task\",\n max_new_tokens: int = 128,\n do_sample: bool = False,\n temperature: float = 1.0,\n top_k: int = 50,\n top_p: float = 1.0,\n typical_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the TransformersLLM class.\n\n Args:\n model (PreTrainedModel): the model to be used for generation.\n tokenizer (PreTrainedTokenizer): the tokenizer to be used for generation.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n do_sample (bool, optional): whether to sample from the model or not.\n Defaults to False.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 50.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n typical_p (float, optional): the typical-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used for generation.\n If `None`, the number of threads will be set to the number of available CPUs.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n Defaults to `None`.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): the function to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TransformersLLM\n >>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n >>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n >>> task = Task()\n >>> llm = TransformersLLM(\n ... model=model,\n ... tokenizer=tokenizer,\n ... task=task,\n ... )\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n self.max_new_tokens = max_new_tokens\n self.do_sample = do_sample\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.model = model\n self.tokenizer = tokenizer\n\n if self.tokenizer.pad_token is None:\n self.tokenizer.pad_token = self.tokenizer.eos_token\n if (\n hasattr(self.tokenizer, \"use_default_system_prompt\")\n and self.tokenizer.use_default_system_prompt # type: ignore\n ):\n # The `tokenizer` also has a method named `apply_chat_template` that expects a `Conversation` as OpenAI does with the ChatML format\n warnings.warn(\n \"The provided `tokenizer` has `use_default_system_prompt=True` which means that the default system prompt will be used, which may collide with the `task` provided as an arg to this class.\",\n UserWarning,\n stacklevel=2,\n )\n
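Extending the docstring example, a hedged sketch of running generation locally with a small model (the `input` key for the default `TextGenerationTask` is an assumption):

>>> from transformers import AutoModelForCausalLM, AutoTokenizer
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import TransformersLLM
>>> model = AutoModelForCausalLM.from_pretrained("gpt2")
>>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
>>> llm = TransformersLLM(model=model, tokenizer=tokenizer, task=Task(), do_sample=True, temperature=0.7, max_new_tokens=64)
>>> outputs = llm.generate([{"input": "What is synthetic data?"}], num_generations=2)
>>> [o["raw_output"] for o in outputs[0]]  # two sampled completions for the single input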
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAIEndpointLLM","title":"VertexAIEndpointLLM
","text":" Bases: LLM
An LLM which uses a Vertex AI Online prediction endpoint for the generation.
More information about Vertex AI Endpoints can be found here:
- https://cloud.google.com/vertex-ai/docs/general/deployment#deploy_a_model_to_an_endpoint
Source code in src/distilabel/llm/google/vertexai.py
class VertexAIEndpointLLM(LLM):\n \"\"\"An `LLM` which uses a Vertex AI Online prediction endpoint for the generation.\n\n More information about Vertex AI Endpoints can be found here:\n\n - https://cloud.google.com/vertex-ai/docs/general/deployment#deploy_a_model_to_an_endpoint\n \"\"\"\n\n def __init__(\n self,\n task: \"Task\",\n endpoint_id: str,\n project: Optional[str] = None,\n location: str = \"us-central1\",\n generation_kwargs: Optional[Dict[str, Any]] = None,\n prompt_argument: str = \"prompt\",\n num_generations_argument: str = \"n\",\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the `VertexAIEndpointLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n endpoint_id (str): the ID of the Vertex AI endpoint to be used for generation.\n project (Optional[str], optional): the project to be used for generation. If `None`,\n the default project will be used. Defaults to `None`.\n location (str, optional): the location of the Vertex AI endpoint to be used for\n generation. Defaults to \"us-central1\".\n generation_kwargs (Optional[Dict[str, Any]], optional): the generation parameters\n to be used for generation. The name of the parameters will depend on the\n Docker image used to deploy the model to the Vertex AI endpoint. Defaults\n to `None`.\n prompt_argument (str, optional): the name of the Vertex AI Endpoint key to\n be used for the prompt. Defaults to \"prompt\".\n num_generations_argument (str, optional): the name of the Vertex AI Endpoint\n key to be used to specify the number of generations per prompt. Defaults\n to \"n\".\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAIEndpointLLM` cannot be used as `google-cloud-aiplatform` is not\"\n \" installed, please install it with `pip install google-cloud-aiplatform`\"\n )\n\n if project is None:\n try:\n project = google.auth.default()[1]\n except DefaultCredentialsError as e:\n raise ValueError(\n \"No `project` was specified and no default credentials were found.\"\n ) from e\n\n if generation_kwargs is None:\n generation_kwargs = {}\n\n self.endpoint_id = endpoint_id\n self.project = project\n self.location = location\n self.generation_kwargs = generation_kwargs\n self.prompt_argument = prompt_argument\n self.num_generations_argument = num_generations_argument\n\n self.client = PredictionServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n\n @cached_property\n def model_name(self) -> str:\n \"\"\"Returns the name of the model used for generation.\"\"\"\n client = EndpointServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n endpoint = client.get_endpoint(name=self.endpoint_path)\n return endpoint.deployed_models[0].display_name\n\n @property\n def endpoint_path(self) -> str:\n \"\"\"Returns the path of the Vertex AI endpoint to be used for generation.\"\"\"\n return self.client.endpoint_path(\n project=self.project, # type: ignore\n location=self.location,\n endpoint=self.endpoint_id,\n )\n\n @_vertexai_retry_decorator\n def _call_vertexai_endpoint(self, instances: List[Any]) -> Any:\n return self.client.predict(endpoint=self.endpoint_path, instances=instances)\n\n def _prepare_instances(\n self, prompts: List[str], num_generations: int\n ) -> List[\"Value\"]:\n \"\"\"Prepares the instances to be sent to the Vertex AI endpoint.\n\n Args:\n prompts (List[str]): the prompts to be used for generation.\n num_generations (int): the number of generations to be performed for each prompt.\n\n Returns:\n The instances to be sent to the Vertex AI endpoint.\n \"\"\"\n instances = []\n for prompt in prompts:\n instance = json_format.ParseDict(\n {\n self.prompt_argument: prompt,\n self.num_generations_argument: num_generations,\n **self.generation_kwargs,\n },\n Value(),\n )\n instances.append(instance)\n return instances\n\n def _single_output(self, instance: Any) -> List[LLMOutput]:\n try:\n # NOTE: `predict` method accepts a list of instances, but depending on the\n # deployed Docker image, it can just accept one instance.\n response = self._call_vertexai_endpoint(instances=[instance])\n except exceptions.InternalServerError as e:\n raise ValueError(\n \"The Vertex AI endpoint returned 500 Internal Server Error. This is\"\n \" usually caused due to wrong generation parameters. 
Please check the\"\n \" `generation_parameters` and try again.\"\n ) from e\n\n output = []\n for prediction in response.predictions:\n # Vertex endpoint output is `Prompt:\\n{{ model_prompt }}\\nOutput:\\n{{ model_output }}`\n # so we need to do a pre-parsing to remove the `Prompt:` and `Output:` parts.\n match = _PARSE_VERTEXAI_ENDPOINT_PREDICTION_REGEX.search(prediction)\n if not match:\n raise ValueError(\n \"Couldn't parse the response from the Vertex AI endpoint.\"\n )\n\n model_output = match.group(1).strip()\n\n try:\n parsed_output = self.task.parse_output(model_output)\n except Exception as e:\n logger.error(f\"Error parsing Vertex AI endpoint model response: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=instance.struct_value[self.prompt_argument],\n raw_output=model_output,\n parsed_output=parsed_output,\n )\n )\n return output\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n prompts = self._generate_prompts(inputs)\n instances = self._prepare_instances(\n prompts=prompts, num_generations=num_generations\n )\n return [self._single_output(instance) for instance in instances]\n
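A hedged instantiation sketch for this class. The endpoint ID, project name and the keys inside `generation_kwargs` are hypothetical placeholders; the accepted parameter names depend on the Docker image the model was deployed with:

>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import VertexAIEndpointLLM
>>> llm = VertexAIEndpointLLM(
...     task=Task(),
...     endpoint_id="1234567890",  # hypothetical endpoint ID
...     project="my-gcp-project",  # hypothetical project
...     location="us-central1",
...     generation_kwargs={"temperature": 0.7, "max_tokens": 128},  # image-dependent keys (assumed)
...     prompt_argument="prompt",
...     num_generations_argument="n",
... )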
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAIEndpointLLM.endpoint_path","title":"endpoint_path: str
property
","text":"Returns the path of the Vertex AI endpoint to be used for generation.
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAIEndpointLLM.model_name","title":"model_name: str
cached
property
","text":"Returns the name of the model used for generation.
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAIEndpointLLM.__init__","title":"__init__(task, endpoint_id, project=None, location='us-central1', generation_kwargs=None, prompt_argument='prompt', num_generations_argument='n', num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the VertexAIEndpointLLM
class.
Parameters:
- task (Task, required): the task to be performed by the LLM.
- endpoint_id (str, required): the ID of the Vertex AI endpoint to be used for generation.
- project (Optional[str], default: None): the project to be used for generation. If None, the default project will be used.
- location (str, default: "us-central1"): the location of the Vertex AI endpoint to be used for generation.
- generation_kwargs (Optional[Dict[str, Any]], default: None): the generation parameters to be used for generation. The name of the parameters will depend on the Docker image used to deploy the model to the Vertex AI endpoint.
- prompt_argument (str, default: "prompt"): the name of the Vertex AI Endpoint key to be used for the prompt.
- num_generations_argument (str, default: "n"): the name of the Vertex AI Endpoint key to be used to specify the number of generations per prompt.
- num_threads (Union[int, None], default: None): the number of threads to be used for parallel generation. If None, no parallel generation will be performed.
- prompt_format (Union[SupportedFormats, None], default: None): the format to be used for the prompt. If None, the default format of the task will be used; available formats are openai, chatml, llama2, zephyr, and default. If no prompt_formatting_fn is provided, default (concatenation of system_prompt and formatted_prompt with a line-break) will be used.
- prompt_formatting_fn (Union[Callable[..., str], None], default: None): a function to be applied to the prompt before generation. If None, no formatting will be applied.
Source code in src/distilabel/llm/google/vertexai.py
def __init__(\n self,\n task: \"Task\",\n endpoint_id: str,\n project: Optional[str] = None,\n location: str = \"us-central1\",\n generation_kwargs: Optional[Dict[str, Any]] = None,\n prompt_argument: str = \"prompt\",\n num_generations_argument: str = \"n\",\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the `VertexAIEndpointLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n endpoint_id (str): the ID of the Vertex AI endpoint to be used for generation.\n project (Optional[str], optional): the project to be used for generation. If `None`,\n the default project will be used. Defaults to `None`.\n location (str, optional): the location of the Vertex AI endpoint to be used for\n generation. Defaults to \"us-central1\".\n generation_kwargs (Optional[Dict[str, Any]], optional): the generation parameters\n to be used for generation. The name of the parameters will depend on the\n Docker image used to deploy the model to the Vertex AI endpoint. Defaults\n to `None`.\n prompt_argument (str, optional): the name of the Vertex AI Endpoint key to\n be used for the prompt. Defaults to \"prompt\".\n num_generations_argument (str, optional): the name of the Vertex AI Endpoint\n key to be used to specify the number of generations per prompt. Defaults\n to \"n\".\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAIEndpointLLM` cannot be used as `google-cloud-aiplatform` is not\"\n \" installed, please install it with `pip install google-cloud-aiplatform`\"\n )\n\n if project is None:\n try:\n project = google.auth.default()[1]\n except DefaultCredentialsError as e:\n raise ValueError(\n \"No `project` was specified and no default credentials were found.\"\n ) from e\n\n if generation_kwargs is None:\n generation_kwargs = {}\n\n self.endpoint_id = endpoint_id\n self.project = project\n self.location = location\n self.generation_kwargs = generation_kwargs\n self.prompt_argument = prompt_argument\n self.num_generations_argument = num_generations_argument\n\n self.client = PredictionServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAILLM","title":"VertexAILLM
","text":" Bases: LLM
An LLM which allows using Google's proprietary models from the Vertex AI APIs:
- Gemini API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini
- Codey API: https://cloud.google.com/vertex-ai/docs/generative-ai/code/code-models-overview
- Text API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text
To use the VertexAILLM it is necessary to have configured the Google Cloud authentication using one of these methods:
- Setting the GOOGLE_CLOUD_CREDENTIALS environment variable
- Using the gcloud auth application-default login command
- Using the vertexai.init function from the google-cloud-aiplatform library
Source code in src/distilabel/llm/google/vertexai.py
class VertexAILLM(LLM):\n \"\"\"An `LLM` which allows to use Google's proprietary models from the Vertex AI APIs:\n\n - Gemini API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini\n - Codey API: https://cloud.google.com/vertex-ai/docs/generative-ai/code/code-models-overview\n - Text API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text\n\n To use the `VertexAILLM` is necessary to have configured the Google Cloud authentication\n using one of these methods:\n\n - Setting `GOOGLE_CLOUD_CREDENTIALS` environment variable\n - Using `gcloud auth application-default login` command\n - Using `vertexai.init` function from the `google-cloud-aiplatform` library\n \"\"\"\n\n def __init__(\n self,\n task: \"Task\",\n model: str = \"gemini-pro\",\n temperature: Optional[float] = None,\n top_p: Optional[float] = None,\n top_k: Optional[int] = None,\n max_new_tokens: int = 128,\n stop_sequences: Optional[List[str]] = None,\n num_threads: Union[int, None] = None,\n ) -> None:\n \"\"\"Initializes the `VertexGenerativeModelLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gemini-pro\".\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n \"\"\"\n super().__init__(task=task, num_threads=num_threads)\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAILLM` cannot be used as `google-cloud-aiplatform` is not installed,\"\n \" please install it with `pip install google-cloud-aiplatform`\"\n )\n\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_output_tokens = max_new_tokens\n self.stop_sequences = stop_sequences\n\n if is_gemini_model(model):\n self.model = GenerativeModel(model)\n elif is_codey_model(model):\n self.model = CodeGenerationModel.from_pretrained(model)\n else:\n self.model = TextGenerationModel.from_pretrained(model)\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the model used for generation.\"\"\"\n if isinstance(self.model, GenerativeModel):\n return self.model._model_name\n\n return self.model._model_id\n\n def _generate_contents(self, prompts: List[str]) -> List[List[Dict[str, Any]]]:\n \"\"\"Generates a list of valid dicts that can be parsed to `vertexai.preview.generative_models.Content`\n objects for each input.\n\n Args:\n prompts (List[str]): the prompts to be used for generation.\n\n Returns:\n List[List[Dict[str, Any]]]: the list of valid `vertexai.preview.generative_models.Content`\n objects.\n \"\"\"\n return [[{\"role\": \"user\", \"parts\": [{\"text\": prompt}]}] for prompt in prompts]\n\n @_vertexai_retry_decorator\n def _call_generative_model_with_backoff(\n self, contents: List[Dict[str, Any]], **kwargs: Any\n ) -> \"GenerationResponse\":\n return self.model.generate_content( # type: ignore\n contents=contents,\n # TODO: update `candidate_count` to have `num_generations` as value once valid range is not [1, 2)\n generation_config=GenerationConfig(candidate_count=1, 
**kwargs),\n )\n\n def _generative_model_single_output(\n self, contents: List[Dict[str, Any]]\n ) -> LLMOutput:\n raw_output = None\n try:\n response = self._call_generative_model_with_backoff(\n contents=contents,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n max_output_tokens=self.max_output_tokens,\n stop_sequences=self.stop_sequences,\n )\n raw_output = response.text\n parsed_output = self.task.parse_output(raw_output)\n except ValueError as e:\n logger.error(f\"Vertex AI Gemini API model didn't return content: {e}\")\n return LLMOutput(\n model_name=self.model_name,\n prompt_used=contents,\n raw_output=None,\n parsed_output=None,\n )\n except Exception as e:\n logger.error(f\"Error parsing Vertex AI Gemini API model response: {e}\")\n parsed_output = None\n\n return LLMOutput(\n model_name=self.model_name,\n prompt_used=contents,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n\n def _generate_with_generative_model(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generate `num_generations` for each input in `inputs` using a Vertex AI Gemini\n API model.\"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"default\")\n inputs_contents = self._generate_contents(prompts)\n outputs = []\n for contents in inputs_contents:\n output = []\n # TODO: remove this for-loop once `GenerationConfig.candidate_count` valid range is not [1, 2)\n for _ in range(num_generations):\n output.append(self._generative_model_single_output(contents=contents))\n outputs.append(output)\n return outputs\n\n @_vertexai_retry_decorator\n def _call_text_generation_model(\n self, **kwargs: Any\n ) -> \"MultiCandidateTextGenerationResponse\":\n return self.model.predict(**kwargs) # type: ignore\n\n def _text_generation_model_single_output(\n self, prompt: str, num_generations: int\n ) -> List[LLMOutput]:\n response = self._call_text_generation_model(\n prompt=prompt,\n max_output_tokens=self.max_output_tokens,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n stop_sequences=self.stop_sequences,\n # WARNING: The model can return < `candidate_count` generations depending\n # on the generation parameters and the input.\n candidate_count=num_generations,\n )\n\n output = []\n for candidate in response.candidates:\n try:\n parsed_response = self.task.parse_output(candidate.text)\n except Exception as e:\n logger.error(\n f\"Error parsing Vertex AI Text/Code API model response: {e}\"\n )\n parsed_response = None\n\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=candidate.text,\n parsed_output=parsed_response,\n )\n )\n return output\n\n def _generate_with_text_generation_model(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generate `num_generations` for each input in `inputs` using a Vertex AI Text/Code\n API model.\"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"default\")\n outputs = []\n for prompt in prompts:\n outputs.append(\n self._text_generation_model_single_output(prompt, num_generations)\n )\n return outputs\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n if isinstance(self.model, GenerativeModel):\n return self._generate_with_generative_model(inputs, num_generations)\n\n return self._generate_with_text_generation_model(inputs, num_generations)\n
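A hedged usage sketch, assuming Google Cloud authentication has already been configured with one of the methods listed above and that the default `TextGenerationTask` expects an `input` key (an assumption, not stated here):

>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import VertexAILLM
>>> llm = VertexAILLM(task=Task(), model="gemini-pro", temperature=0.7, max_new_tokens=128)
>>> outputs = llm.generate([{"input": "What is Vertex AI?"}])
>>> outputs[0][0]["parsed_output"]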
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAILLM.model_name","title":"model_name: str
property
","text":"Returns the name of the model used for generation.
"},{"location":"reference/distilabel/llm/#distilabel.llm.VertexAILLM.__init__","title":"__init__(task, model='gemini-pro', temperature=None, top_p=None, top_k=None, max_new_tokens=128, stop_sequences=None, num_threads=None)
","text":"Initializes the VertexGenerativeModelLLM
class.
Parameters:
- task (Task, required): the task to be performed by the LLM.
- model (str, default: "gemini-pro"): the model to be used for generation.
- temperature (Optional[float], default: None): the temperature to be used for generation.
- top_p (Optional[float], default: None): the top-p value to be used for generation.
- top_k (Optional[int], default: None): the top-k value to be used for generation.
- max_new_tokens (int, default: 128): the maximum number of tokens to be generated.
- stop_sequences (Optional[List[str]], default: None): the stop sequences to be used for generation.
- num_threads (Union[int, None], default: None): the number of threads to be used for parallel generation. If None, no parallel generation will be performed.
Source code in src/distilabel/llm/google/vertexai.py
def __init__(\n self,\n task: \"Task\",\n model: str = \"gemini-pro\",\n temperature: Optional[float] = None,\n top_p: Optional[float] = None,\n top_k: Optional[int] = None,\n max_new_tokens: int = 128,\n stop_sequences: Optional[List[str]] = None,\n num_threads: Union[int, None] = None,\n) -> None:\n \"\"\"Initializes the `VertexGenerativeModelLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gemini-pro\".\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n \"\"\"\n super().__init__(task=task, num_threads=num_threads)\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAILLM` cannot be used as `google-cloud-aiplatform` is not installed,\"\n \" please install it with `pip install google-cloud-aiplatform`\"\n )\n\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_output_tokens = max_new_tokens\n self.stop_sequences = stop_sequences\n\n if is_gemini_model(model):\n self.model = GenerativeModel(model)\n elif is_codey_model(model):\n self.model = CodeGenerationModel.from_pretrained(model)\n else:\n self.model = TextGenerationModel.from_pretrained(model)\n
"},{"location":"reference/distilabel/llm/#distilabel.llm.vLLM","title":"vLLM
","text":" Bases: LLM
Source code in src/distilabel/llm/vllm.py
class vLLM(LLM):\n def __init__(\n self,\n vllm: \"_vLLM\",\n task: \"Task\",\n max_new_tokens: int = 128,\n presence_penalty: float = 0.0,\n frequency_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = -1,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the vLLM class.\n\n Args:\n vllm (_vLLM): the vLLM model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to -1.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n\n Examples:\n >>> from vllm import LLM\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import vLLM\n >>> model = LLM(model=\"gpt2\")\n >>> task = Task()\n >>> llm = vLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VLLM_AVAILABLE:\n raise ImportError(\n \"`vLLM` cannot be used as `vllm` is not installed, please \"\n \" install it with `pip install vllm`.\"\n )\n\n self.presence_penalty = presence_penalty\n self.frequency_penalty = frequency_penalty\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_tokens = max_new_tokens\n\n self.vllm = vllm\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_tokens\": self.max_tokens,\n \"presence_penalty\": self.presence_penalty,\n \"frequency_penalty\": self.frequency_penalty,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the vLLM model.\"\"\"\n return self.vllm.llm_engine.model_config.model # type: ignore\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. 
Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n requests = self.vllm.generate(\n prompts,\n SamplingParams( # type: ignore\n n=num_generations,\n presence_penalty=self.presence_penalty,\n frequency_penalty=self.frequency_penalty,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n max_tokens=self.max_tokens,\n ),\n use_tqdm=False, # type: ignore\n )\n outputs = []\n for request, prompt in zip(requests, prompts):\n output = []\n for request_output in request.outputs:\n try:\n parsed_output = self.task.parse_output(request_output.text)\n except Exception as e:\n logger.error(f\"Error parsing vLLM output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=request_output.text,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
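Continuing the docstring example (note the first argument of the constructor is named `vllm`), a short sketch of batched generation; the `input` key is an assumption about the default `TextGenerationTask`:

>>> from vllm import LLM
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import vLLM
>>> llm = vLLM(vllm=LLM(model="gpt2"), task=Task(), temperature=0.7, max_new_tokens=64)
>>> outputs = llm.generate([{"input": "Write a haiku about datasets."}], num_generations=2)
>>> [o["raw_output"] for o in outputs[0]]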
"},{"location":"reference/distilabel/llm/#distilabel.llm.vLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the vLLM model.
"},{"location":"reference/distilabel/llm/#distilabel.llm.vLLM.__init__","title":"__init__(vllm, task, max_new_tokens=128, presence_penalty=0.0, frequency_penalty=0.0, temperature=1.0, top_p=1.0, top_k=-1, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the vLLM class.
Parameters:
- vllm (LLM, required): the vLLM model to be used.
- task (Task, required): the task to be performed by the LLM.
- max_new_tokens (int, default: 128): the maximum number of tokens to be generated.
- presence_penalty (float, default: 0.0): the presence penalty to be used for generation.
- frequency_penalty (float, default: 0.0): the frequency penalty to be used for generation.
- temperature (float, default: 1.0): the temperature to be used for generation.
- top_p (float, default: 1.0): the top-p value to be used for generation.
- top_k (int, default: -1): the top-k value to be used for generation.
- prompt_format (Union[SupportedFormats, None], default: None): the format to be used for the prompt. If None, the default format of the task will be used; available formats are openai, chatml, llama2, zephyr, and default. If no prompt_formatting_fn is provided, default (concatenation of system_prompt and formatted_prompt with a line-break) will be used.
- prompt_formatting_fn (Union[Callable[..., str], None], default: None): a function to be applied to the prompt before generation. If None, no formatting will be applied.
Examples:
>>> from vllm import LLM\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import vLLM\n>>> model = LLM(model=\"gpt2\")\n>>> task = Task()\n>>> llm = vLLM(vllm=model, task=task)\n
Source code in src/distilabel/llm/vllm.py
def __init__(\n self,\n vllm: \"_vLLM\",\n task: \"Task\",\n max_new_tokens: int = 128,\n presence_penalty: float = 0.0,\n frequency_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = -1,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the vLLM class.\n\n Args:\n vllm (_vLLM): the vLLM model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to -1.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n\n Examples:\n >>> from vllm import LLM\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import vLLM\n >>> model = LLM(model=\"gpt2\")\n >>> task = Task()\n >>> llm = vLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VLLM_AVAILABLE:\n raise ImportError(\n \"`vLLM` cannot be used as `vllm` is not installed, please \"\n \" install it with `pip install vllm`.\"\n )\n\n self.presence_penalty = presence_penalty\n self.frequency_penalty = frequency_penalty\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_tokens = max_new_tokens\n\n self.vllm = vllm\n
"},{"location":"reference/distilabel/llm/base/","title":"base","text":""},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLM","title":"LLM
","text":" Bases: ABC
Source code in src/distilabel/llm/base.py
class LLM(ABC):\n def __init__(\n self,\n task: Task,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the LLM base class.\n\n Note:\n This class is intended to be used internally, but you anyone can still create\n a subclass, implement the `abstractmethod`s and use it.\n\n Args:\n task (Task): the task to be performed by the LLM.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[\"SupportedFormats\", None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n self.task = task\n\n self.thread_pool_executor = (\n ThreadPoolExecutor(max_workers=num_threads)\n if num_threads is not None\n else None\n )\n\n self.prompt_format = prompt_format\n self.prompt_formatting_fn = prompt_formatting_fn\n\n def __del__(self) -> None:\n \"\"\"Shuts down the thread pool executor if it is not `None`.\"\"\"\n if self.thread_pool_executor is not None:\n self.thread_pool_executor.shutdown()\n\n @property\n def num_threads(self) -> Union[int, None]:\n if self.thread_pool_executor:\n return self.thread_pool_executor._max_workers\n\n def __repr__(self) -> str:\n return f\"{self.__class__.__name__}(task={self.task.__class__.__name__}, num_threads={self.num_threads}, promp_format='{self.prompt_format}', model='{self.model_name}')\"\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield \"task\", self.task\n yield \"num_threads\", self.num_threads\n yield \"prompt_format\", self.prompt_format\n if self.prompt_formatting_fn is not None:\n args = f\"({', '.join(self.prompt_formatting_fn.__code__.co_varnames)})\"\n representation = self.prompt_formatting_fn.__name__ + args\n yield \"prompt_formatting_fn\", representation\n yield \"model\", self.model_name\n\n @property\n @abstractmethod\n def model_name(self) -> str:\n pass\n\n def _generate_prompts(\n self,\n inputs: List[Dict[str, Any]],\n default_format: Union[\"SupportedFormats\", None] = None,\n ) -> List[Any]:\n \"\"\"Generates the prompts to be used for generation.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n default_format (Union[\"SupportedFormats\", None], optional): the default format to be used\n for the prompt if no `prompt_format` is specified. 
Defaults to `None`.\n\n Returns:\n List[Any]: the generated prompts.\n\n Raises:\n ValueError: if the generated prompt is not of the expected type.\n \"\"\"\n prompts = []\n for input in inputs:\n prompt = self.task.generate_prompt(**input)\n if not isinstance(prompt, Prompt) and self.prompt_formatting_fn is not None:\n warnings.warn(\n \"The method `generate_prompt` is not returning a `Prompt` class but a prompt\"\n f\" of `type={type(prompt)}`, meaning that a pre-formatting has already been\"\n \" applied in the `task.generate_prompt` method, so the usage of a `prompt_formatting_fn`\"\n \" is discouraged.\",\n UserWarning,\n stacklevel=2,\n )\n prompt = self.prompt_formatting_fn(prompt)\n elif isinstance(prompt, Prompt) and self.prompt_formatting_fn is None:\n if self.prompt_format is not None or default_format is not None:\n prompt = prompt.format_as(\n format=self.prompt_format or default_format # type: ignore\n )\n else:\n warnings.warn(\n \"No `prompt_format` has been specified and no `default_format` is set, so\"\n \" the prompt will be concatenated with a line-break and no specific formatting\"\n \" by default.\",\n UserWarning,\n stacklevel=2,\n )\n prompt = prompt.format_as(format=\"default\")\n prompts.append(prompt)\n return prompts\n\n @abstractmethod\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n pass\n\n def _get_valid_inputs(\n self, inputs: List[Dict[str, Any]]\n ) -> Tuple[List[Dict[str, Any]], List[int]]:\n \"\"\"Returns the valid inputs and the indices of the invalid inputs.\n\n A valid input is an input that contains all the arguments required by the task.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n\n Returns:\n Tuple[List[Dict[str, Any]], List[int]]: a tuple containing the valid inputs and\n the indices of the invalid inputs.\n \"\"\"\n\n valid_inputs = []\n not_valid_inputs_indices = []\n for i, input in enumerate(inputs):\n if not all(input_arg in input for input_arg in self.task.input_args_names):\n logger.warn(\n f\"Missing {self.task.__class__.__name__} input argument in batch element {i}\"\n )\n not_valid_inputs_indices.append(i)\n continue\n\n valid_inputs.append(input)\n\n return valid_inputs, not_valid_inputs_indices\n\n def _fill_missing_inputs(\n self,\n generations: List[List[LLMOutput]],\n invalid_inputs_indices: List[int],\n num_generations: int,\n ) -> List[List[LLMOutput]]:\n \"\"\"Fills the `generations` list with empty `LLMOutput`s for the inputs that were\n not valid for the associated task of this `LLM`.\n\n Args:\n generations (List[List[LLMOutput]]): the generations to be filled.\n invalid_inputs_indices (List[int]): the indices of the inputs that were not\n valid for the associated task of this `LLM`.\n num_generations (int): the number of generations to be performed for each input.\n\n Returns:\n List[List[LLMOutput]]: the filled generations.\n \"\"\"\n\n filled_generations = generations.copy()\n for idx in invalid_inputs_indices:\n filled_generations.insert(\n idx,\n [\n LLMOutput(\n model_name=self.model_name,\n prompt_used=None,\n raw_output=None,\n parsed_output=None,\n )\n for _ in range(num_generations)\n ],\n )\n return filled_generations\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]:\n \"\"\"Generates the outputs for the given inputs using the LLM.\n\n Args:\n 
inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Union[List[Future[List[\"LLMOutput\"]]], List[List[\"LLMOutput\"]]]: the generated outputs.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n valid_inputs, invalid_inputs_indices = self._get_valid_inputs(inputs)\n\n if self.thread_pool_executor is not None:\n futures = []\n for input in valid_inputs:\n future = self.thread_pool_executor.submit(\n self._generate, [input], num_generations\n )\n futures.append(future)\n future = when_all_complete(\n futures=futures,\n callback=lambda generations: self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n ),\n )\n future.add_done_callback(lambda _: _progress())\n return future\n\n generations = self._generate(valid_inputs, num_generations)\n\n generations = self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n )\n\n _progress()\n return generations\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return self.thread_pool_executor is not None\n
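As the note in the docstring says, a subclass only needs to implement `model_name` and `_generate`. A toy sketch follows (the `LLMOutput` import path is an assumption, and `EchoLLM` is not part of the library):

>>> from typing import Any, Dict, List
>>> from distilabel.llm.base import LLM
>>> from distilabel.llm.utils import LLMOutput  # assumed import path
>>> class EchoLLM(LLM):
...     # Toy subclass that echoes the formatted prompt back as the output.
...     @property
...     def model_name(self) -> str:
...         return "echo"
...     def _generate(self, inputs: List[Dict[str, Any]], num_generations: int = 1) -> List[List[LLMOutput]]:
...         prompts = self._generate_prompts(inputs, default_format="default")
...         return [
...             [
...                 LLMOutput(model_name=self.model_name, prompt_used=prompt, raw_output=prompt, parsed_output=None)
...                 for _ in range(num_generations)
...             ]
...             for prompt in prompts
...         ]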
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLM.return_futures","title":"return_futures: bool
property
","text":"Whether the LLM
returns futures
__del__()
","text":"Shuts down the thread pool executor if it is not None
.
Source code in src/distilabel/llm/base.py
def __del__(self) -> None:\n \"\"\"Shuts down the thread pool executor if it is not `None`.\"\"\"\n if self.thread_pool_executor is not None:\n self.thread_pool_executor.shutdown()\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLM.__init__","title":"__init__(task, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the LLM base class.
Note: This class is intended to be used internally, but anyone can still create a subclass, implement the abstractmethods and use it.
Parameters:
- task (Task, required): the task to be performed by the LLM.
- num_threads (Union[int, None], default: None): the number of threads to be used for parallel generation. If None, no parallel generation will be performed.
- prompt_format (Union['SupportedFormats', None], default: None): the format to be used for the prompt. If None, the default format of the task will be used; available formats are openai, chatml, llama2, zephyr, and default. If no prompt_formatting_fn is provided, default (concatenation of system_prompt and formatted_prompt with a line-break) will be used.
- prompt_formatting_fn (Union[Callable[..., str], None], default: None): a function to be applied to the prompt before generation. If None, no formatting will be applied.
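To make the default prompt format described above concrete, a small sketch (the `Prompt` import path and constructor field names are assumptions based on the class referenced in `_generate_prompts`):

>>> from distilabel.tasks.prompt import Prompt  # assumed import path
>>> prompt = Prompt(system_prompt="You are a helpful assistant.", formatted_prompt="Summarize this text.")
>>> prompt.format_as("default")  # system_prompt and formatted_prompt joined with a line-break
>>> prompt.format_as("llama2")   # one of the named formats listed above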
Source code in src/distilabel/llm/base.py
def __init__(\n self,\n task: Task,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the LLM base class.\n\n Note:\n This class is intended to be used internally, but you anyone can still create\n a subclass, implement the `abstractmethod`s and use it.\n\n Args:\n task (Task): the task to be performed by the LLM.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[\"SupportedFormats\", None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n self.task = task\n\n self.thread_pool_executor = (\n ThreadPoolExecutor(max_workers=num_threads)\n if num_threads is not None\n else None\n )\n\n self.prompt_format = prompt_format\n self.prompt_formatting_fn = prompt_formatting_fn\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLM.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the LLM.
Parameters:
- inputs (List[Dict[str, Any]], required): the inputs to be used for generation.
- num_generations (int, default: 1): the number of generations to be performed for each input.
- progress_callback_func (Union[Callable, None], default: None): a function to be called at each generation step.
Returns:
- Union[List[List["LLMOutput"]], Future[List[List["LLMOutput"]]]]: the generated outputs.
Source code in src/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> Union[List[List[\"LLMOutput\"]], Future[List[List[\"LLMOutput\"]]]]:\n \"\"\"Generates the outputs for the given inputs using the LLM.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Union[List[Future[List[\"LLMOutput\"]]], List[List[\"LLMOutput\"]]]: the generated outputs.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n valid_inputs, invalid_inputs_indices = self._get_valid_inputs(inputs)\n\n if self.thread_pool_executor is not None:\n futures = []\n for input in valid_inputs:\n future = self.thread_pool_executor.submit(\n self._generate, [input], num_generations\n )\n futures.append(future)\n future = when_all_complete(\n futures=futures,\n callback=lambda generations: self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n ),\n )\n future.add_done_callback(lambda _: _progress())\n return future\n\n generations = self._generate(valid_inputs, num_generations)\n\n generations = self._fill_missing_inputs(\n generations, invalid_inputs_indices, num_generations\n )\n\n _progress()\n return generations\n
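A brief sketch of the two return modes of generate described above. SomeLLM stands in for any concrete subclass and is not a real class in the library:

>>> llm = SomeLLM(task=task, num_threads=2)           # hypothetical subclass, threaded
>>> future = llm.generate(inputs, num_generations=2)  # a Future is returned because num_threads is set
>>> generations = future.result()                     # List[List[LLMOutput]]
>>> llm = SomeLLM(task=task)                          # num_threads=None
>>> generations = llm.generate(inputs)                # the nested list is returned directly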
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLMPool","title":"LLMPool
","text":"LLMPool is a class that wraps multiple ProcessLLM
s and performs generation in parallel using them. Depending on the number of LLM
s and the parameter num_generations
, the LLMPool
will decide how many generations to perform for each LLM
:
If num_generations
is less than the number of LLM
s, then num_generations
LLMs will be chosen randomly and each of them will perform 1 generation.
If num_generations
is equal to the number of LLM
s, then each LLM
will perform 1 generation.
If num_generations
is greater than the number of LLM
s, then each LLM
will perform num_generations // num_llms
generations, and the remaining num_generations % num_llms
generations will be performed by num_generations % num_llms
randomly chosen LLM
s.
Attributes:
Name Type Descriptionllms
List[ProcessLLM]
the ProcessLLM
s to be used for generation.
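A minimal sketch of the split described above, mirroring the arithmetic of _get_num_generations_per_llm in the source below, assuming 3 LLMs and num_generations=5 (each LLM gets 5 // 3 = 1 generation and the remaining 5 % 3 = 2 go to 2 randomly chosen LLMs):
>>> import random
>>> num_llms, num_generations = 3, 5
>>> per_llm = {i: num_generations // num_llms for i in range(num_llms)}
>>> for i in random.sample(range(num_llms), k=num_generations % num_llms):
...     per_llm[i] += 1
>>> sum(per_llm.values())
5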
src/distilabel/llm/base.py
class LLMPool:\n \"\"\"LLMPool is a class that wraps multiple `ProcessLLM`s and performs generation in\n parallel using them. Depending on the number of `LLM`s and the parameter `num_generations`,\n the `LLMPool` will decide how many generations to perform for each `LLM`:\n\n - If `num_generations` is less than the number of `LLM`s, then `num_generations` LLMs\n will be chosen randomly and each of them will perform 1 generation.\n\n\n - If `num_generations` is equal to the number of `LLM`s, then each `LLM` will perform\n 1 generation.\n\n - If `num_generations` is greater than the number of `LLM`s, then each `LLM` will\n perform `num_generations // num_llms` generations, and the remaining `num_generations % num_llms`\n generations will be performed by `num_generations % num_llms` randomly chosen `LLM`s.\n\n Attributes:\n llms (List[ProcessLLM]): the `ProcessLLM`s to be used for generation.\n \"\"\"\n\n def __init__(self, llms: List[ProcessLLM]) -> None:\n \"\"\"Initializes the `LLMPool` class.\n\n Args:\n llms: the `ProcessLLM`s to be used for generation. The list must contain at\n least 2 `ProcessLLM`s.\n\n Raises:\n ValueError: if the `llms` argument contains less than 2 `ProcessLLM`s, the\n `llms` argument contains `ProcessLLM`s that are not `ProcessLLM`s, or\n if the `llms` argument contains `ProcessLLM`s with different tasks.\n \"\"\"\n if len(llms) < 2:\n raise ValueError(\n \"The `llms` argument must contain at least 2 `ProcessLLM`s. If you want\"\n \" to use a single `ProcessLLM`, use the `ProcessLLM` directly instead.\"\n )\n\n if not all(isinstance(llm, ProcessLLM) for llm in llms):\n raise ValueError(\"The `llms` argument must contain only `ProcessLLM`s.\")\n\n # Note: The following piece of code is used to check that all the `ProcessLLM`s\n # have the same task or a subclass of it.\n mros = [(type(llm.task), len(type(llm.task).mro())) for llm in llms]\n min_common_class = min(mros, key=lambda x: x[1])[0]\n if not all(isinstance(llm.task, min_common_class) for llm in llms):\n raise ValueError(\n \"All the `ProcessLLM` in `llms` must share the same task (either as the instance or the parent class).\"\n )\n\n self.llms = llms\n self.num_llms = len(llms)\n\n def _get_num_generations_per_llm(self, num_generations: int) -> Dict[int, int]:\n \"\"\"Returns the number of generations to be performed by each `LLM`.\n\n Args:\n num_generations: the number of generations to be performed.\n\n Returns:\n Dict[int, int]: a dictionary where the keys are the ids of the `LLM`s and the\n values are the number of generations to be performed by each `LLM`.\n \"\"\"\n llms_ids = list(range(self.num_llms))\n generations_per_llm = {i: num_generations // self.num_llms for i in llms_ids}\n\n for i in random.sample(llms_ids, k=num_generations % self.num_llms):\n generations_per_llm[i] += 1\n\n return generations_per_llm\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generates the outputs for the given inputs using the pool of `ProcessLLM`s.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. 
Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n num_generations_per_llm = self._get_num_generations_per_llm(num_generations)\n\n futures = [\n llm.generate(\n inputs,\n num_generations=num_generations_per_llm[i],\n progress_callback_func=progress_callback_func,\n )\n for i, llm in enumerate(self.llms)\n if num_generations_per_llm[i] > 0\n ]\n llms_generations = [future.result() for future in futures]\n\n generations = []\n for llms_row_generations in zip(*llms_generations):\n row_generations = []\n for llm_row_generations in llms_row_generations:\n for generation in llm_row_generations:\n row_generations.append(generation)\n generations.append(row_generations)\n\n return generations\n\n def teardown(self) -> None:\n \"\"\"Stops the `ProcessLLM`s.\"\"\"\n for llm in self.llms:\n llm.teardown()\n\n @property\n def task(self) -> \"Task\":\n \"\"\"Returns the task that will be used by the `ProcessLLM`s of this pool.\n\n Returns:\n Task: the task that will be used by the `ProcessLLM`s of this pool.\n \"\"\"\n return self.llms[0].task\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return False\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLMPool.return_futures","title":"return_futures: bool
property
","text":"Whether the LLM
returns futures
task: 'Task'
property
","text":"Returns the task that will be used by the ProcessLLM
s of this pool.
Returns:
Name Type DescriptionTask
'Task'
the task that will be used by the ProcessLLM
s of this pool.
__init__(llms)
","text":"Initializes the LLMPool
class.
Parameters:
Name Type Description Defaultllms
List[ProcessLLM]
the ProcessLLM
s to be used for generation. The list must contain at least 2 ProcessLLM
s.
Raises:
Type DescriptionValueError
if the llms
argument contains less than 2 ProcessLLM
s, the llms
argument contains ProcessLLM
s that are not ProcessLLM
s, or if the llms
argument contains ProcessLLM
s with different tasks.
src/distilabel/llm/base.py
def __init__(self, llms: List[ProcessLLM]) -> None:\n \"\"\"Initializes the `LLMPool` class.\n\n Args:\n llms: the `ProcessLLM`s to be used for generation. The list must contain at\n least 2 `ProcessLLM`s.\n\n Raises:\n ValueError: if the `llms` argument contains less than 2 `ProcessLLM`s, the\n `llms` argument contains `ProcessLLM`s that are not `ProcessLLM`s, or\n if the `llms` argument contains `ProcessLLM`s with different tasks.\n \"\"\"\n if len(llms) < 2:\n raise ValueError(\n \"The `llms` argument must contain at least 2 `ProcessLLM`s. If you want\"\n \" to use a single `ProcessLLM`, use the `ProcessLLM` directly instead.\"\n )\n\n if not all(isinstance(llm, ProcessLLM) for llm in llms):\n raise ValueError(\"The `llms` argument must contain only `ProcessLLM`s.\")\n\n # Note: The following piece of code is used to check that all the `ProcessLLM`s\n # have the same task or a subclass of it.\n mros = [(type(llm.task), len(type(llm.task).mro())) for llm in llms]\n min_common_class = min(mros, key=lambda x: x[1])[0]\n if not all(isinstance(llm.task, min_common_class) for llm in llms):\n raise ValueError(\n \"All the `ProcessLLM` in `llms` must share the same task (either as the instance or the parent class).\"\n )\n\n self.llms = llms\n self.num_llms = len(llms)\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLMPool.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the pool of ProcessLLM
s.
Parameters:
Name Type Description Defaultinputs
List[Dict[str, Any]]
the inputs to be used for generation.
requirednum_generations
int
the number of generations to be performed for each input. Defaults to 1
.
1
progress_callback_func
Union[Callable, None]
a function to be called at each generation step. Defaults to None
.
None
Returns:
Type DescriptionList[List['LLMOutput']]
Future[List[List[\"LLMOutput\"]]]: the generated outputs as a Future
.
src/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generates the outputs for the given inputs using the pool of `ProcessLLM`s.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n num_generations_per_llm = self._get_num_generations_per_llm(num_generations)\n\n futures = [\n llm.generate(\n inputs,\n num_generations=num_generations_per_llm[i],\n progress_callback_func=progress_callback_func,\n )\n for i, llm in enumerate(self.llms)\n if num_generations_per_llm[i] > 0\n ]\n llms_generations = [future.result() for future in futures]\n\n generations = []\n for llms_row_generations in zip(*llms_generations):\n row_generations = []\n for llm_row_generations in llms_row_generations:\n for generation in llm_row_generations:\n row_generations.append(generation)\n generations.append(row_generations)\n\n return generations\n
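A hedged usage sketch of the pool, assuming LLMPool, ProcessLLM and OpenAILLM can be imported from distilabel.llm, that a valid OPENAI_API_KEY is set, and that the task expects an "input" field (the prompt is a placeholder):
>>> from distilabel.llm import LLMPool, ProcessLLM, OpenAILLM
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> def load_llm(task):
...     return OpenAILLM(task=task, model="gpt-3.5-turbo")
>>> pool = LLMPool(llms=[ProcessLLM(task=Task(), load_llm_fn=load_llm) for _ in range(2)])
>>> generations = pool.generate(inputs=[{"input": "Summarize distilabel in one sentence."}], num_generations=2)
>>> pool.teardown()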
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.LLMPool.teardown","title":"teardown()
","text":"Stops the ProcessLLM
s.
src/distilabel/llm/base.py
def teardown(self) -> None:\n \"\"\"Stops the `ProcessLLM`s.\"\"\"\n for llm in self.llms:\n llm.teardown()\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.ProcessLLM","title":"ProcessLLM
","text":"A class that wraps an LLM
and performs generation in a separate process. The result is a Future
that will be set when the generation is completed.
This class creates a new child process that will load the LLM
and perform the text generation. In order to communicate with this child process, a bridge thread is created in the main process. The bridge thread will send and receive the results from the child process using multiprocessing.Queue
s. The communication between the bridge thread and the main process is done using Future
s. This architecture was inspired by the ProcessPoolExecutor
from the concurrent.futures
module and it's a simplified version of it.
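A hedged usage sketch of this pattern, assuming ProcessLLM and LlamaCppLLM can be imported from distilabel.llm, that llama-cpp-python is installed, and that the task expects an "input" field; the model path is a placeholder:
>>> from distilabel.llm import ProcessLLM, LlamaCppLLM
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> def load_llm(task):
...     from llama_cpp import Llama
...     return LlamaCppLLM(model=Llama(model_path="path/to/model"), task=task)
>>> llm = ProcessLLM(task=Task(), load_llm_fn=load_llm)
>>> future = llm.generate(inputs=[{"input": "Hello!"}], num_generations=1)  # returns a Future
>>> outputs = future.result()
>>> llm.teardown()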
src/distilabel/llm/base.py
class ProcessLLM:\n \"\"\"A class that wraps an `LLM` and performs generation in a separate process. The\n result is a `Future` that will be set when the generation is completed.\n\n This class creates a new child process that will load the `LLM` and perform the\n text generation. In order to communicate with this child process, a bridge thread\n is created in the main process. The bridge thread will send and receive the results\n from the child process using `multiprocessing.Queue`s. The communication between the\n bridge thread and the main process is done using `Future`s. This architecture was\n inspired by the `ProcessPoolExecutor` from the `concurrent.futures` module and it's\n a simplified version of it.\n \"\"\"\n\n def __init__(self, task: Task, load_llm_fn: Callable[[Task], LLM]) -> None:\n \"\"\"Initializes the `ProcessLLM` class.\n\n Args:\n task: the task to be performed by the `LLM`. This task will be used by the\n child process when calling the `load_llm_fn`.\n load_llm_fn (Callable[[Task], LLM]): a function that will be executed in the\n child process to load the `LLM`. It must return an `LLM` instance.\n \"\"\"\n self.task = task\n\n self._load_llm_fn = load_llm_fn\n\n # The bridge thread will act as a bridge between the main process and the child\n # process for communication. It will send the generation requests to the child\n # process and receive the results from the child process.\n self._bridge_thread = None\n\n # The child process which will load the `LLM` and perform the generation.\n self._generation_process = None\n\n # The `Semaphore` that will be used to synchronize the loading of the `LLM`.\n # `_BridgeThread` will be blocked until `_GenerationProcess` has called the\n # `load_llm_fn` and the `LLM` has been loaded.\n self._load_llm_sem = mp.Semaphore(0)\n\n # This thread will create text generation requests\n self.pending_text_generation_request: Dict[int, _TextGenerationRequest] = {}\n self.text_generation_request_count = 0\n self.text_generation_request_ids_queue: queue.Queue[int] = queue.Queue()\n\n # Queues for the communication between the `_BridgeThread` and the `_GenerationProcess`\n self._call_queue = mp.Queue()\n self._result_queue = mp.Queue()\n\n # Shared memory object for transfering the `model_name` to the main process\n # once the `LLM` is loaded\n self._model_name = mp.Array(c_char, MAX_MODEL_NAME_LENGTH)\n\n def _start_bridge_thread(self) -> None:\n \"\"\"Starts the bridge thread and the generation process.\"\"\"\n if self._bridge_thread is None:\n self._generation_process = _GenerationProcess(self)\n self._generation_process.start()\n pid = self._generation_process.pid\n logger.debug(f\"Generation process with PID {pid} started!\")\n\n self._bridge_thread = _BridgeThread(self)\n self._bridge_thread.start()\n logger.debug(\"Bridge thread for process with PID {pid} started!\")\n\n def _add_text_generation_request(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Creates and send a new text generation request to the bridge thread. This thread\n and the bridge thread shares a dictionary used to store the text generation requests.\n This thread will add the text generation requests to the dictionary and the bridge\n thread will only read from it. In order for the bridge thread to know that a new\n text generation request has been added to the dictionary, this thread will put the\n id of the request in a queue. 
The bridge thread will read from this queue and get\n the text generation request from the dictionary.\n \"\"\"\n\n def _progress():\n if progress_callback_func is not None:\n progress_callback_func(advance=num_generations * len(inputs))\n\n text_generation_request = _TextGenerationRequest(\n inputs=inputs, num_generations=num_generations\n )\n # Put the request information in the dictionary associated to the request id\n self.pending_text_generation_request[\n self.text_generation_request_count\n ] = text_generation_request\n # Put the request id in the queue (for the `_BridgeThread` to consume it)\n self.text_generation_request_ids_queue.put(self.text_generation_request_count)\n self.text_generation_request_count += 1\n text_generation_request.future.add_done_callback(lambda _: _progress())\n return text_generation_request.future\n\n def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n ) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Generates the outputs for the given inputs using the `ProcessLLM` and its loaded\n `LLM`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n self._start_bridge_thread()\n return self._add_text_generation_request(\n inputs, num_generations, progress_callback_func\n )\n\n def teardown(self) -> None:\n \"\"\"Stops the bridge thread and the generation process.\"\"\"\n if self._generation_process is not None:\n self._generation_process.stop()\n self._generation_process.join()\n\n if self._bridge_thread is not None:\n self._bridge_thread.stop()\n self._bridge_thread.join()\n\n @cached_property\n def model_name(self) -> str:\n \"\"\"Returns the model name of the `LLM` once it has been loaded.\"\"\"\n with self._model_name:\n return \"\".join([c.decode() for c in self._model_name if c != b\"\\0\"])\n\n @property\n def return_futures(self) -> bool:\n \"\"\"Whether the `LLM` returns futures\"\"\"\n return True\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.ProcessLLM.model_name","title":"model_name: str
cached
property
","text":"Returns the model name of the LLM
once it has been loaded.
return_futures: bool
property
","text":"Whether the LLM
returns futures
__init__(task, load_llm_fn)
","text":"Initializes the ProcessLLM
class.
Parameters:
Name Type Description Defaulttask
Task
the task to be performed by the LLM
. This task will be used by the child process when calling the load_llm_fn
.
load_llm_fn
Callable[[Task], LLM]
a function that will be executed in the child process to load the LLM
. It must return an LLM
instance.
src/distilabel/llm/base.py
def __init__(self, task: Task, load_llm_fn: Callable[[Task], LLM]) -> None:\n \"\"\"Initializes the `ProcessLLM` class.\n\n Args:\n task: the task to be performed by the `LLM`. This task will be used by the\n child process when calling the `load_llm_fn`.\n load_llm_fn (Callable[[Task], LLM]): a function that will be executed in the\n child process to load the `LLM`. It must return an `LLM` instance.\n \"\"\"\n self.task = task\n\n self._load_llm_fn = load_llm_fn\n\n # The bridge thread will act as a bridge between the main process and the child\n # process for communication. It will send the generation requests to the child\n # process and receive the results from the child process.\n self._bridge_thread = None\n\n # The child process which will load the `LLM` and perform the generation.\n self._generation_process = None\n\n # The `Semaphore` that will be used to synchronize the loading of the `LLM`.\n # `_BridgeThread` will be blocked until `_GenerationProcess` has called the\n # `load_llm_fn` and the `LLM` has been loaded.\n self._load_llm_sem = mp.Semaphore(0)\n\n # This thread will create text generation requests\n self.pending_text_generation_request: Dict[int, _TextGenerationRequest] = {}\n self.text_generation_request_count = 0\n self.text_generation_request_ids_queue: queue.Queue[int] = queue.Queue()\n\n # Queues for the communication between the `_BridgeThread` and the `_GenerationProcess`\n self._call_queue = mp.Queue()\n self._result_queue = mp.Queue()\n\n # Shared memory object for transfering the `model_name` to the main process\n # once the `LLM` is loaded\n self._model_name = mp.Array(c_char, MAX_MODEL_NAME_LENGTH)\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.ProcessLLM.generate","title":"generate(inputs, num_generations=1, progress_callback_func=None)
","text":"Generates the outputs for the given inputs using the ProcessLLM
and its loaded LLM
.
Parameters:
Name Type Description Defaultinputs
List[Dict[str, Any]]
the inputs to be used for generation.
requirednum_generations
int
the number of generations to be performed for each input. Defaults to 1
.
1
progress_callback_func
Union[Callable, None]
a function to be called at each generation step. Defaults to None
.
None
Returns:
Type DescriptionFuture[List[List['LLMOutput']]]
Future[List[List[\"LLMOutput\"]]]: the generated outputs as a Future
.
src/distilabel/llm/base.py
def generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n progress_callback_func: Union[Callable, None] = None,\n) -> Future[List[List[\"LLMOutput\"]]]:\n \"\"\"Generates the outputs for the given inputs using the `ProcessLLM` and its loaded\n `LLM`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each input.\n Defaults to `1`.\n progress_callback_func (Union[Callable, None], optional): a function to be called at each\n generation step. Defaults to `None`.\n\n Returns:\n Future[List[List[\"LLMOutput\"]]]: the generated outputs as a `Future`.\n \"\"\"\n self._start_bridge_thread()\n return self._add_text_generation_request(\n inputs, num_generations, progress_callback_func\n )\n
"},{"location":"reference/distilabel/llm/base/#distilabel.llm.base.ProcessLLM.teardown","title":"teardown()
","text":"Stops the bridge thread and the generation process.
Source code in src/distilabel/llm/base.py
def teardown(self) -> None:\n \"\"\"Stops the bridge thread and the generation process.\"\"\"\n if self._generation_process is not None:\n self._generation_process.stop()\n self._generation_process.join()\n\n if self._bridge_thread is not None:\n self._bridge_thread.stop()\n self._bridge_thread.join()\n
"},{"location":"reference/distilabel/llm/llama_cpp/","title":"llama_cpp","text":""},{"location":"reference/distilabel/llm/llama_cpp/#distilabel.llm.llama_cpp.LlamaCppLLM","title":"LlamaCppLLM
","text":" Bases: LLM
src/distilabel/llm/llama_cpp.py
class LlamaCppLLM(LLM):\n def __init__(\n self,\n model: \"Llama\",\n task: \"Task\",\n max_new_tokens: int = 128,\n temperature: float = 0.8,\n top_p: float = 0.95,\n top_k: int = 40,\n repeat_penalty: float = 1.1,\n seed: int = 1337,\n prompt_format: Union[SupportedFormats, None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the LlamaCppLLM class.\n\n Args:\n model (Llama): the llama-cpp model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 0.8.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 0.95.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n repeat_penalty (float, optional): the repeat penalty to be used for generation.\n Defaults to 1.1.\n seed (int, optional): the seed to be used for generation, setting it to -1 implies\n that a different response will be generated on each generation, similarly to\n HuggingFace's `do_sample` arg. Defaults to 1337.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Examples:\n >>> from llama_cpp import Llama\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import LlamaCppLLM\n >>> model = Llama(model_path=\"path/to/model\")\n >>> task = Task()\n >>> llm = LlamaCppLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _LLAMA_CPP_AVAILABLE:\n raise ImportError(\n \"`LlamaCppLLM` cannot be used as `llama_cpp` is not installed, please \"\n \" install it with `pip install llama-cpp-python`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repeat_penalty = repeat_penalty\n self.seed = seed\n\n self.model = model\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_tokens,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n \"repeat_penalty\": self.repeat_penalty,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the llama-cpp model, which is the same as the model path.\"\"\"\n return self.model.model_path\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. 
Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n output = []\n for _ in range(num_generations):\n raw_output = self.model.create_completion(\n prompt,\n max_tokens=self.max_tokens,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n repeat_penalty=self.repeat_penalty,\n )\n try:\n parsed_output = self.task.parse_output(\n raw_output[\"choices\"][0][\"text\"].strip()\n )\n except Exception as e:\n logger.error(f\"Error parsing llama-cpp output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/llama_cpp/#distilabel.llm.llama_cpp.LlamaCppLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the llama-cpp model, which is the same as the model path.
"},{"location":"reference/distilabel/llm/llama_cpp/#distilabel.llm.llama_cpp.LlamaCppLLM.__init__","title":"__init__(model, task, max_new_tokens=128, temperature=0.8, top_p=0.95, top_k=40, repeat_penalty=1.1, seed=1337, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the LlamaCppLLM class.
Parameters:
- model (Llama): the llama-cpp model to be used. Required.
- task (Task): the task to be performed by the LLM. Required.
- max_new_tokens (int): the maximum number of tokens to be generated. Defaults to 128.
- temperature (float): the temperature to be used for generation. Defaults to 0.8.
- top_p (float): the top-p value to be used for generation. Defaults to 0.95.
- top_k (int): the top-k value to be used for generation. Defaults to 40.
- repeat_penalty (float): the repeat penalty to be used for generation. Defaults to 1.1.
- seed (int): the seed to be used for generation; setting it to -1 implies that a different response will be generated on each generation, similarly to HuggingFace's do_sample arg. Defaults to 1337.
- prompt_format (Union[SupportedFormats, None]): the format to be used for the prompt. If None, the default format of the task will be used; available formats are openai, chatml, llama2, zephyr, and default. Defaults to None, but default (concatenation of system_prompt and formatted_prompt with a line-break) will be used if no prompt_formatting_fn is provided.
- prompt_formatting_fn (Union[Callable[..., str], None]): a function to be applied to the prompt before generation. If None, no formatting will be applied. Defaults to None.
Examples:
>>> from llama_cpp import Llama\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import LlamaCppLLM\n>>> model = Llama(model_path=\"path/to/model\")\n>>> task = Task()\n>>> llm = LlamaCppLLM(model=model, task=task)\n
Source code in src/distilabel/llm/llama_cpp.py
def __init__(\n self,\n model: \"Llama\",\n task: \"Task\",\n max_new_tokens: int = 128,\n temperature: float = 0.8,\n top_p: float = 0.95,\n top_k: int = 40,\n repeat_penalty: float = 1.1,\n seed: int = 1337,\n prompt_format: Union[SupportedFormats, None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the LlamaCppLLM class.\n\n Args:\n model (Llama): the llama-cpp model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 0.8.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 0.95.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n repeat_penalty (float, optional): the repeat penalty to be used for generation.\n Defaults to 1.1.\n seed (int, optional): the seed to be used for generation, setting it to -1 implies\n that a different response will be generated on each generation, similarly to\n HuggingFace's `do_sample` arg. Defaults to 1337.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Examples:\n >>> from llama_cpp import Llama\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import LlamaCppLLM\n >>> model = Llama(model_path=\"path/to/model\")\n >>> task = Task()\n >>> llm = LlamaCppLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _LLAMA_CPP_AVAILABLE:\n raise ImportError(\n \"`LlamaCppLLM` cannot be used as `llama_cpp` is not installed, please \"\n \" install it with `pip install llama-cpp-python`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repeat_penalty = repeat_penalty\n self.seed = seed\n\n self.model = model\n
"},{"location":"reference/distilabel/llm/openai/","title":"openai","text":""},{"location":"reference/distilabel/llm/openai/#distilabel.llm.openai.OpenAILLM","title":"OpenAILLM
","text":" Bases: LLM
src/distilabel/llm/openai.py
class OpenAILLM(LLM):\n def __init__(\n self,\n task: \"Task\",\n model: str = \"gpt-3.5-turbo\",\n client: Union[\"OpenAI\", None] = None,\n openai_api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n frequency_penalty: float = 0.0,\n presence_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gpt-3.5-turbo\".\n client (Union[OpenAI, None], optional): an OpenAI client to be used for generation.\n If `None`, a new client will be created. Defaults to `None`.\n openai_api_key (Union[str, None], optional): the OpenAI API key to be used for generation.\n If `None`, the `OPENAI_API_KEY` environment variable will be used. Defaults to `None`.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in your OpenAI account.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import OpenAILLM\n >>> task = Task()\n >>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _OPENAI_AVAILABLE:\n raise ImportError(\n \"`OpenAILLM` cannot be used as `openai` is not installed, please \"\n \" install it with `pip install openai`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.frequency_penalty = frequency_penalty\n self.presence_penalty = presence_penalty\n self.temperature = temperature\n self.top_p = top_p\n\n self.client = client or OpenAI(api_key=openai_api_key, max_retries=6)\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in your OpenAI account, available models are {self.available_models}\"\n self.model = model\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_tokens\": self.max_tokens,\n \"frequency_penalty\": self.frequency_penalty,\n \"presence_penalty\": self.presence_penalty,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n },\n )\n\n @cached_property\n def available_models(self) -> List[str]:\n \"\"\"Returns the list of available models in your OpenAI account.\"\"\"\n return [model.id for model in self.client.models.list().data]\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the OpenAI model.\"\"\"\n return self.model\n\n def _generate(\n self,\n inputs: List[Dict[str, Any]],\n num_generations: int = 1,\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"openai\")\n outputs = []\n for prompt in prompts:\n chat_completions = self.client.chat.completions.create(\n messages=prompt,\n model=self.model,\n n=num_generations,\n max_tokens=self.max_tokens,\n frequency_penalty=self.frequency_penalty,\n presence_penalty=self.presence_penalty,\n temperature=self.temperature,\n top_p=self.top_p,\n timeout=50,\n )\n\n output = []\n for chat_completion in chat_completions.choices:\n try:\n parsed_response = self.task.parse_output(\n chat_completion.message.content.strip()\n )\n except Exception as e:\n logger.error(f\"Error parsing OpenAI response: {e}\")\n parsed_response = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=chat_completion.message.content,\n parsed_output=parsed_response,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/openai/#distilabel.llm.openai.OpenAILLM.available_models","title":"available_models: List[str]
cached
property
","text":"Returns the list of available models in your OpenAI account.
"},{"location":"reference/distilabel/llm/openai/#distilabel.llm.openai.OpenAILLM.model_name","title":"model_name: str
property
","text":"Returns the name of the OpenAI model.
"},{"location":"reference/distilabel/llm/openai/#distilabel.llm.openai.OpenAILLM.__init__","title":"__init__(task, model='gpt-3.5-turbo', client=None, openai_api_key=None, max_new_tokens=128, frequency_penalty=0.0, presence_penalty=0.0, temperature=1.0, top_p=1.0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the OpenAILLM class.
Parameters:
- task (Task): the task to be performed by the LLM. Required.
- model (str): the model to be used for generation. Defaults to \"gpt-3.5-turbo\".
- client (Union[OpenAI, None]): an OpenAI client to be used for generation. If None, a new client will be created. Defaults to None.
- openai_api_key (Union[str, None]): the OpenAI API key to be used for generation. If None, the OPENAI_API_KEY environment variable will be used. Defaults to None.
- max_new_tokens (int): the maximum number of tokens to be generated. Defaults to 128.
- frequency_penalty (float): the frequency penalty to be used for generation. Defaults to 0.0.
- presence_penalty (float): the presence penalty to be used for generation. Defaults to 0.0.
- temperature (float): the temperature to be used for generation. Defaults to 1.0.
- top_p (float): the top-p value to be used for generation. Defaults to 1.0.
- num_threads (Union[int, None]): the number of threads to be used for parallel generation. If None, no parallel generation will be performed. Defaults to None.
- prompt_format (Union[SupportedFormats, None]): the format to be used for the prompt. If None, the default format of the task will be used; available formats are openai, chatml, llama2, zephyr, and default. Defaults to None, but default (concatenation of system_prompt and formatted_prompt with a line-break) will be used if no prompt_formatting_fn is provided.
- prompt_formatting_fn (Union[Callable[..., str], None]): a function to be applied to the prompt before generation. If None, no formatting will be applied. Defaults to None.
Raises:
- AssertionError: if the provided model is not available in your OpenAI account.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import OpenAILLM\n>>> task = Task()\n>>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n
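A possible continuation of the example above, hedged: the "input" key is an assumption about the field expected by the task, and the prompt is a placeholder:
>>> result = llm.generate(inputs=[{"input": "Explain RLHF in one sentence."}], num_generations=1)
>>> result[0][0]["raw_output"]  # with num_threads=None a plain list of LLMOutput dicts is returned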
Source code in src/distilabel/llm/openai.py
def __init__(\n self,\n task: \"Task\",\n model: str = \"gpt-3.5-turbo\",\n client: Union[\"OpenAI\", None] = None,\n openai_api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n frequency_penalty: float = 0.0,\n presence_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gpt-3.5-turbo\".\n client (Union[OpenAI, None], optional): an OpenAI client to be used for generation.\n If `None`, a new client will be created. Defaults to `None`.\n openai_api_key (Union[str, None], optional): the OpenAI API key to be used for generation.\n If `None`, the `OPENAI_API_KEY` environment variable will be used. Defaults to `None`.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in your OpenAI account.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import OpenAILLM\n >>> task = Task()\n >>> llm = OpenAILLM(model=\"gpt-3.5-turbo\", task=task)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _OPENAI_AVAILABLE:\n raise ImportError(\n \"`OpenAILLM` cannot be used as `openai` is not installed, please \"\n \" install it with `pip install openai`.\"\n )\n\n self.max_tokens = max_new_tokens\n self.frequency_penalty = frequency_penalty\n self.presence_penalty = presence_penalty\n self.temperature = temperature\n self.top_p = top_p\n\n self.client = client or OpenAI(api_key=openai_api_key, max_retries=6)\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in your OpenAI account, available models are {self.available_models}\"\n self.model = model\n
"},{"location":"reference/distilabel/llm/together/","title":"together","text":""},{"location":"reference/distilabel/llm/together/#distilabel.llm.together.TogetherInferenceLLM","title":"TogetherInferenceLLM
","text":" Bases: LLM
src/distilabel/llm/together.py
class TogetherInferenceLLM(LLM):\n def __init__(\n self,\n task: \"Task\",\n model: str,\n api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: float = 1.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = 1,\n stop: Union[List[str], None] = None,\n logprobs: int = 0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str): the model to be used for generation.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation. From the Together\n Inference docs: \"A decimal number that determines the degree of randomness in the response.\n A value of 0 will always yield the same output. A temperature much less than 1 favors more\n correctness and is appropriate for question answering or summarization. A value approaching\n 1 introduces more randomness in the output.\". Defaults to 1.0.\n repetition_penalty (float, optional): the repetition penalty to be used for generation. From the\n Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood\n of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation. From the Together\n Inference docs: \"used to dynamically adjust the number of choices for each predicted\n token based on the cumulative probabilities. It specifies a probability threshold,\n below which all less likely tokens are filtered out. This technique helps to maintain\n diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation. From the Together Inference\n docs: \"used to limit the number of choices for the next predicted word or token. It specifies\n the maximum number of tokens to consider at each step, based on their probability of occurrence.\n This technique helps to speed up the generation process and can improve the quality of the\n generated text by focusing on the most likely options.\". Defaults to 1.\n stop (List[str], optional): strings to delimitate the generation process, so that when the\n model generates any of the provided characters, the generation process is considered completed.\n Defaults to None.\n logprobs (int, optional): the number of logprobs to be returned for each token. From the\n Together Inference docs: \"An integer that specifies how many top token log probabilities\n are included in the response for each token generation step.\". Defaults to None.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in Together Inference.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TogetherInferenceLLM\n >>> task = Task()\n >>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n \"\"\"\n if not _TOGETHER_AVAILABLE:\n raise ImportError(\n \"`TogetherInferenceLLM` cannot be used as `together` is not installed, please \"\n \" install it with `pip install together`.\"\n )\n\n together.api_key = api_key or os.getenv(\"TOGETHER_API_KEY\", None)\n if together.api_key is None:\n raise ValueError(\n \"No `api_key` provided, please provide one or set the `TOGETHER_API_KEY` \"\n \"environment variable.\"\n )\n\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in Together Inference, available models are {self.available_models}\"\n self.model = model\n\n self.max_new_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repetition_penalty = repetition_penalty\n self.stop = stop\n self.logprobs = logprobs\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_new_tokens,\n \"temperature\": self.temperature,\n \"repetition_penalty\": self.repetition_penalty,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n \"stop\": self.stop,\n \"logprobs\": self.logprobs,\n },\n )\n\n @cached_property\n def available_models(self) -> List[str]:\n \"\"\"Returns the list of available models in Together Inference.\"\"\"\n # TODO: exclude the image models\n return [model[\"name\"] for model in together.Models.list()]\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the Together Inference model.\"\"\"\n return self.model\n\n def _generate_single_output(self, prompt: str) -> LLMOutput:\n \"\"\"Runs the Together Inference text generation function over a single prompt\n producing a single `LLMOutput`.\n\n Args:\n prompt (str): the formatted prompt to be provided to the Together Inference\n endpoint.\n\n Raises:\n RuntimeError: raised if the Together Inference endpoint fails.\n \"\"\"\n try:\n output = together.Complete.create(\n prompt=prompt,\n model=self.model,\n max_tokens=self.max_new_tokens,\n stop=self.stop,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n repetition_penalty=self.repetition_penalty,\n logprobs=self.logprobs,\n )\n except Exception as e:\n raise RuntimeError(\n f\"Together Inference generation failed with exception: {e}\"\n ) from e\n\n if output[\"output\"][\"choices\"] is None or len(output[\"output\"][\"choices\"]) < 1: # type: ignore\n raise RuntimeError(\"Together Inference generation returned no generations.\")\n\n choice = output[\"output\"][\"choices\"][0] # type: ignore\n try:\n parsed_response = self.task.parse_output(choice[\"text\"].strip())\n except Exception as e:\n logger.error(f\"Error parsing Together Inference response: {e}\")\n parsed_response = None\n\n return LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=choice[\"text\"] or None,\n parsed_output=parsed_response,\n )\n\n def _generate(\n self,\n inputs: List[Dict[str, Any]],\n 
num_generations: int = 1,\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the generated outputs.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n outputs.append(\n [self._generate_single_output(prompt) for _ in range(num_generations)]\n )\n return outputs\n
"},{"location":"reference/distilabel/llm/together/#distilabel.llm.together.TogetherInferenceLLM.available_models","title":"available_models: List[str]
cached
property
","text":"Returns the list of available models in Together Inference.
"},{"location":"reference/distilabel/llm/together/#distilabel.llm.together.TogetherInferenceLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the Together Inference model.
"},{"location":"reference/distilabel/llm/together/#distilabel.llm.together.TogetherInferenceLLM.__init__","title":"__init__(task, model, api_key=None, max_new_tokens=128, repetition_penalty=1.0, temperature=1.0, top_p=1.0, top_k=1, stop=None, logprobs=0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the OpenAILLM class.
Parameters:
- task (Task): the task to be performed by the LLM. Required.
- model (str): the model to be used for generation. Required.
- api_key (Union[str, None]): the Together API key to be used for generation. If None, the TOGETHER_API_KEY environment variable will be used. Defaults to None.
- max_new_tokens (int): the maximum number of tokens to be generated. Defaults to 128.
- temperature (float): the temperature to be used for generation. From the Together Inference docs: \"A decimal number that determines the degree of randomness in the response. A value of 0 will always yield the same output. A temperature much less than 1 favors more correctness and is appropriate for question answering or summarization. A value approaching 1 introduces more randomness in the output.\". Defaults to 1.0.
- repetition_penalty (float): the repetition penalty to be used for generation. From the Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.
- top_p (float): the top-p value to be used for generation. From the Together Inference docs: \"used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold, below which all less likely tokens are filtered out. This technique helps to maintain diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.
- top_k (int): the top-k value to be used for generation. From the Together Inference docs: \"used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options.\". Defaults to 1.
- stop (List[str]): strings that delimit the generation process, so that when the model generates any of the provided strings, the generation process is considered completed. Defaults to None.
- logprobs (int): the number of logprobs to be returned for each token. From the Together Inference docs: \"An integer that specifies how many top token log probabilities are included in the response for each token generation step.\". Defaults to 0.
- num_threads (Union[int, None]): the number of threads to be used for parallel generation. If None, no parallel generation will be performed. Defaults to None.
- prompt_format (Union[SupportedFormats, None]): the format to be used for the prompt. If None, the default format of the task will be used; available formats are openai, chatml, llama2, zephyr, and default. Defaults to None, but default (concatenation of system_prompt and formatted_prompt with a line-break) will be used if no prompt_formatting_fn is provided.
- prompt_formatting_fn (Union[Callable[..., str], None]): a function to be applied to the prompt before generation. If None, no formatting will be applied. Defaults to None.
Raises:
- AssertionError: if the provided model is not available in Together Inference.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import TogetherInferenceLLM\n>>> task = Task()\n>>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n
Source code in src/distilabel/llm/together.py
def __init__(\n self,\n task: \"Task\",\n model: str,\n api_key: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: float = 1.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = 1,\n stop: Union[List[str], None] = None,\n logprobs: int = 0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the OpenAILLM class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str): the model to be used for generation.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n temperature (float, optional): the temperature to be used for generation. From the Together\n Inference docs: \"A decimal number that determines the degree of randomness in the response.\n A value of 0 will always yield the same output. A temperature much less than 1 favors more\n correctness and is appropriate for question answering or summarization. A value approaching\n 1 introduces more randomness in the output.\". Defaults to 1.0.\n repetition_penalty (float, optional): the repetition penalty to be used for generation. From the\n Together Inference docs: \"Controls the diversity of generated text by reducing the likelihood\n of repeated sequences. Higher values decrease repetition.\". Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation. From the Together\n Inference docs: \"used to dynamically adjust the number of choices for each predicted\n token based on the cumulative probabilities. It specifies a probability threshold,\n below which all less likely tokens are filtered out. This technique helps to maintain\n diversity and generate more fluent and natural-sounding text.\". Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation. From the Together Inference\n docs: \"used to limit the number of choices for the next predicted word or token. It specifies\n the maximum number of tokens to consider at each step, based on their probability of occurrence.\n This technique helps to speed up the generation process and can improve the quality of the\n generated text by focusing on the most likely options.\". Defaults to 1.\n stop (List[str], optional): strings to delimitate the generation process, so that when the\n model generates any of the provided characters, the generation process is considered completed.\n Defaults to None.\n logprobs (int, optional): the number of logprobs to be returned for each token. From the\n Together Inference docs: \"An integer that specifies how many top token log probabilities\n are included in the response for each token generation step.\". Defaults to None.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n\n Raises:\n AssertionError: if the provided `model` is not available in Together Inference.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TogetherInferenceLLM\n >>> task = Task()\n >>> llm = TogetherInferenceLLM(model=\"togethercomputer/llama-2-7b\", task=task, prompt_format=\"llama2\")\n \"\"\"\n if not _TOGETHER_AVAILABLE:\n raise ImportError(\n \"`TogetherInferenceLLM` cannot be used as `together` is not installed, please \"\n \" install it with `pip install together`.\"\n )\n\n together.api_key = api_key or os.getenv(\"TOGETHER_API_KEY\", None)\n if together.api_key is None:\n raise ValueError(\n \"No `api_key` provided, please provide one or set the `TOGETHER_API_KEY` \"\n \"environment variable.\"\n )\n\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n assert (\n model in self.available_models\n ), f\"Provided `model` is not available in Together Inference, available models are {self.available_models}\"\n self.model = model\n\n self.max_new_tokens = max_new_tokens\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.repetition_penalty = repetition_penalty\n self.stop = stop\n self.logprobs = logprobs\n
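A minimal end-to-end sketch, assuming the `generate` method inherited from the base `LLM` class (which takes a list of input dicts plus `num_generations` and returns a list of `LLMOutput` lists), a `TextGenerationTask` whose input key is `input`, and a valid Together API key; the prompt below is a placeholder:
>>> import os
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import TogetherInferenceLLM
>>> llm = TogetherInferenceLLM(
...     model="togethercomputer/llama-2-7b",
...     task=Task(),
...     api_key=os.getenv("TOGETHER_API_KEY"),  # or rely on the env variable directly
...     prompt_format="llama2",
...     max_new_tokens=128,
... )
>>> outputs = llm.generate([{"input": "Write a haiku about the sea."}], num_generations=1)
>>> outputs[0][0]["raw_output"]  # raw text returned by Together Inference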
"},{"location":"reference/distilabel/llm/utils/","title":"utils","text":""},{"location":"reference/distilabel/llm/utils/#distilabel.llm.utils.LLMOutput","title":"LLMOutput
","text":" Bases: TypedDict
A type for the output of an LLM.
Source code in src/distilabel/llm/utils.py
class LLMOutput(TypedDict):\n \"\"\"A type for the output of an LLM.\"\"\"\n\n model_name: str\n prompt_used: Any\n raw_output: Any\n parsed_output: Optional[Any]\n
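Because `LLMOutput` is a `TypedDict`, an instance is just a plain dictionary with the four keys above. A minimal sketch (the values are placeholders, and the shape of `parsed_output` depends on the task's `parse_output`):
>>> from distilabel.llm.utils import LLMOutput
>>> output = LLMOutput(
...     model_name="gpt2",
...     prompt_used="Write a haiku about the sea.",
...     raw_output="Salt wind over waves...",
...     parsed_output={"generations": "Salt wind over waves..."},  # task-dependent, placeholder
... )
>>> output["model_name"]
'gpt2'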
"},{"location":"reference/distilabel/llm/vllm/","title":"vllm","text":""},{"location":"reference/distilabel/llm/vllm/#distilabel.llm.vllm.vLLM","title":"vLLM
","text":" Bases: LLM
Source code in src/distilabel/llm/vllm.py
class vLLM(LLM):\n def __init__(\n self,\n vllm: \"_vLLM\",\n task: \"Task\",\n max_new_tokens: int = 128,\n presence_penalty: float = 0.0,\n frequency_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = -1,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the vLLM class.\n\n Args:\n vllm (_vLLM): the vLLM model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to -1.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n\n Examples:\n >>> from vllm import LLM\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import vLLM\n >>> model = LLM(model=\"gpt2\")\n >>> task = Task()\n >>> llm = vLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VLLM_AVAILABLE:\n raise ImportError(\n \"`vLLM` cannot be used as `vllm` is not installed, please \"\n \" install it with `pip install vllm`.\"\n )\n\n self.presence_penalty = presence_penalty\n self.frequency_penalty = frequency_penalty\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_tokens = max_new_tokens\n\n self.vllm = vllm\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_tokens\": self.max_tokens,\n \"presence_penalty\": self.presence_penalty,\n \"frequency_penalty\": self.frequency_penalty,\n \"temperature\": self.temperature,\n \"top_p\": self.top_p,\n \"top_k\": self.top_k,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the vLLM model.\"\"\"\n return self.vllm.llm_engine.model_config.model # type: ignore\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. 
Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n requests = self.vllm.generate(\n prompts,\n SamplingParams( # type: ignore\n n=num_generations,\n presence_penalty=self.presence_penalty,\n frequency_penalty=self.frequency_penalty,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n max_tokens=self.max_tokens,\n ),\n use_tqdm=False, # type: ignore\n )\n outputs = []\n for request, prompt in zip(requests, prompts):\n output = []\n for request_output in request.outputs:\n try:\n parsed_output = self.task.parse_output(request_output.text)\n except Exception as e:\n logger.error(f\"Error parsing vLLM output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=request_output.text,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/vllm/#distilabel.llm.vllm.vLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the vLLM model.
"},{"location":"reference/distilabel/llm/vllm/#distilabel.llm.vllm.vLLM.__init__","title":"__init__(vllm, task, max_new_tokens=128, presence_penalty=0.0, frequency_penalty=0.0, temperature=1.0, top_p=1.0, top_k=-1, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the vLLM class.
Parameters:
Name Type Description Default
vllm
LLM
the vLLM model to be used.
required
task
Task
the task to be performed by the LLM.
required
max_new_tokens
int
the maximum number of tokens to be generated. Defaults to 128.
128
presence_penalty
float
the presence penalty to be used for generation. Defaults to 0.0.
0.0
frequency_penalty
float
the frequency penalty to be used for generation. Defaults to 0.0.
0.0
temperature
float
the temperature to be used for generation. Defaults to 1.0.
1.0
top_p
float
the top-p value to be used for generation. Defaults to 1.0.
1.0
top_k
int
the top-k value to be used for generation. Defaults to -1.
-1
prompt_format
Union[SupportedFormats, None]
the format to be used for the prompt. If None
, the default format of the task will be used, available formats are openai
, chatml
, llama2
, zephyr
, and default
. Defaults to None
, but default
(concatenation of system_prompt
and formatted_prompt
with a line-break) will be used if no prompt_formatting_fn
is provided.
None
prompt_formatting_fn
Union[Callable[..., str], None]
a function to be applied to the prompt before generation. If None
, no formatting will be applied.
None
Examples:
>>> from vllm import LLM\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import vLLM\n>>> model = LLM(model=\"gpt2\")\n>>> task = Task()\n>>> llm = vLLM(vllm=model, task=task)\n
Source code in src/distilabel/llm/vllm.py
def __init__(\n self,\n vllm: \"_vLLM\",\n task: \"Task\",\n max_new_tokens: int = 128,\n presence_penalty: float = 0.0,\n frequency_penalty: float = 0.0,\n temperature: float = 1.0,\n top_p: float = 1.0,\n top_k: int = -1,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the vLLM class.\n\n Args:\n vllm (_vLLM): the vLLM model to be used.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n presence_penalty (float, optional): the presence penalty to be used for generation.\n Defaults to 0.0.\n frequency_penalty (float, optional): the frequency penalty to be used for generation.\n Defaults to 0.0.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to -1.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n\n Examples:\n >>> from vllm import LLM\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import vLLM\n >>> model = LLM(model=\"gpt2\")\n >>> task = Task()\n >>> llm = vLLM(model=model, task=task)\n \"\"\"\n super().__init__(\n task=task,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VLLM_AVAILABLE:\n raise ImportError(\n \"`vLLM` cannot be used as `vllm` is not installed, please \"\n \" install it with `pip install vllm`.\"\n )\n\n self.presence_penalty = presence_penalty\n self.frequency_penalty = frequency_penalty\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_tokens = max_new_tokens\n\n self.vllm = vllm\n
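Building on the constructor example above, a hedged sketch of generating multiple completions per input; it assumes the `generate` method inherited from the base `LLM` class, a `TextGenerationTask` whose input key is `input`, and the nested `List[List[LLMOutput]]` shape described for `_generate`:
>>> from vllm import LLM
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import vLLM
>>> llm = vLLM(vllm=LLM(model="gpt2"), task=Task(), temperature=0.8, max_new_tokens=64)
>>> outputs = llm.generate([{"input": "Write a haiku about the sea."}], num_generations=2)
>>> len(outputs[0])  # one `LLMOutput` per generation for the single input
2
>>> outputs[0][0]["parsed_output"]  # `None` if the task could not parse the raw text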
"},{"location":"reference/distilabel/llm/google/","title":"google","text":""},{"location":"reference/distilabel/llm/google/vertexai/","title":"vertexai","text":""},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAIEndpointLLM","title":"VertexAIEndpointLLM
","text":" Bases: LLM
An LLM
which uses a Vertex AI Online prediction endpoint for generation.
More information about Vertex AI Endpoints can be found here:
- https://cloud.google.com/vertex-ai/docs/general/deployment#deploy_a_model_to_an_endpoint
Source code in src/distilabel/llm/google/vertexai.py
class VertexAIEndpointLLM(LLM):\n \"\"\"An `LLM` which uses a Vertex AI Online prediction endpoint for the generation.\n\n More information about Vertex AI Endpoints can be found here:\n\n - https://cloud.google.com/vertex-ai/docs/general/deployment#deploy_a_model_to_an_endpoint\n \"\"\"\n\n def __init__(\n self,\n task: \"Task\",\n endpoint_id: str,\n project: Optional[str] = None,\n location: str = \"us-central1\",\n generation_kwargs: Optional[Dict[str, Any]] = None,\n prompt_argument: str = \"prompt\",\n num_generations_argument: str = \"n\",\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the `VertexAIEndpointLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n endpoint_id (str): the ID of the Vertex AI endpoint to be used for generation.\n project (Optional[str], optional): the project to be used for generation. If `None`,\n the default project will be used. Defaults to `None`.\n location (str, optional): the location of the Vertex AI endpoint to be used for\n generation. Defaults to \"us-central1\".\n generation_kwargs (Optional[Dict[str, Any]], optional): the generation parameters\n to be used for generation. The name of the parameters will depend on the\n Docker image used to deploy the model to the Vertex AI endpoint. Defaults\n to `None`.\n prompt_argument (str, optional): the name of the Vertex AI Endpoint key to\n be used for the prompt. Defaults to \"prompt\".\n num_generations_argument (str, optional): the name of the Vertex AI Endpoint\n key to be used to specify the number of generations per prompt. Defaults\n to \"n\".\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. 
If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAIEndpointLLM` cannot be used as `google-cloud-aiplatform` is not\"\n \" installed, please install it with `pip install google-cloud-aiplatform`\"\n )\n\n if project is None:\n try:\n project = google.auth.default()[1]\n except DefaultCredentialsError as e:\n raise ValueError(\n \"No `project` was specified and no default credentials were found.\"\n ) from e\n\n if generation_kwargs is None:\n generation_kwargs = {}\n\n self.endpoint_id = endpoint_id\n self.project = project\n self.location = location\n self.generation_kwargs = generation_kwargs\n self.prompt_argument = prompt_argument\n self.num_generations_argument = num_generations_argument\n\n self.client = PredictionServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n\n @cached_property\n def model_name(self) -> str:\n \"\"\"Returns the name of the model used for generation.\"\"\"\n client = EndpointServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n endpoint = client.get_endpoint(name=self.endpoint_path)\n return endpoint.deployed_models[0].display_name\n\n @property\n def endpoint_path(self) -> str:\n \"\"\"Returns the path of the Vertex AI endpoint to be used for generation.\"\"\"\n return self.client.endpoint_path(\n project=self.project, # type: ignore\n location=self.location,\n endpoint=self.endpoint_id,\n )\n\n @_vertexai_retry_decorator\n def _call_vertexai_endpoint(self, instances: List[Any]) -> Any:\n return self.client.predict(endpoint=self.endpoint_path, instances=instances)\n\n def _prepare_instances(\n self, prompts: List[str], num_generations: int\n ) -> List[\"Value\"]:\n \"\"\"Prepares the instances to be sent to the Vertex AI endpoint.\n\n Args:\n prompts (List[str]): the prompts to be used for generation.\n num_generations (int): the number of generations to be performed for each prompt.\n\n Returns:\n The instances to be sent to the Vertex AI endpoint.\n \"\"\"\n instances = []\n for prompt in prompts:\n instance = json_format.ParseDict(\n {\n self.prompt_argument: prompt,\n self.num_generations_argument: num_generations,\n **self.generation_kwargs,\n },\n Value(),\n )\n instances.append(instance)\n return instances\n\n def _single_output(self, instance: Any) -> List[LLMOutput]:\n try:\n # NOTE: `predict` method accepts a list of instances, but depending on the\n # deployed Docker image, it can just accept one instance.\n response = self._call_vertexai_endpoint(instances=[instance])\n except exceptions.InternalServerError as e:\n raise ValueError(\n \"The Vertex AI endpoint returned 500 Internal Server Error. This is\"\n \" usually caused due to wrong generation parameters. 
Please check the\"\n \" `generation_parameters` and try again.\"\n ) from e\n\n output = []\n for prediction in response.predictions:\n # Vertex endpoint output is `Prompt:\\n{{ model_prompt }}\\nOutput:\\n{{ model_output }}`\n # so we need to do a pre-parsing to remove the `Prompt:` and `Output:` parts.\n match = _PARSE_VERTEXAI_ENDPOINT_PREDICTION_REGEX.search(prediction)\n if not match:\n raise ValueError(\n \"Couldn't parse the response from the Vertex AI endpoint.\"\n )\n\n model_output = match.group(1).strip()\n\n try:\n parsed_output = self.task.parse_output(model_output)\n except Exception as e:\n logger.error(f\"Error parsing Vertex AI endpoint model response: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=instance.struct_value[self.prompt_argument],\n raw_output=model_output,\n parsed_output=parsed_output,\n )\n )\n return output\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n prompts = self._generate_prompts(inputs)\n instances = self._prepare_instances(\n prompts=prompts, num_generations=num_generations\n )\n return [self._single_output(instance) for instance in instances]\n
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAIEndpointLLM.endpoint_path","title":"endpoint_path: str
property
","text":"Returns the path of the Vertex AI endpoint to be used for generation.
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAIEndpointLLM.model_name","title":"model_name: str
cached
property
","text":"Returns the name of the model used for generation.
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAIEndpointLLM.__init__","title":"__init__(task, endpoint_id, project=None, location='us-central1', generation_kwargs=None, prompt_argument='prompt', num_generations_argument='n', num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the VertexAIEndpointLLM
class.
Parameters:
Name Type Description Default
task
Task
the task to be performed by the LLM.
required
endpoint_id
str
the ID of the Vertex AI endpoint to be used for generation.
required
project
Optional[str]
the project to be used for generation. If None
, the default project will be used. Defaults to None
.
None
location
str
the location of the Vertex AI endpoint to be used for generation. Defaults to \"us-central1\".
'us-central1'
generation_kwargs
Optional[Dict[str, Any]]
the generation parameters to be used for generation. The name of the parameters will depend on the Docker image used to deploy the model to the Vertex AI endpoint. Defaults to None
.
None
prompt_argument
str
the name of the Vertex AI Endpoint key to be used for the prompt. Defaults to \"prompt\".
'prompt'
num_generations_argument
str
the name of the Vertex AI Endpoint key to be used to specify the number of generations per prompt. Defaults to \"n\".
'n'
num_threads
Union[int, None]
the number of threads to be used for parallel generation. If None
, no parallel generation will be performed. Defaults to None
.
None
prompt_format
Union[SupportedFormats, None]
the format to be used for the prompt. If None
, the default format of the task will be used, available formats are openai
, chatml
, llama2
, zephyr
, and default
. Defaults to None
, but default
(concatenation of system_prompt
and formatted_prompt
with a line-break) will be used if no prompt_formatting_fn
is provided.
None
prompt_formatting_fn
Union[Callable[..., str], None]
a function to be applied to the prompt before generation. If None
, no formatting will be applied. Defaults to None
.
None
Source code in src/distilabel/llm/google/vertexai.py
def __init__(\n self,\n task: \"Task\",\n endpoint_id: str,\n project: Optional[str] = None,\n location: str = \"us-central1\",\n generation_kwargs: Optional[Dict[str, Any]] = None,\n prompt_argument: str = \"prompt\",\n num_generations_argument: str = \"n\",\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the `VertexAIEndpointLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n endpoint_id (str): the ID of the Vertex AI endpoint to be used for generation.\n project (Optional[str], optional): the project to be used for generation. If `None`,\n the default project will be used. Defaults to `None`.\n location (str, optional): the location of the Vertex AI endpoint to be used for\n generation. Defaults to \"us-central1\".\n generation_kwargs (Optional[Dict[str, Any]], optional): the generation parameters\n to be used for generation. The name of the parameters will depend on the\n Docker image used to deploy the model to the Vertex AI endpoint. Defaults\n to `None`.\n prompt_argument (str, optional): the name of the Vertex AI Endpoint key to\n be used for the prompt. Defaults to \"prompt\".\n num_generations_argument (str, optional): the name of the Vertex AI Endpoint\n key to be used to specify the number of generations per prompt. Defaults\n to \"n\".\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for the prompt. If `None`, the default format of the task will be used, available\n formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,\n but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)\n will be used if no `prompt_formatting_fn` is provided.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be\n applied to the prompt before generation. If `None`, no formatting will be applied.\n Defaults to `None`.\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAIEndpointLLM` cannot be used as `google-cloud-aiplatform` is not\"\n \" installed, please install it with `pip install google-cloud-aiplatform`\"\n )\n\n if project is None:\n try:\n project = google.auth.default()[1]\n except DefaultCredentialsError as e:\n raise ValueError(\n \"No `project` was specified and no default credentials were found.\"\n ) from e\n\n if generation_kwargs is None:\n generation_kwargs = {}\n\n self.endpoint_id = endpoint_id\n self.project = project\n self.location = location\n self.generation_kwargs = generation_kwargs\n self.prompt_argument = prompt_argument\n self.num_generations_argument = num_generations_argument\n\n self.client = PredictionServiceClient(\n client_options=ClientOptions(\n api_endpoint=f\"{self.location}-aiplatform.googleapis.com\"\n )\n )\n
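No usage example is included in the docstring, so the following is a hedged construction sketch: the endpoint ID, project, and `generation_kwargs` keys are placeholders (the accepted keys depend on the Docker image serving the endpoint, as noted above), and the class is assumed to be importable from `distilabel.llm` like the other integrations:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import VertexAIEndpointLLM
>>> llm = VertexAIEndpointLLM(
...     task=Task(),
...     endpoint_id="1234567890",              # placeholder endpoint ID
...     project="my-gcp-project",              # placeholder project
...     location="us-central1",
...     generation_kwargs={"temperature": 1.0, "max_tokens": 128},  # image-dependent keys
...     prompt_argument="prompt",
...     num_generations_argument="n",
... )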
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAILLM","title":"VertexAILLM
","text":" Bases: LLM
An LLM
which allows using Google's proprietary models from the Vertex AI APIs:
- Gemini API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini
- Codey API: https://cloud.google.com/vertex-ai/docs/generative-ai/code/code-models-overview
- Text API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text
To use the VertexAILLM
it is necessary to have Google Cloud authentication configured using one of these methods:
- Setting the GOOGLE_CLOUD_CREDENTIALS environment variable
- Using the gcloud auth application-default login command
- Using the vertexai.init function from the google-cloud-aiplatform library
Source code in src/distilabel/llm/google/vertexai.py
class VertexAILLM(LLM):\n \"\"\"An `LLM` which allows to use Google's proprietary models from the Vertex AI APIs:\n\n - Gemini API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini\n - Codey API: https://cloud.google.com/vertex-ai/docs/generative-ai/code/code-models-overview\n - Text API: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text\n\n To use the `VertexAILLM` is necessary to have configured the Google Cloud authentication\n using one of these methods:\n\n - Setting `GOOGLE_CLOUD_CREDENTIALS` environment variable\n - Using `gcloud auth application-default login` command\n - Using `vertexai.init` function from the `google-cloud-aiplatform` library\n \"\"\"\n\n def __init__(\n self,\n task: \"Task\",\n model: str = \"gemini-pro\",\n temperature: Optional[float] = None,\n top_p: Optional[float] = None,\n top_k: Optional[int] = None,\n max_new_tokens: int = 128,\n stop_sequences: Optional[List[str]] = None,\n num_threads: Union[int, None] = None,\n ) -> None:\n \"\"\"Initializes the `VertexGenerativeModelLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gemini-pro\".\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n \"\"\"\n super().__init__(task=task, num_threads=num_threads)\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAILLM` cannot be used as `google-cloud-aiplatform` is not installed,\"\n \" please install it with `pip install google-cloud-aiplatform`\"\n )\n\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_output_tokens = max_new_tokens\n self.stop_sequences = stop_sequences\n\n if is_gemini_model(model):\n self.model = GenerativeModel(model)\n elif is_codey_model(model):\n self.model = CodeGenerationModel.from_pretrained(model)\n else:\n self.model = TextGenerationModel.from_pretrained(model)\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the model used for generation.\"\"\"\n if isinstance(self.model, GenerativeModel):\n return self.model._model_name\n\n return self.model._model_id\n\n def _generate_contents(self, prompts: List[str]) -> List[List[Dict[str, Any]]]:\n \"\"\"Generates a list of valid dicts that can be parsed to `vertexai.preview.generative_models.Content`\n objects for each input.\n\n Args:\n prompts (List[str]): the prompts to be used for generation.\n\n Returns:\n List[List[Dict[str, Any]]]: the list of valid `vertexai.preview.generative_models.Content`\n objects.\n \"\"\"\n return [[{\"role\": \"user\", \"parts\": [{\"text\": prompt}]}] for prompt in prompts]\n\n @_vertexai_retry_decorator\n def _call_generative_model_with_backoff(\n self, contents: List[Dict[str, Any]], **kwargs: Any\n ) -> \"GenerationResponse\":\n return self.model.generate_content( # type: ignore\n contents=contents,\n # TODO: update `candidate_count` to have `num_generations` as value once valid range is not [1, 2)\n generation_config=GenerationConfig(candidate_count=1, 
**kwargs),\n )\n\n def _generative_model_single_output(\n self, contents: List[Dict[str, Any]]\n ) -> LLMOutput:\n raw_output = None\n try:\n response = self._call_generative_model_with_backoff(\n contents=contents,\n temperature=self.temperature,\n top_p=self.top_p,\n top_k=self.top_k,\n max_output_tokens=self.max_output_tokens,\n stop_sequences=self.stop_sequences,\n )\n raw_output = response.text\n parsed_output = self.task.parse_output(raw_output)\n except ValueError as e:\n logger.error(f\"Vertex AI Gemini API model didn't return content: {e}\")\n return LLMOutput(\n model_name=self.model_name,\n prompt_used=contents,\n raw_output=None,\n parsed_output=None,\n )\n except Exception as e:\n logger.error(f\"Error parsing Vertex AI Gemini API model response: {e}\")\n parsed_output = None\n\n return LLMOutput(\n model_name=self.model_name,\n prompt_used=contents,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n\n def _generate_with_generative_model(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generate `num_generations` for each input in `inputs` using a Vertex AI Gemini\n API model.\"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"default\")\n inputs_contents = self._generate_contents(prompts)\n outputs = []\n for contents in inputs_contents:\n output = []\n # TODO: remove this for-loop once `GenerationConfig.candidate_count` valid range is not [1, 2)\n for _ in range(num_generations):\n output.append(self._generative_model_single_output(contents=contents))\n outputs.append(output)\n return outputs\n\n @_vertexai_retry_decorator\n def _call_text_generation_model(\n self, **kwargs: Any\n ) -> \"MultiCandidateTextGenerationResponse\":\n return self.model.predict(**kwargs) # type: ignore\n\n def _text_generation_model_single_output(\n self, prompt: str, num_generations: int\n ) -> List[LLMOutput]:\n response = self._call_text_generation_model(\n prompt=prompt,\n max_output_tokens=self.max_output_tokens,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n stop_sequences=self.stop_sequences,\n # WARNING: The model can return < `candidate_count` generations depending\n # on the generation parameters and the input.\n candidate_count=num_generations,\n )\n\n output = []\n for candidate in response.candidates:\n try:\n parsed_response = self.task.parse_output(candidate.text)\n except Exception as e:\n logger.error(\n f\"Error parsing Vertex AI Text/Code API model response: {e}\"\n )\n parsed_response = None\n\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=candidate.text,\n parsed_output=parsed_response,\n )\n )\n return output\n\n def _generate_with_text_generation_model(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n \"\"\"Generate `num_generations` for each input in `inputs` using a Vertex AI Text/Code\n API model.\"\"\"\n prompts = self._generate_prompts(inputs, default_format=\"default\")\n outputs = []\n for prompt in prompts:\n outputs.append(\n self._text_generation_model_single_output(prompt, num_generations)\n )\n return outputs\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[\"LLMOutput\"]]:\n if isinstance(self.model, GenerativeModel):\n return self._generate_with_generative_model(inputs, num_generations)\n\n return self._generate_with_text_generation_model(inputs, num_generations)\n
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAILLM.model_name","title":"model_name: str
property
","text":"Returns the name of the model used for generation.
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.VertexAILLM.__init__","title":"__init__(task, model='gemini-pro', temperature=None, top_p=None, top_k=None, max_new_tokens=128, stop_sequences=None, num_threads=None)
","text":"Initializes the VertexGenerativeModelLLM
class.
Parameters:
Name Type Description Default
task
Task
the task to be performed by the LLM.
required
model
str
the model to be used for generation. Defaults to \"gemini-pro\".
'gemini-pro'
temperature
float
the temperature to be used for generation. Defaults to None.
None
top_p
float
the top-p value to be used for generation. Defaults to None.
None
top_k
int
the top-k value to be used for generation. Defaults to None.
None
max_new_tokens
int
the maximum number of tokens to be generated. Defaults to 128.
128
num_threads
Union[int, None]
the number of threads to be used for parallel generation. If None
, no parallel generation will be performed. Defaults to None
.
None
Source code in src/distilabel/llm/google/vertexai.py
def __init__(\n self,\n task: \"Task\",\n model: str = \"gemini-pro\",\n temperature: Optional[float] = None,\n top_p: Optional[float] = None,\n top_k: Optional[int] = None,\n max_new_tokens: int = 128,\n stop_sequences: Optional[List[str]] = None,\n num_threads: Union[int, None] = None,\n) -> None:\n \"\"\"Initializes the `VertexGenerativeModelLLM` class.\n\n Args:\n task (Task): the task to be performed by the LLM.\n model (str, optional): the model to be used for generation. Defaults to \"gemini-pro\".\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 40.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n num_threads (Union[int, None], optional): the number of threads to be used\n for parallel generation. If `None`, no parallel generation will be performed.\n Defaults to `None`.\n \"\"\"\n super().__init__(task=task, num_threads=num_threads)\n\n if not _VERTEXAI_AVAILABLE:\n raise ImportError(\n \"`VertexAILLM` cannot be used as `google-cloud-aiplatform` is not installed,\"\n \" please install it with `pip install google-cloud-aiplatform`\"\n )\n\n self.temperature = temperature\n self.top_p = top_p\n self.top_k = top_k\n self.max_output_tokens = max_new_tokens\n self.stop_sequences = stop_sequences\n\n if is_gemini_model(model):\n self.model = GenerativeModel(model)\n elif is_codey_model(model):\n self.model = CodeGenerationModel.from_pretrained(model)\n else:\n self.model = TextGenerationModel.from_pretrained(model)\n
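A hedged construction sketch for `VertexAILLM`, assuming Google Cloud authentication has already been configured as described above and that the class is importable from `distilabel.llm` like the other integrations:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import VertexAILLM
>>> llm = VertexAILLM(
...     task=Task(),
...     model="gemini-pro",
...     temperature=0.7,
...     max_new_tokens=128,
... )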
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.is_codey_model","title":"is_codey_model(model)
","text":"Returns True
if the model is a model from the Vertex AI Codey API.
Parameters:
Name Type Description Default
model
str
the model name to be checked.
required
Returns:
Name Type Description
bool
bool
True
if the model is a model from the Vertex AI Codey API.
Source code in src/distilabel/llm/google/vertexai.py
def is_codey_model(model: str) -> bool:\n \"\"\"Returns `True` if the model is a model from the Vertex AI Codey API.\n\n Args:\n model (str): the model name to be checked.\n\n Returns:\n bool: `True` if the model is a model from the Vertex AI Codey API.\n \"\"\"\n return \"code\" in model\n
"},{"location":"reference/distilabel/llm/google/vertexai/#distilabel.llm.google.vertexai.is_gemini_model","title":"is_gemini_model(model)
","text":"Returns True
if the model is a model from the Vertex AI Gemini API.
Parameters:
Name Type Description Default
model
str
the model name to be checked.
required
Returns:
Name Type Description
bool
bool
True
if the model is a model from the Vertex AI Gemini API.
Source code in src/distilabel/llm/google/vertexai.py
def is_gemini_model(model: str) -> bool:\n \"\"\"Returns `True` if the model is a model from the Vertex AI Gemini API.\n\n Args:\n model (str): the model name to be checked.\n\n Returns:\n bool: `True` if the model is a model from the Vertex AI Gemini API.\n \"\"\"\n return \"gemini\" in model\n
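Both helpers are simple substring checks over the model name, as the source above shows; a quick illustrative sketch:
>>> from distilabel.llm.google.vertexai import is_codey_model, is_gemini_model
>>> is_gemini_model("gemini-pro")
True
>>> is_codey_model("code-bison")
True
>>> is_codey_model("text-bison")
False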
"},{"location":"reference/distilabel/llm/huggingface/","title":"huggingface","text":""},{"location":"reference/distilabel/llm/huggingface/inference_endpoints/","title":"inference_endpoints","text":""},{"location":"reference/distilabel/llm/huggingface/inference_endpoints/#distilabel.llm.huggingface.inference_endpoints.InferenceEndpointsLLM","title":"InferenceEndpointsLLM
","text":" Bases: LLM
Source code in src/distilabel/llm/huggingface/inference_endpoints.py
class InferenceEndpointsLLM(LLM):\n def __init__(\n self,\n endpoint_name: str,\n task: \"Task\",\n endpoint_namespace: Union[str, None] = None,\n token: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: Union[float, None] = None,\n seed: Union[int, None] = None,\n do_sample: bool = False,\n temperature: Union[float, None] = None,\n top_k: Union[int, None] = None,\n top_p: Union[float, None] = None,\n typical_p: Union[float, None] = None,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the InferenceEndpointsLLM class.\n\n Args:\n endpoint_name (str): The name of the endpoint.\n task (Task): The task to be performed by the LLM.\n endpoint_namespace (Union[str, None]): The namespace of the endpoint. Defaults to None.\n token (Union[str, None]): The token for the endpoint. Defaults to None.\n max_new_tokens (int): The maximum number of tokens to be generated. Defaults to 128.\n repetition_penalty (Union[float, None]): The repetition penalty to be used for generation. Defaults to None.\n seed (Union[int, None]): The seed for generation. Defaults to None.\n do_sample (bool): Whether to do sampling. Defaults to False.\n temperature (Union[float, None]): The temperature for generation. Defaults to None.\n top_k (Union[int, None]): The top_k for generation. Defaults to None.\n top_p (Union[float, None]): The top_p for generation. Defaults to None.\n typical_p (Union[float, None]): The typical_p for generation. Defaults to None.\n num_threads (Union[int, None]): The number of threads. Defaults to None.\n prompt_format (Union[\"SupportedFormats\", None]): The format of the prompt. Defaults to None.\n prompt_formatting_fn (Union[Callable[..., str], None]): The function for formatting the prompt. Defaults to None.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import InferenceEndpointsLLM\n >>> task = Task()\n >>> llm = InferenceEndpointsLLM(\n ... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n ... task=task,\n ... 
)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _HUGGINGFACE_HUB_AVAILABLE:\n raise ImportError(\n \"`InferenceEndpointsLLM` cannot be used as `huggingface-hub` is not \"\n \"installed, please install it with `pip install huggingface-hub`.\"\n )\n\n self.do_sample = do_sample\n self.max_new_tokens = max_new_tokens\n self.repetition_penalty = repetition_penalty\n self.seed = seed\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.inference_endpoint = get_inference_endpoint(\n name=endpoint_name, namespace=endpoint_namespace, token=token\n )\n self.inference_endpoint.wait(timeout=30)\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"do_sample\": self.do_sample,\n \"max_new_tokens\": self.max_new_tokens,\n \"repetition_penalty\": self.repetition_penalty,\n \"seed\": self.seed,\n \"temperature\": self.temperature,\n \"top_k\": self.top_k,\n \"top_p\": self.top_p,\n \"typical_p\": self.typical_p,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the model name of the endpoint.\"\"\"\n return self.inference_endpoint.repository\n\n @retry(\n retry=retry_if_exception_type(_INFERENCE_ENDPOINTS_API_RETRY_ON_EXCEPTIONS),\n stop=stop_after_attempt(_INFERENCE_ENDPOINTS_API_STOP_AFTER_ATTEMPT),\n wait=wait_random_exponential(\n multiplier=_INFERENCE_ENDPOINTS_API_WAIT_RANDOM_EXPONENTIAL_MULTIPLIER,\n max=_INFERENCE_ENDPOINTS_API_WAIT_RANDOM_EXPONENTIAL_MAX,\n ),\n before_sleep=before_sleep_log(logger, logging.INFO),\n after=after_log(logger, logging.INFO),\n )\n def _text_generation_with_backoff(self, **kwargs: Any) -> Any:\n \"\"\"Performs text generation with backoff in case of an error.\"\"\"\n return self.inference_endpoint.client.text_generation(**kwargs) # type: ignore\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n outputs = []\n for prompt in prompts:\n raw_responses = [\n self._text_generation_with_backoff(\n prompt=prompt,\n do_sample=self.do_sample,\n max_new_tokens=self.max_new_tokens,\n repetition_penalty=self.repetition_penalty,\n seed=self.seed,\n temperature=self.temperature,\n top_k=self.top_k,\n top_p=self.top_p,\n typical_p=self.typical_p,\n )\n for _ in range(num_generations)\n ]\n output = []\n for raw_response in raw_responses:\n try:\n parsed_response = self.task.parse_output(raw_response)\n except Exception as e:\n logger.error(f\"Error parsing Inference Endpoints output: {e}\")\n parsed_response = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_response,\n parsed_output=parsed_response,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/huggingface/inference_endpoints/#distilabel.llm.huggingface.inference_endpoints.InferenceEndpointsLLM.model_name","title":"model_name: str
property
","text":"Returns the model name of the endpoint.
"},{"location":"reference/distilabel/llm/huggingface/inference_endpoints/#distilabel.llm.huggingface.inference_endpoints.InferenceEndpointsLLM.__init__","title":"__init__(endpoint_name, task, endpoint_namespace=None, token=None, max_new_tokens=128, repetition_penalty=None, seed=None, do_sample=False, temperature=None, top_k=None, top_p=None, typical_p=None, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the InferenceEndpointsLLM class.
Parameters:
Name Type Description Default
endpoint_name
str
The name of the endpoint.
required
task
Task
The task to be performed by the LLM.
required
endpoint_namespace
Union[str, None]
The namespace of the endpoint. Defaults to None.
None
token
Union[str, None]
The token for the endpoint. Defaults to None.
None
max_new_tokens
int
The maximum number of tokens to be generated. Defaults to 128.
128
repetition_penalty
Union[float, None]
The repetition penalty to be used for generation. Defaults to None.
None
seed
Union[int, None]
The seed for generation. Defaults to None.
None
do_sample
bool
Whether to do sampling. Defaults to False.
False
temperature
Union[float, None]
The temperature for generation. Defaults to None.
None
top_k
Union[int, None]
The top_k for generation. Defaults to None.
None
top_p
Union[float, None]
The top_p for generation. Defaults to None.
None
typical_p
Union[float, None]
The typical_p for generation. Defaults to None.
None
num_threads
Union[int, None]
The number of threads. Defaults to None.
None
prompt_format
Union[SupportedFormats, None]
The format of the prompt. Defaults to None.
None
prompt_formatting_fn
Union[Callable[..., str], None]
The function for formatting the prompt. Defaults to None.
None
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import InferenceEndpointsLLM\n>>> task = Task()\n>>> llm = InferenceEndpointsLLM(\n... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n... task=task,\n... )\n
Source code in src/distilabel/llm/huggingface/inference_endpoints.py
def __init__(\n self,\n endpoint_name: str,\n task: \"Task\",\n endpoint_namespace: Union[str, None] = None,\n token: Union[str, None] = None,\n max_new_tokens: int = 128,\n repetition_penalty: Union[float, None] = None,\n seed: Union[int, None] = None,\n do_sample: bool = False,\n temperature: Union[float, None] = None,\n top_k: Union[int, None] = None,\n top_p: Union[float, None] = None,\n typical_p: Union[float, None] = None,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the InferenceEndpointsLLM class.\n\n Args:\n endpoint_name (str): The name of the endpoint.\n task (Task): The task to be performed by the LLM.\n endpoint_namespace (Union[str, None]): The namespace of the endpoint. Defaults to None.\n token (Union[str, None]): The token for the endpoint. Defaults to None.\n max_new_tokens (int): The maximum number of tokens to be generated. Defaults to 128.\n repetition_penalty (Union[float, None]): The repetition penalty to be used for generation. Defaults to None.\n seed (Union[int, None]): The seed for generation. Defaults to None.\n do_sample (bool): Whether to do sampling. Defaults to False.\n temperature (Union[float, None]): The temperature for generation. Defaults to None.\n top_k (Union[int, None]): The top_k for generation. Defaults to None.\n top_p (Union[float, None]): The top_p for generation. Defaults to None.\n typical_p (Union[float, None]): The typical_p for generation. Defaults to None.\n num_threads (Union[int, None]): The number of threads. Defaults to None.\n prompt_format (Union[\"SupportedFormats\", None]): The format of the prompt. Defaults to None.\n prompt_formatting_fn (Union[Callable[..., str], None]): The function for formatting the prompt. Defaults to None.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import InferenceEndpointsLLM\n >>> task = Task()\n >>> llm = InferenceEndpointsLLM(\n ... endpoint_name=\"<INFERENCE_ENDPOINT_NAME>\",\n ... task=task,\n ... )\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n if not _HUGGINGFACE_HUB_AVAILABLE:\n raise ImportError(\n \"`InferenceEndpointsLLM` cannot be used as `huggingface-hub` is not \"\n \"installed, please install it with `pip install huggingface-hub`.\"\n )\n\n self.do_sample = do_sample\n self.max_new_tokens = max_new_tokens\n self.repetition_penalty = repetition_penalty\n self.seed = seed\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.inference_endpoint = get_inference_endpoint(\n name=endpoint_name, namespace=endpoint_namespace, token=token\n )\n self.inference_endpoint.wait(timeout=30)\n
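As a hedged variant of the example in the docstring, the endpoint namespace and token can also be passed explicitly (the endpoint name and namespace below are placeholders, and the token is read from an environment variable purely for illustration):
>>> import os
>>> from distilabel.tasks.text_generation import TextGenerationTask as Task
>>> from distilabel.llm import InferenceEndpointsLLM
>>> llm = InferenceEndpointsLLM(
...     endpoint_name="<INFERENCE_ENDPOINT_NAME>",
...     endpoint_namespace="<HF_NAMESPACE>",
...     token=os.getenv("HF_TOKEN"),
...     task=Task(),
...     max_new_tokens=256,
...     do_sample=True,
...     temperature=0.7,
... )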
"},{"location":"reference/distilabel/llm/huggingface/transformers/","title":"transformers","text":""},{"location":"reference/distilabel/llm/huggingface/transformers/#distilabel.llm.huggingface.transformers.TransformersLLM","title":"TransformersLLM
","text":" Bases: LLM
Source code in src/distilabel/llm/huggingface/transformers.py
class TransformersLLM(LLM):\n def __init__(\n self,\n model: \"PreTrainedModel\",\n tokenizer: \"PreTrainedTokenizer\",\n task: \"Task\",\n max_new_tokens: int = 128,\n do_sample: bool = False,\n temperature: float = 1.0,\n top_k: int = 50,\n top_p: float = 1.0,\n typical_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n ) -> None:\n \"\"\"Initializes the TransformersLLM class.\n\n Args:\n model (PreTrainedModel): the model to be used for generation.\n tokenizer (PreTrainedTokenizer): the tokenizer to be used for generation.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n do_sample (bool, optional): whether to sample from the model or not.\n Defaults to False.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 50.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n typical_p (float, optional): the typical-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used for generation.\n If `None`, the number of threads will be set to the number of available CPUs.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n Defaults to `None`.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): the function to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TransformersLLM\n >>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n >>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n >>> task = Task()\n >>> llm = TransformersLLM(\n ... model=model,\n ... tokenizer=tokenizer,\n ... task=task,\n ... 
)\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n self.max_new_tokens = max_new_tokens\n self.do_sample = do_sample\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.model = model\n self.tokenizer = tokenizer\n\n if self.tokenizer.pad_token is None:\n self.tokenizer.pad_token = self.tokenizer.eos_token\n if (\n hasattr(self.tokenizer, \"use_default_system_prompt\")\n and self.tokenizer.use_default_system_prompt # type: ignore\n ):\n # The `tokenizer` also has a method named `apply_chat_template` that expects a `Conversation` as OpenAI does with the ChatML format\n warnings.warn(\n \"The provided `tokenizer` has `use_default_system_prompt=True` which means that the default system prompt will be used, which may collide with the `task` provided as an arg to this class.\",\n UserWarning,\n stacklevel=2,\n )\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield from super().__rich_repr__()\n yield (\n \"parameters\",\n {\n \"max_new_tokens\": self.max_new_tokens,\n \"do_sample\": self.do_sample,\n \"temperature\": self.temperature,\n \"top_k\": self.top_k,\n \"top_p\": self.top_p,\n \"typical_p\": self.typical_p,\n },\n )\n\n @property\n def model_name(self) -> str:\n \"\"\"Returns the name of the Transformers model.\"\"\"\n return self.model.config.name_or_path\n\n def _generate(\n self, inputs: List[Dict[str, Any]], num_generations: int = 1\n ) -> List[List[LLMOutput]]:\n \"\"\"Generates `num_generations` for each input in `inputs`.\n\n Args:\n inputs (List[Dict[str, Any]]): the inputs to be used for generation.\n num_generations (int, optional): the number of generations to be performed for each\n input. Defaults to 1.\n\n Returns:\n List[List[LLMOutput]]: the outputs of the LLM.\n \"\"\"\n prompts = self._generate_prompts(inputs, default_format=None)\n encodings = self.tokenizer(prompts, padding=True, return_tensors=\"pt\")\n encodings = encodings.to(self.model.device)\n with torch.inference_mode():\n generated_ids = self.model.generate(\n **encodings, # type: ignore\n pad_token_id=self.tokenizer.eos_token_id,\n generation_config=GenerationConfig(\n do_sample=self.do_sample,\n temperature=self.temperature,\n max_new_tokens=self.max_new_tokens,\n top_k=self.top_k,\n top_p=self.top_p,\n typical_p=self.typical_p,\n num_return_sequences=num_generations,\n ),\n )\n raw_outputs = self.tokenizer.batch_decode(\n generated_ids[:, encodings.input_ids.shape[1] :],\n skip_special_tokens=True,\n clean_up_tokenization_spaces=True,\n )\n outputs = []\n for prompt, i in zip(prompts, range(0, len(raw_outputs), num_generations)):\n output = []\n for raw_output in raw_outputs[i : i + num_generations]:\n try:\n parsed_output = self.task.parse_output(raw_output)\n except Exception as e:\n logger.error(f\"Error parsing Transformers output: {e}\")\n parsed_output = None\n output.append(\n LLMOutput(\n model_name=self.model_name,\n prompt_used=prompt,\n raw_output=raw_output,\n parsed_output=parsed_output,\n )\n )\n outputs.append(output)\n return outputs\n
"},{"location":"reference/distilabel/llm/huggingface/transformers/#distilabel.llm.huggingface.transformers.TransformersLLM.model_name","title":"model_name: str
property
","text":"Returns the name of the Transformers model.
"},{"location":"reference/distilabel/llm/huggingface/transformers/#distilabel.llm.huggingface.transformers.TransformersLLM.__init__","title":"__init__(model, tokenizer, task, max_new_tokens=128, do_sample=False, temperature=1.0, top_k=50, top_p=1.0, typical_p=1.0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)
","text":"Initializes the TransformersLLM class.
Parameters:
Name Type Description Default
model
PreTrainedModel
the model to be used for generation.
required
tokenizer
PreTrainedTokenizer
the tokenizer to be used for generation.
required
task
Task
the task to be performed by the LLM.
required
max_new_tokens
int
the maximum number of tokens to be generated. Defaults to 128.
128
do_sample
bool
whether to sample from the model or not. Defaults to False.
False
temperature
float
the temperature to be used for generation. Defaults to 1.0.
1.0
top_k
int
the top-k value to be used for generation. Defaults to 50.
50
top_p
float
the top-p value to be used for generation. Defaults to 1.0.
1.0
typical_p
float
the typical-p value to be used for generation. Defaults to 1.0.
1.0
num_threads
Union[int, None]
the number of threads to be used for generation. If None
, the number of threads will be set to the number of available CPUs. Defaults to None
.
None
prompt_format
Union[SupportedFormats, None]
the format to be used for formatting the prompts. If None
, the prompts will not be formatted. Defaults to None
.
None
prompt_formatting_fn
Union[Callable[..., str], None]
the function to be used for formatting the prompts. If None
, the prompts will not be formatted.
None
Examples:
>>> from transformers import AutoModelForCausalLM, AutoTokenizer\n>>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n>>> from distilabel.llm import TransformersLLM\n>>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n>>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n>>> task = Task()\n>>> llm = TransformersLLM(\n... model=model,\n... tokenizer=tokenizer,\n... task=task,\n... )\n
Source code in src/distilabel/llm/huggingface/transformers.py
def __init__(\n self,\n model: \"PreTrainedModel\",\n tokenizer: \"PreTrainedTokenizer\",\n task: \"Task\",\n max_new_tokens: int = 128,\n do_sample: bool = False,\n temperature: float = 1.0,\n top_k: int = 50,\n top_p: float = 1.0,\n typical_p: float = 1.0,\n num_threads: Union[int, None] = None,\n prompt_format: Union[\"SupportedFormats\", None] = None,\n prompt_formatting_fn: Union[Callable[..., str], None] = None,\n) -> None:\n \"\"\"Initializes the TransformersLLM class.\n\n Args:\n model (PreTrainedModel): the model to be used for generation.\n tokenizer (PreTrainedTokenizer): the tokenizer to be used for generation.\n task (Task): the task to be performed by the LLM.\n max_new_tokens (int, optional): the maximum number of tokens to be generated.\n Defaults to 128.\n do_sample (bool, optional): whether to sample from the model or not.\n Defaults to False.\n temperature (float, optional): the temperature to be used for generation.\n Defaults to 1.0.\n top_k (int, optional): the top-k value to be used for generation.\n Defaults to 50.\n top_p (float, optional): the top-p value to be used for generation.\n Defaults to 1.0.\n typical_p (float, optional): the typical-p value to be used for generation.\n Defaults to 1.0.\n num_threads (Union[int, None], optional): the number of threads to be used for generation.\n If `None`, the number of threads will be set to the number of available CPUs.\n Defaults to `None`.\n prompt_format (Union[SupportedFormats, None], optional): the format to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n Defaults to `None`.\n prompt_formatting_fn (Union[Callable[..., str], None], optional): the function to be used\n for formatting the prompts. If `None`, the prompts will not be formatted.\n\n Examples:\n >>> from transformers import AutoModelForCausalLM, AutoTokenizer\n >>> from distilabel.tasks.text_generation import TextGenerationTask as Task\n >>> from distilabel.llm import TransformersLLM\n >>> model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n >>> tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n >>> task = Task()\n >>> llm = TransformersLLM(\n ... model=model,\n ... tokenizer=tokenizer,\n ... task=task,\n ... )\n \"\"\"\n super().__init__(\n task=task,\n num_threads=num_threads,\n prompt_format=prompt_format,\n prompt_formatting_fn=prompt_formatting_fn,\n )\n\n self.max_new_tokens = max_new_tokens\n self.do_sample = do_sample\n self.temperature = temperature\n self.top_k = top_k\n self.top_p = top_p\n self.typical_p = typical_p\n\n self.model = model\n self.tokenizer = tokenizer\n\n if self.tokenizer.pad_token is None:\n self.tokenizer.pad_token = self.tokenizer.eos_token\n if (\n hasattr(self.tokenizer, \"use_default_system_prompt\")\n and self.tokenizer.use_default_system_prompt # type: ignore\n ):\n # The `tokenizer` also has a method named `apply_chat_template` that expects a `Conversation` as OpenAI does with the ChatML format\n warnings.warn(\n \"The provided `tokenizer` has `use_default_system_prompt=True` which means that the default system prompt will be used, which may collide with the `task` provided as an arg to this class.\",\n UserWarning,\n stacklevel=2,\n )\n
"},{"location":"reference/distilabel/tasks/","title":"tasks","text":""},{"location":"reference/distilabel/tasks/#distilabel.tasks.CritiqueTask","title":"CritiqueTask
dataclass
","text":" Bases: RatingToArgillaMixin
, Task
A Task
for critique / judge tasks.
Parameters:
- system_prompt (str): the system prompt to be used for generation. Required.
- task_description (Union[str, None]): the description of the task. Defaults to None.
Source code in src/distilabel/tasks/critique/base.py
@dataclass\nclass CritiqueTask(RatingToArgillaMixin, Task):\n \"\"\"A `Task` for critique / judge tasks.\n\n Args:\n system_prompt (str): the system prompt to be used for generation.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n \"\"\"\n\n __type__: ClassVar[Literal[\"labelling\"]] = \"labelling\"\n\n @property\n def input_args_names(self) -> List[str]:\n \"\"\"Returns the names of the input arguments of the task.\"\"\"\n return [\"input\", \"generations\"]\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the names of the output arguments of the task.\"\"\"\n return [\"critique\", \"score\"]\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n score_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=score_column,\n rationale_column=critique_column,\n ratings_values=score_values or [1, 2, 3, 4, 5],\n )\n\n def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n return super().to_argilla_record(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=score_column,\n rationale_column=critique_column,\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.CritiqueTask.input_args_names","title":"input_args_names: List[str]
property
","text":"Returns the names of the input arguments of the task.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.CritiqueTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the names of the output arguments of the task.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.JudgeLMTask","title":"JudgeLMTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask
following the prompt template used by JudgeLM.
Parameters:
- system_prompt (str): the system prompt to be used for generation. Defaults to 'You are a helpful and precise assistant for checking the quality of the answer.'
- task_description (Union[str, None]): the description of the task. Defaults to 'We would like to request your feedback on the performance of {num_responses} AI assistants in response to the user question displayed above.\nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only {num_responses} values indicating the scores for Assistants 1 to {num_responses}, respectively. The {num_responses} scores are separated by a space. In the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment.'
Source code in src/distilabel/tasks/preference/judgelm.py
@dataclass\nclass JudgeLMTask(PreferenceTask):\n \"\"\"A `PreferenceTask` following the prompt templated used by JudgeLM.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n \"\"\"\n\n task_description: str = (\n \"We would like to request your feedback on the performance of {num_responses} AI assistants in response to the\"\n \" user question displayed above.\\nPlease rate the helpfulness, relevance, accuracy, level of details\"\n \" of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher\"\n \" score indicates better overall performance.\\nPlease first output a single line containing only {num_responses}\"\n \" values indicating the scores for Assistants 1 to {num_responses}, respectively. The {num_responses} scores are separated by\"\n \" a space. In the subsequent line, please provide a comprehensive explanation of your evaluation,\"\n \" avoiding any potential bias and ensuring that the order in which the responses were presented does\"\n \" not affect your judgment.\"\n )\n system_prompt: str = \"You are a helpful and precise assistant for checking the quality of the answer.\"\n\n __jinja2_template__: ClassVar[str] = _JUDGELM_TEMPLATE\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the JudgeLM specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import JudgeLMTask\n >>> task = JudgeLMTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"[Question]\\nWhat are the first 5 Fibonacci numbers?\\n...\",\n )\n \"\"\"\n render_kwargs = {\n \"input\": input,\n \"responses\": generations,\n \"task_description\": self.task_description.format(\n num_responses=len(generations)\n ),\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> JudgeLMOutput:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n split_output = output.split(\"\\n\")\n rating = [float(rating) for rating in split_output[0].split(\" \")]\n rationale = \"\\n\".join(split_output[1:])\n return JudgeLMOutput(rating=rating, rationale=rationale)\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.JudgeLMTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the JudgeLM specification.
Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import JudgeLMTask\n >>> task = JudgeLMTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"[Question]\\nWhat are the first 5 Fibonacci numbers?\\n...\",\n )\n
Source code in src/distilabel/tasks/preference/judgelm.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the JudgeLM specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import JudgeLMTask\n >>> task = JudgeLMTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"[Question]\\nWhat are the first 5 Fibonacci numbers?\\n...\",\n )\n \"\"\"\n render_kwargs = {\n \"input\": input,\n \"responses\": generations,\n \"task_description\": self.task_description.format(\n num_responses=len(generations)\n ),\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.JudgeLMTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/judgelm.py
def parse_output(self, output: str) -> JudgeLMOutput:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n split_output = output.split(\"\\n\")\n rating = [float(rating) for rating in split_output[0].split(\" \")]\n rationale = \"\\n\".join(split_output[1:])\n return JudgeLMOutput(rating=rating, rationale=rationale)\n
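To make the parsing above concrete, here is a minimal sketch of what `parse_output` does to a typical two-line completion; the toy `output` string is an assumption for illustration, and plain Python types stand in for `JudgeLMOutput`:

```python
# Minimal sketch (assumption: a two-line completion in the JudgeLM format).
output = "7 9\nAssistant 2 provides a more detailed and accurate answer."

split_output = output.split("\n")
# First line: one score per assistant, separated by spaces.
rating = [float(r) for r in split_output[0].split(" ")]
# Remaining lines: the rationale.
rationale = "\n".join(split_output[1:])

print(rating)     # [7.0, 9.0]
print(rationale)  # Assistant 2 provides a more detailed and accurate answer.
```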
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.PrometheusTask","title":"PrometheusTask
dataclass
","text":" Bases: CritiqueTask
Source code in src/distilabel/tasks/critique/prometheus.py
@dataclass\nclass PrometheusTask(CritiqueTask):\n scoring_criteria: str\n score_descriptions: Dict[int, str]\n\n system_prompt: str = \"You are a fair evaluator language model.\"\n\n __jinja2_template__: ClassVar[str] = _PROMETHEUS_TEMPLATE\n\n @property\n def input_args_names(self) -> List[str]:\n return super().input_args_names + [\"ref_completion\"]\n\n def generate_prompt(\n self, input: str, generations: str, ref_completion: str, **_: Any\n ) -> Prompt:\n render_kwargs = {\n \"instruction\": input,\n \"completion\": generations,\n \"ref_completion\": ref_completion,\n \"scoring_criteria\": self.scoring_criteria,\n \"score_descriptions\": self.score_descriptions,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n # We use a regex instead of splitting by the delimiter because the\n # critique may contain the delimiter, and using the regex is safer.\n pattern = r\"(.+?)\\. \\[RESULT\\] (\\d+)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(2)),\n critique=match.group(1).strip(),\n )\n
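Since `PrometheusTask` carries no docstring of its own, the sketch below shows how it might be instantiated; the rubric passed via `scoring_criteria` and `score_descriptions` is made up, the import path follows this page's `distilabel.tasks.PrometheusTask` anchor, and the printed argument names follow directly from the properties shown above:

```python
from distilabel.tasks import PrometheusTask

# Hypothetical rubric; `scoring_criteria` and `score_descriptions` are the
# required fields, `system_prompt` keeps its default shown above.
task = PrometheusTask(
    scoring_criteria="Is the completion factually correct and complete?",
    score_descriptions={
        1: "Mostly incorrect",
        2: "Partially correct",
        3: "Correct but incomplete",
        4: "Correct and fairly complete",
        5: "Correct and exhaustive",
    },
)

print(task.input_args_names)   # ['input', 'generations', 'ref_completion']
print(task.output_args_names)  # ['critique', 'score']
```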
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.PrometheusTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/critique/prometheus.py
def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n # We use a regex instead of splitting by the delimiter because the\n # critique may contain the delimiter, and using the regex is safer.\n pattern = r\"(.+?)\\. \\[RESULT\\] (\\d+)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(2)),\n critique=match.group(1).strip(),\n )\n
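As a quick illustration of the regex above, the following sketch parses an assumed Prometheus-style completion ending in `[RESULT] <score>`; only the toy string is invented, the pattern is the one shown in the source:

```python
import re

# Assumed model completion in the "<critique>. [RESULT] <score>" shape parsed above.
output = "The completion covers all rubric points and cites the reference answer. [RESULT] 5"

pattern = r"(.+?)\. \[RESULT\] (\d+)"
match = re.match(pattern, output)
if match:
    print({"score": float(match.group(2)), "critique": match.group(1).strip()})
# {'score': 5.0, 'critique': 'The completion covers all rubric points and cites the reference answer'}
```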
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.Prompt","title":"Prompt
dataclass
","text":"A dataclass
representing a Prompt
.
Parameters:
- system_prompt (str): the system prompt. Required.
- formatted_prompt (str): the formatted prompt. Required.
Examples:
>>> from distilabel.tasks.prompt import Prompt\n>>> prompt = Prompt(\n... system_prompt=\"You are a helpful assistant.\",\n... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n... )\n
Source code in src/distilabel/tasks/prompt.py
@dataclass\nclass Prompt:\n \"\"\"A `dataclass` representing a `Prompt`.\n\n Args:\n system_prompt (str): the system prompt.\n formatted_prompt (str): the formatted prompt.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n \"\"\"\n\n system_prompt: str\n formatted_prompt: str\n\n def format_as(self, format: SupportedFormats) -> Union[str, List[ChatCompletion]]:\n \"\"\"Formats the prompt as the specified format.\n\n Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\\nWhat are the first 5 Fibonacci numbers?'\n \"\"\"\n if format == \"default\":\n return f\"{self.system_prompt}\\n{self.formatted_prompt}\"\n elif format == \"openai\":\n return [\n ChatCompletion(\n role=\"system\",\n content=self.system_prompt,\n ),\n ChatCompletion(role=\"user\", content=self.formatted_prompt),\n ]\n elif format == \"llama2\":\n return f\"<s>[INST] <<SYS>>\\n{self.system_prompt}<</SYS>>\\n\\n{self.formatted_prompt} [/INST]\"\n elif format == \"chatml\":\n return f\"<|im_start|>system\\n{self.system_prompt}<|im_end|>\\n<|im_start|>user\\n{self.formatted_prompt}<|im_end|>\\n<|im_start|>assistant\\n\"\n elif format in [\"zephyr\", \"notus\"]:\n return f\"<|system|>\\n{self.system_prompt}</s>\\n<|user|>\\n{self.formatted_prompt}</s>\\n<|assistant|>\\n\"\n else:\n raise ValueError(\n f\"Format {format} not supported, please provide a custom `prompt_formatting_fn`\"\n \" or use any of the available formats: openai, llama2, chatml, zephyr\"\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.Prompt.format_as","title":"format_as(format)
","text":"Formats the prompt as the specified format.
Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\\nWhat are the first 5 Fibonacci numbers?'\n
Source code in src/distilabel/tasks/prompt.py
def format_as(self, format: SupportedFormats) -> Union[str, List[ChatCompletion]]:\n \"\"\"Formats the prompt as the specified format.\n\n Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\\nWhat are the first 5 Fibonacci numbers?'\n \"\"\"\n if format == \"default\":\n return f\"{self.system_prompt}\\n{self.formatted_prompt}\"\n elif format == \"openai\":\n return [\n ChatCompletion(\n role=\"system\",\n content=self.system_prompt,\n ),\n ChatCompletion(role=\"user\", content=self.formatted_prompt),\n ]\n elif format == \"llama2\":\n return f\"<s>[INST] <<SYS>>\\n{self.system_prompt}<</SYS>>\\n\\n{self.formatted_prompt} [/INST]\"\n elif format == \"chatml\":\n return f\"<|im_start|>system\\n{self.system_prompt}<|im_end|>\\n<|im_start|>user\\n{self.formatted_prompt}<|im_end|>\\n<|im_start|>assistant\\n\"\n elif format in [\"zephyr\", \"notus\"]:\n return f\"<|system|>\\n{self.system_prompt}</s>\\n<|user|>\\n{self.formatted_prompt}</s>\\n<|assistant|>\\n\"\n else:\n raise ValueError(\n f\"Format {format} not supported, please provide a custom `prompt_formatting_fn`\"\n \" or use any of the available formats: openai, llama2, chatml, zephyr\"\n )\n
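Besides the `"default"` format shown in the docstring, the same `Prompt` can be rendered with any of the other supported templates; a small sketch using the `"llama2"` branch defined above (output shown as comments):

```python
from distilabel.tasks.prompt import Prompt

prompt = Prompt(
    system_prompt="You are a helpful assistant.",
    formatted_prompt="What are the first 5 Fibonacci numbers?",
)

# Renders the Llama-2 chat template string built in `format_as` above.
print(prompt.format_as("llama2"))
# <s>[INST] <<SYS>>
# You are a helpful assistant.<</SYS>>
#
# What are the first 5 Fibonacci numbers? [/INST]
```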
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.SelfInstructTask","title":"SelfInstructTask
dataclass
","text":" Bases: TextGenerationTask
A TextGenerationTask
following the Self-Instruct specification for building the prompts.
Reference: https://github.com/yizhongw/self-instruct
Parameters:
- system_prompt (str): the system prompt to be used. Defaults to 'You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks. You are given a task description and a set of instructions for how to write the prompts for an specific AI application.'
- principles (Dict[str, List[str]]): the principles to be used for the system prompt. Defaults to field(default_factory=lambda : {'harmlessness': harmlessness, 'helpfulness': helpfulness, 'truthfulness': truthfulness, 'honesty': honesty, 'verbalized_calibration': verbalized_calibration}, repr=False).
- principles_distribution (Union[Dict[str, float], Literal["balanced"], None]): the distribution of principles to be used for the system prompt. Defaults to None.
- application_description (str): the description of the AI application. Defaults to "AI assistant".
- num_instructions (int): the number of instructions to be used for the prompt. Defaults to 5.
Source code in src/distilabel/tasks/text_generation/self_instruct.py
@dataclass\nclass SelfInstructTask(TextGenerationTask):\n \"\"\"A `TextGenerationTask` following the Self-Instruct specification for building\n the prompts.\n\n Reference: https://github.com/yizhongw/self-instruct\n\n Args:\n system_prompt (str, optional): the system prompt to be used. Defaults to `None`.\n principles (Dict[str, List[str]], optional): the principles to be used for the system prompt.\n Defaults to `None`.\n principles_distribution (Union[Dict[str, float], Literal[\"balanced\"], None], optional): the\n distribution of principles to be used for the system prompt. Defaults to `None`.\n application_description (str, optional): the description of the AI application. Defaults to\n \"AI assistant\".\n num_instructions (int, optional): the number of instructions to be used for the prompt.\n Defaults to 5.\n \"\"\"\n\n system_prompt: str = (\n \"You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks.\"\n \" You are given a task description and a set of instructions for how to write the prompts for an\"\n \" specific AI application.\"\n )\n application_description: str = \"AI assistant\"\n num_instructions: int = 5\n\n __jinja2_template__: str = _SELF_INSTRUCT_TEMPLATE\n\n def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the Self-Instruct specification.\n\n Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\\nDevelop 2 user queries that ...\",\n )\n \"\"\"\n render_kwargs = {\n \"application_description\": self.application_description,\n \"num_instructions\": self.num_instructions,\n \"input\": input,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n @property\n def output_args_names(self) -> List[str]:\n return [\"instructions\"]\n\n def parse_output(self, output: str) -> Dict[str, List[str]]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = re.compile(r\"\\d+\\.\\s*(.*?)\\n\")\n return {\"instructions\": pattern.findall(output)}\n\n def to_argilla_dataset(self, dataset_row: Dict[str, Any]) -> \"FeedbackDataset\":\n # First we infer the fields from the input_args_names, but we could also\n # create those manually instead using `rg.TextField(...)`\n fields = infer_fields_from_dataset_row(\n field_names=self.input_args_names,\n dataset_row=dataset_row,\n )\n # Once the input fields have been defined, then we also include the instruction\n # field which will be fulfilled with each of the instructions generated.\n fields.append(rg.TextField(name=\"instruction\", title=\"instruction\")) # type: ignore\n # Then we add a default `RatingQuestion` which asks the users to provide a\n # rating for each of the generations, differing from the scenario where the inputs\n # are the fields and the outputs the ones used to formulate the quesstions. 
So on,\n # in this scenario we won't have suggestions, as the questions will be related to the\n # combination of inputs and outputs.\n questions = [\n rg.RatingQuestion( # type: ignore\n name=\"instruction-rating\",\n title=\"How would you rate the generated instruction?\",\n values=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n ]\n # Finally, we define some metadata properties that can be potentially used\n # while exploring the dataset within Argilla to get more insights on the data.\n metadata_properties = []\n for arg_name in self.input_args_names:\n if isinstance(dataset_row[arg_name], list):\n for idx in range(1, len(dataset_row[arg_name]) + 1):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}-{idx}\") # type: ignore\n )\n elif isinstance(dataset_row[arg_name], str):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}\") # type: ignore\n )\n else:\n warnings.warn(\n f\"Unsupported input type ({type(dataset_row[arg_name])}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=\"length-instruction\") # type: ignore\n ) # type: ignore\n # Then we just return the `FeedbackDataset` with the fields, questions, and metadata properties\n # defined above.\n return rg.FeedbackDataset(\n fields=fields,\n questions=questions, # type: ignore\n metadata_properties=metadata_properties, # Note that these are always optional\n )\n\n def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n instructions_column: Optional[str] = \"instructions\",\n ) -> List[\"FeedbackRecord\"]:\n \"\"\"Converts a dataset row to a list of Argilla `FeedbackRecord`s.\"\"\"\n records = []\n for instructions in dataset_row[instructions_column]: # type: ignore\n for instruction in instructions:\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n value = value.strip() if isinstance(value, str) else \"\"\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n fields[\"instruction\"] = instruction\n metadata[\"length-instruction\"] = len(instruction)\n\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(\n model_metadata_from_dataset_row(dataset_row=dataset_row)\n )\n # Finally, we append the `FeedbackRecord` with the fields and the metadata\n records.append(rg.FeedbackRecord(fields=fields, metadata=metadata))\n if not records:\n raise ValueError(\n f\"Skipping the row {dataset_row} as the list of `FeedbackRecord` is empty as those could not be inferred.\"\n )\n return records\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.SelfInstructTask.generate_prompt","title":"generate_prompt(input, **_)
","text":"Generates a prompt following the Self-Instruct specification.
Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\\nDevelop 2 user queries that ...\",\n )\n
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the Self-Instruct specification.\n\n Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\\nDevelop 2 user queries that ...\",\n )\n \"\"\"\n render_kwargs = {\n \"application_description\": self.application_description,\n \"num_instructions\": self.num_instructions,\n \"input\": input,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.SelfInstructTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def parse_output(self, output: str) -> Dict[str, List[str]]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = re.compile(r\"\\d+\\.\\s*(.*?)\\n\")\n return {\"instructions\": pattern.findall(output)}\n
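The regex above captures every numbered line that is terminated by a newline; a minimal sketch on an assumed completion:

```python
import re

# Assumed completion with numbered instructions (note the trailing newline:
# the pattern only captures items that end with "\n").
output = "1. Write a haiku about autumn.\n2. Summarize the plot of Hamlet.\n"

pattern = re.compile(r"\d+\.\s*(.*?)\n")
print({"instructions": pattern.findall(output)})
# {'instructions': ['Write a haiku about autumn.', 'Summarize the plot of Hamlet.']}
```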
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.SelfInstructTask.to_argilla_record","title":"to_argilla_record(dataset_row, instructions_column='instructions')
","text":"Converts a dataset row to a list of Argilla FeedbackRecord
s.
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n instructions_column: Optional[str] = \"instructions\",\n) -> List[\"FeedbackRecord\"]:\n \"\"\"Converts a dataset row to a list of Argilla `FeedbackRecord`s.\"\"\"\n records = []\n for instructions in dataset_row[instructions_column]: # type: ignore\n for instruction in instructions:\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n value = value.strip() if isinstance(value, str) else \"\"\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n fields[\"instruction\"] = instruction\n metadata[\"length-instruction\"] = len(instruction)\n\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(\n model_metadata_from_dataset_row(dataset_row=dataset_row)\n )\n # Finally, we append the `FeedbackRecord` with the fields and the metadata\n records.append(rg.FeedbackRecord(fields=fields, metadata=metadata))\n if not records:\n raise ValueError(\n f\"Skipping the row {dataset_row} as the list of `FeedbackRecord` is empty as those could not be inferred.\"\n )\n return records\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.Task","title":"Task
","text":" Bases: ABC
Abstract class used to define the methods required to create a Task
, to be used within an LLM
.
Parameters:
- system_prompt (str): the system prompt to be used for generation. Required.
- task_description (Union[str, None]): the description of the task. Defaults to None.
Raises:
- ValueError: if the __jinja2_template__ attribute is not provided.
Source code in src/distilabel/tasks/base.py
class Task(ABC):\n \"\"\"Abstract class used to define the methods required to create a `Task`, to be used\n within an `LLM`.\n\n Args:\n system_prompt (str): the system prompt to be used for generation.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n\n Raises:\n ValueError: if the `__jinja2_template__` attribute is not provided.\n \"\"\"\n\n system_prompt: str\n task_description: Union[str, None] = None\n\n __jinja2_template__: Union[str, None] = None\n __type__: Union[Literal[\"generation\", \"labelling\"], None] = None\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield \"system_prompt\", self.system_prompt\n yield \"task_description\", self.task_description\n yield \"input_args_names\", self.input_args_names\n yield \"output_args_names\", self.output_args_names\n\n @property\n def template(self) -> \"Template\":\n if self.__jinja2_template__ is None:\n raise ValueError(\n \"You must provide a `__jinja2_template__` attribute to your Task subclass.\"\n )\n\n return Template(open(self.__jinja2_template__).read())\n\n @abstractmethod\n def generate_prompt(self, **kwargs: Any) -> Prompt:\n pass\n\n @abstractmethod\n def parse_output(self, output: str) -> Any:\n pass\n\n @property\n @abstractmethod\n def input_args_names(self) -> List[str]:\n pass\n\n @property\n @abstractmethod\n def output_args_names(self) -> List[str]:\n pass\n\n def validate_dataset(self, columns_in_dataset: List[str]) -> None:\n \"\"\"Validates that the dataset contains the required columns for the task.\n\n Args:\n columns_in_dataset (List[str]): the columns in the dataset.\n\n Raises:\n KeyError: if the dataset does not contain the required columns.\n \"\"\"\n for input_arg_name in self.input_args_names:\n if input_arg_name not in columns_in_dataset:\n raise KeyError(\n f\"LLM expects a column named '{input_arg_name}' in the provided\"\n \" dataset, but it was not found.\"\n )\n\n def to_argilla_dataset(\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> \"FeedbackDataset\":\n raise NotImplementedError(\n \"`to_argilla_dataset` is not implemented, if you want to export your dataset as an Argilla\"\n \" `FeedbackDataset` you will need to implement this method first.\"\n )\n\n def to_argilla_record(\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n raise NotImplementedError(\n \"`to_argilla_record` is not implemented, if you want to export your dataset as an Argilla\"\n \" `FeedbackDataset` you will need to implement this method first.\"\n )\n\n # Renamed to _to_argilla_record instead of renaming `to_argilla_record` to protected, as that would\n # imply more breaking changes.\n def _to_argilla_record( # noqa: C901\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n column_names = list(dataset_row.keys())\n if self.__type__ is None or self.__type__ == \"generation\":\n required_column_names = self.input_args_names + self.output_args_names\n elif self.__type__ == \"labelling\":\n required_column_names = self.output_args_names\n else:\n raise ValueError(\"The task type is not supported.\")\n\n dataset_rows = [dataset_row]\n if \"generation_model\" in dataset_row and isinstance(\n dataset_row[\"generation_model\"], list\n ):\n generation_columns = column_names[\n column_names.index(\"generation_model\") : column_names.index(\n \"labelling_model\"\n )\n if \"labelling_model\" in column_names\n 
else None\n ]\n if any(\n isinstance(nested, list)\n for column_name in list(\n set(generation_columns)\n - {\n \"generation_model\",\n \"generation_prompt\",\n \"raw_generation_response\",\n }\n )\n for nested in dataset_row[column_name]\n ):\n if any(\n generation_column in required_column_names\n for generation_column in generation_columns\n ):\n unwrapped_dataset_rows = []\n for row in dataset_rows:\n for idx in range(len(dataset_row[\"generation_model\"])):\n unwrapped_dataset_row = {}\n for key, value in row.items():\n if key in generation_columns:\n unwrapped_dataset_row[key] = value[idx]\n else:\n unwrapped_dataset_row[key] = value\n unwrapped_dataset_rows.append(unwrapped_dataset_row)\n dataset_rows = unwrapped_dataset_rows\n\n if \"labelling_model\" in dataset_row and isinstance(\n dataset_row[\"labelling_model\"], list\n ):\n labelling_columns = column_names[column_names.index(\"labelling_model\") :]\n if any(\n isinstance(nested, list)\n for column_name in list(\n set(labelling_columns)\n - {\n \"labelling_model\",\n \"labelling_prompt\",\n \"raw_labelling_response\",\n }\n )\n for nested in dataset_row[column_name]\n ):\n if any(\n labelling_column in required_column_names\n for labelling_column in labelling_columns\n ):\n unwrapped_dataset_rows = []\n for row in dataset_rows:\n for idx in range(len(dataset_row[\"labelling_model\"])):\n unwrapped_dataset_row = {}\n for key, value in row.items():\n if key in labelling_columns:\n unwrapped_dataset_row[key] = value[idx]\n else:\n unwrapped_dataset_row[key] = value\n unwrapped_dataset_rows.append(unwrapped_dataset_row)\n dataset_rows = unwrapped_dataset_rows\n\n if len(dataset_rows) == 1:\n return self.to_argilla_record(dataset_rows[0], *args, **kwargs)\n\n records = []\n for dataset_row in dataset_rows:\n generated_records = self.to_argilla_record(dataset_row, *args, **kwargs)\n if isinstance(generated_records, list):\n records.extend(generated_records)\n else:\n records.append(generated_records)\n return records\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.Task.validate_dataset","title":"validate_dataset(columns_in_dataset)
","text":"Validates that the dataset contains the required columns for the task.
Parameters:
- columns_in_dataset (List[str]): the columns in the dataset. Required.
Raises:
- KeyError: if the dataset does not contain the required columns.
Source code in src/distilabel/tasks/base.py
def validate_dataset(self, columns_in_dataset: List[str]) -> None:\n \"\"\"Validates that the dataset contains the required columns for the task.\n\n Args:\n columns_in_dataset (List[str]): the columns in the dataset.\n\n Raises:\n KeyError: if the dataset does not contain the required columns.\n \"\"\"\n for input_arg_name in self.input_args_names:\n if input_arg_name not in columns_in_dataset:\n raise KeyError(\n f\"LLM expects a column named '{input_arg_name}' in the provided\"\n \" dataset, but it was not found.\"\n )\n
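A small sketch of the validation behaviour, using `TextGenerationTask` (documented below), whose only required input column is `input`:

```python
from distilabel.tasks.text_generation import TextGenerationTask

task = TextGenerationTask()
task.validate_dataset(["input", "generations"])  # passes: "input" is present

try:
    task.validate_dataset(["prompt", "generations"])  # "input" is missing
except KeyError as e:
    print(e)  # "LLM expects a column named 'input' in the provided dataset, but it was not found."
```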
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask","title":"TextGenerationTask
dataclass
","text":" Bases: Task
A base Task
definition for text generation using LLMs.
Parameters:
- system_prompt (str): the system prompt to be used. Defaults to \"You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\"
- principles (Dict[str, List[str]]): the principles to be used for the system prompt. Defaults to field(default_factory=lambda : {'harmlessness': harmlessness, 'helpfulness': helpfulness, 'truthfulness': truthfulness, 'honesty': honesty, 'verbalized_calibration': verbalized_calibration}, repr=False).
- principles_distribution (Union[Dict[str, float], Literal['balanced'], None]): the distribution of principles to be used for the system prompt. Defaults to None.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> task = TextGenerationTask()\n
Source code in src/distilabel/tasks/text_generation/base.py
@dataclass\nclass TextGenerationTask(Task):\n \"\"\"A base `Task` definition for text generation using LLMs.\n\n Args:\n system_prompt (str, optional): the system prompt to be used. Defaults to `None`.\n principles (Dict[str, List[str]], optional): the principles to be used for the system prompt.\n Defaults to `None`.\n principles_distribution (Union[Dict[str, float], Literal[\"balanced\"], None], optional): the\n distribution of principles to be used for the system prompt. Defaults to `None`.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = TextGenerationTask()\n \"\"\"\n\n system_prompt: str = (\n \"You are a helpful, respectful and honest assistant. Always answer as helpfully as possible,\"\n \" while being safe. Your answers should not include any harmful, unethical, racist, sexist,\"\n \" toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased\"\n \" and positive in nature.\\nIf a question does not make any sense, or is not factually coherent,\"\n \" explain why instead of answering something not correct. If you don't know the answer to a\"\n \" question, please don't share false information.\"\n )\n principles: Dict[str, List[str]] = field(\n default_factory=lambda: {\n \"harmlessness\": UltraFeedbackPrinciples.harmlessness,\n \"helpfulness\": UltraFeedbackPrinciples.helpfulness,\n \"truthfulness\": UltraFeedbackPrinciples.truthfulness,\n \"honesty\": UltraFeedbackPrinciples.honesty,\n \"verbalized_calibration\": UltraFeedbackPrinciples.verbalized_calibration,\n },\n repr=False,\n )\n principles_distribution: Union[Dict[str, float], Literal[\"balanced\"], None] = None\n\n __type__: ClassVar[Literal[\"generation\"]] = \"generation\"\n\n def __post_init__(self) -> None:\n \"\"\"Validates the `principles_distribution` if it is a dict.\n\n Raises:\n ValueError: if the `principles_distribution` is a dict and it does not sum to 1.0.\n ValueError: if the `principles` are not included in the `principles_distribution`.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n not_included_principles = [\n principle\n for principle in self.principles\n if principle not in self.principles_distribution\n ]\n if not_included_principles:\n principles_str = \", \".join(\n [f\"'{principle}'\" for principle in not_included_principles]\n )\n raise ValueError(\n f\"Principles {principles_str} included in `principles` is not in\"\n \" `principles_distribution`\"\n )\n\n if sum(self.principles_distribution.values()) != 1.0:\n raise ValueError(\n \"`principles_distribution` must sum to 1.0 if it is a dict containing\"\n \" the distribution of principles to use.\"\n )\n\n def _get_principle(self) -> str:\n \"\"\"Gets a principle from the `principles` dict respecting the `principal_distribution`.\n\n Returns:\n str: the principle to be used.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n principle_group = random.choices(\n list(self.principles_distribution.keys()),\n weights=list(self.principles_distribution.values()),\n k=1,\n )[0]\n else:\n principle_group = random.choice(list(self.principles.keys()))\n return random.choice(self.principles[principle_group])\n\n def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates the prompt to be used for generation.\n\n Args:\n input (str): the input to be used for generation.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = 
TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n \"\"\"\n system_prompt = self.system_prompt\n if self.principles_distribution is not None:\n principle = self._get_principle()\n system_prompt += \" \" + principle\n return Prompt(system_prompt=system_prompt, formatted_prompt=input)\n\n def parse_output(self, output: str) -> Dict[str, str]:\n \"\"\"Parses the output of the LLM into the desired format.\"\"\"\n return {\"generations\": output}\n\n @property\n def input_args_names(self) -> List[str]:\n \"\"\"Returns the input args names for the task.\"\"\"\n return [\"input\"]\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the output args names for the task.\"\"\"\n return [\"generations\"]\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: Optional[str] = \"generations\",\n ) -> \"FeedbackDataset\":\n # First we infer the fields from the input_args_names, but we could also\n # create those manually instead using `rg.TextField(...)`\n fields = infer_fields_from_dataset_row(\n field_names=self.input_args_names + self.output_args_names,\n dataset_row=dataset_row,\n )\n # Then we add a default `RatingQuestion` which asks the users to provide a\n # rating for each of the generations, differing from the scenario where the inputs\n # are the fields and the outputs the ones used to formulate the quesstions. So on,\n # in this scenario we won't have suggestions, as the questions will be related to the\n # combination of inputs and outputs.\n if generations_column is None or generations_column not in dataset_row:\n raise ValueError(\n f\"The `generations_column='{generations_column}'` is not present in the dataset\"\n f\" row. 
Please provide any of {list(dataset_row.keys())}.\",\n )\n questions = []\n for idx in range(1, len(dataset_row[generations_column]) + 1):\n questions.append(\n rg.RatingQuestion( # type: ignore\n name=f\"{generations_column}-{idx}-rating\",\n title=f\"How would you rate the generation at `{generations_column}-{idx}`?\",\n values=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n )\n # Finally, we define some metadata properties that can be potentially used\n # while exploring the dataset within Argilla to get more insights on the data.\n metadata_properties = []\n for arg_name in self.input_args_names + self.output_args_names:\n if isinstance(dataset_row[arg_name], list):\n for idx in range(1, len(dataset_row[arg_name]) + 1):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}-{idx}\") # type: ignore\n )\n elif isinstance(dataset_row[arg_name], str):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}\") # type: ignore\n )\n else:\n warnings.warn(\n f\"Unsupported input type ({type(dataset_row[arg_name])}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we just return the `FeedbackDataset` with the fields, questions, and metadata properties\n # defined above.\n return rg.FeedbackDataset(\n fields=fields,\n questions=questions,\n metadata_properties=metadata_properties, # Note that these are always optional\n )\n\n def to_argilla_record(self, dataset_row: Dict[str, Any]) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names + self.output_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n # TODO: value formatting was included here due to some issues\n # with `SelfInstructTask` but these list-parsing may not be needed\n # anymore.\n value = (\n value.strip()\n if isinstance(value, str)\n else \"\\n\".join(value)\n if isinstance(value, list)\n else \"\"\n )\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(fields=fields, metadata=metadata)\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.input_args_names","title":"input_args_names: List[str]
property
","text":"Returns the input args names for the task.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the output args names for the task.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.__post_init__","title":"__post_init__()
","text":"Validates the principles_distribution
if it is a dict.
Raises:
- ValueError: if the principles_distribution is a dict and it does not sum to 1.0.
- ValueError: if the principles are not included in the principles_distribution.
Source code in src/distilabel/tasks/text_generation/base.py
def __post_init__(self) -> None:\n \"\"\"Validates the `principles_distribution` if it is a dict.\n\n Raises:\n ValueError: if the `principles_distribution` is a dict and it does not sum to 1.0.\n ValueError: if the `principles` are not included in the `principles_distribution`.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n not_included_principles = [\n principle\n for principle in self.principles\n if principle not in self.principles_distribution\n ]\n if not_included_principles:\n principles_str = \", \".join(\n [f\"'{principle}'\" for principle in not_included_principles]\n )\n raise ValueError(\n f\"Principles {principles_str} included in `principles` is not in\"\n \" `principles_distribution`\"\n )\n\n if sum(self.principles_distribution.values()) != 1.0:\n raise ValueError(\n \"`principles_distribution` must sum to 1.0 if it is a dict containing\"\n \" the distribution of principles to use.\"\n )\n
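For instance, a sketch of the validation above: a `principles_distribution` over the default principle groups that sums to 0.95 instead of 1.0 raises the `ValueError` (the exact weights are arbitrary):

```python
from distilabel.tasks.text_generation import TextGenerationTask

try:
    TextGenerationTask(
        principles_distribution={
            "harmlessness": 0.5,
            "helpfulness": 0.2,
            "truthfulness": 0.1,
            "honesty": 0.1,
            "verbalized_calibration": 0.05,  # total is 0.95, not 1.0
        }
    )
except ValueError as e:
    print(e)  # `principles_distribution` must sum to 1.0 if it is a dict containing the distribution of principles to use.
```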
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.generate_prompt","title":"generate_prompt(input, **_)
","text":"Generates the prompt to be used for generation.
Parameters:
- input (str): the input to be used for generation. Required.
Returns:
- Prompt: the generated prompt.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> task = TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n>>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\nPrompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n
Source code in src/distilabel/tasks/text_generation/base.py
def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates the prompt to be used for generation.\n\n Args:\n input (str): the input to be used for generation.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n \"\"\"\n system_prompt = self.system_prompt\n if self.principles_distribution is not None:\n principle = self._get_principle()\n system_prompt += \" \" + principle\n return Prompt(system_prompt=system_prompt, formatted_prompt=input)\n
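The returned `Prompt` is not yet tied to any chat template, so it can be rendered afterwards with `Prompt.format_as` (documented earlier on this page); a short sketch:

```python
from distilabel.tasks.text_generation import TextGenerationTask

task = TextGenerationTask(system_prompt="You are a helpful assistant.")
prompt = task.generate_prompt("What are the first 5 Fibonacci numbers?")

# Render the generic prompt with one of the supported chat templates.
print(prompt.format_as("chatml"))
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# What are the first 5 Fibonacci numbers?<|im_end|>
# <|im_start|>assistant
```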
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the LLM into the desired format.
Source code in src/distilabel/tasks/text_generation/base.py
def parse_output(self, output: str) -> Dict[str, str]:\n \"\"\"Parses the output of the LLM into the desired format.\"\"\"\n return {\"generations\": output}\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.TextGenerationTask.to_argilla_record","title":"to_argilla_record(dataset_row)
","text":"Converts a dataset row to an Argilla FeedbackRecord
.
src/distilabel/tasks/text_generation/base.py
def to_argilla_record(self, dataset_row: Dict[str, Any]) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names + self.output_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n # TODO: value formatting was included here due to some issues\n # with `SelfInstructTask` but these list-parsing may not be needed\n # anymore.\n value = (\n value.strip()\n if isinstance(value, str)\n else \"\\n\".join(value)\n if isinstance(value, list)\n else \"\"\n )\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(fields=fields, metadata=metadata)\n
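A sketch of exporting a single row (assuming the `argilla` extra is installed); the row contents are made up, and the column names follow the task's input/output args plus the optional `generation_model` column:

```python
from distilabel.tasks.text_generation import TextGenerationTask

task = TextGenerationTask()

# Hypothetical dataset row with a single generation.
row = {
    "input": "What are the first 5 Fibonacci numbers?",
    "generations": ["0, 1, 1, 2, 3"],
    "generation_model": ["gpt-3.5-turbo"],
}

record = task.to_argilla_record(row)
# `record.fields` holds "input" and "generations-1"; `record.metadata` holds the
# length metadata plus any model metadata inferred from `generation_model`.
```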
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraCMTask","title":"UltraCMTask
dataclass
","text":" Bases: CritiqueTask
src/distilabel/tasks/critique/ultracm.py
@dataclass\nclass UltraCMTask(CritiqueTask):\n __jinja2_template__: ClassVar[str] = _ULTRACM_TEMPLATE\n\n system_prompt: str = (\n \"User: A one-turn chat between a curious user and an artificial intelligence\"\n \" assistant. The assistant gives helpful, very detailed, and polite answers to\"\n \" the user's questions.</s>\"\n )\n\n def generate_prompt(self, input: str, generations: str, **_: Any) -> Prompt:\n render_kwargs = {\n \"instruction\": input,\n \"completion\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=f\"User: {self.template.render(**render_kwargs)}</s>\\nAssistant: ### Feedback\\nOverall Score: \",\n )\n\n def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = r\"(\\d+(?:\\.\\d+)?)\\s*(.*)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(1)),\n critique=match.group(2).strip(),\n )\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n score_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n score_column=score_column,\n critique_column=critique_column,\n score_values=score_values or [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraCMTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/critique/ultracm.py
def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = r\"(\\d+(?:\\.\\d+)?)\\s*(.*)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(1)),\n critique=match.group(2).strip(),\n )\n
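The pattern above expects the completion to start with a numeric overall score followed by the written feedback; a minimal sketch on an assumed output:

```python
import re

# Assumed completion: "<score> <feedback text>", as produced after the
# "Overall Score: " cue in the UltraCM prompt shown above.
output = "8.5 The answer is accurate, though it could cite its sources."

pattern = r"(\d+(?:\.\d+)?)\s*(.*)"
match = re.match(pattern, output)
if match:
    print({"score": float(match.group(1)), "critique": match.group(2).strip()})
# {'score': 8.5, 'critique': 'The answer is accurate, though it could cite its sources.'}
```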
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraFeedbackTask","title":"UltraFeedbackTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask
following the prompt template used by ULTRAFEEDBACK.
Parameters:
- system_prompt (str): the system prompt to be used for generation. Defaults to 'Your role is to evaluate text quality based on given criteria.'
- task_description (Union[str, None]): the description of the task. Defaults to None.
- ratings (Union[List[Rating], None]): the ratings to be used for the task. Defaults to None.
Source code in src/distilabel/tasks/preference/ultrafeedback.py
@dataclass\nclass UltraFeedbackTask(PreferenceTask):\n \"\"\"A `PreferenceTask` following the prompt template used by ULTRAFEEDBACK.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n ratings (Union[List[Rating], None], optional): the ratings to be used for the task. Defaults to `None`.\n \"\"\"\n\n ratings: List[Rating]\n task_description: str\n\n system_prompt: (\n str\n ) = \"Your role is to evaluate text quality based on given criteria.\"\n\n __jinja2_template__: ClassVar[str] = field(\n default=_ULTRAFEEDBACK_TEMPLATE, init=False, repr=False\n )\n __subtasks__: ClassVar[List[str]] = [\n \"text-quality\",\n \"helpfulness\",\n \"truthfulness\",\n \"honesty\",\n \"instruction-following\",\n ]\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the ULTRAFEEDBACK specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraFeedbackTask\n >>> task = UltraFeedbackTask.for_text_quality()\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"Your role is to evaluate text quality based on given criteria.\",\n formatted_prompt=\"# General Text Quality Assessment\\nEvaluate the model's ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description,\n \"ratings\": self.ratings,\n \"input\": input,\n \"responses\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> List[UltraFeedbackOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n parsed_output = []\n for section in output.split(\"#### Output for Text \")[1:]:\n rating, rationale = section.split(\"\\n\")[1:3]\n rating = float(rating.split(\": \")[1])\n rationale = rationale.split(\": \")[1]\n parsed_output.append(\n UltraFeedbackOutput(rating=rating, rationale=rationale)\n )\n return parsed_output\n\n # Override the default `to_argilla_dataset` method to provide the `ratings_values` of\n # UltraFeedback, as the default goes from 1-10 while UltraFeedback's default is 1-5\n # (0-4 actually, but Argilla doesn't support 0s).\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n ratings_column: str = \"rating\",\n rationale_column: str = \"rationale\",\n ratings_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=ratings_column,\n rationale_column=rationale_column,\n ratings_values=ratings_values or [1, 2, 3, 4, 5],\n )\n\n @classmethod\n def for_text_quality(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # General Text Quality Assessment\n Evaluate the model's outputs based on various criteria:\n 1. 
**Correctness & Informativeness**: Does the output provide accurate and helpful information?\n 2. **Honesty & Uncertainty**: How confidently does the model convey its information, and does it express uncertainty appropriately?\n 3. **Truthfulness & Hallucination**: Does the model introduce misleading or fabricated details?\n 4. **Instruction Following**: Does the model's output align with given instructions and the user's intent?\n Your role is to provide a holistic assessment considering all the above factors.\n\n **Scoring**: Rate outputs 1 to 5 based on the overall quality, considering all aspects:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Low Quality**: Contains inaccuracies, may be entirely wrong or has severe hallucinations.\",\n ),\n Rating(\n value=2,\n description=\"**Moderate Quality**: Addresses some aspects, but has errors or is partially aligned with instructions.\",\n ),\n Rating(\n value=3,\n description=\"**Good**: Generally accurate but may contain minor errors or slight deviations.\",\n ),\n Rating(\n value=4,\n description=\"**Very Good**: Near perfect, with minor issues in terms of alignment or confidence.\",\n ),\n Rating(\n value=5,\n description=\"**Excellent**: Accurate, confident, aligned with instructions, and free of hallucinations.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_helpfulness(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Informativeness / Helpfulness Assessment\n Evaluate if model's outputs fulfill task objectives and provide high-quality, correct, and, informative content.\n Helpfulness assessment emphasizes **Overall Quality** regarding correctness and informativeness.\n **Correctness**: Accurate computation, reasoning steps, and outputs without misunderstandings or fabrication.\n\n **Scoring**: Score 1 to 5 based on extent of helpfulness, regarding both informativeness and correctness:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Severely Incorrect**: Contains significant inaccuracies or fabricated content, even if comprehensive information is provided.\",\n ),\n Rating(\n value=2,\n description=\"**Partially Incorrect**: Contains errors that may cause confusion, even though comprehensive information is present.\",\n ),\n Rating(\n value=3,\n description=\"**Correct**: Accurate and provides useful information that meets the task's requirements.\",\n ),\n Rating(\n value=4,\n description=\"**Highly Informative**: Accurate and extensive, providing valuable insights and detailed information.\",\n ),\n Rating(\n value=5,\n description=\"**Outstandingly Helpful**: Both accurate and in-depth, offering profound insights and comprehensive information.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_truthfulness(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": 
system_prompt})\n\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Truthfulness and Hallucination Assessment\n Evaluate the model's accuracy in providing information without introducing misleading or fabricated details.\n Assign numeric identifier (or \"None\") from 1 to 3 for each type of hallucination:\n 1. **Contradictory with the World (Factual Error)**: Entities, locations, concepts, or events that conflict with established knowledge.\n 2. **Contradictory with Instruction and Input**: Responses diverge, introducing new facts not aligned with instructions or inputs.\n 3. **Self-Contradictory / Logical Error**: Responses contain internal contradictions or logical errors within each independent text.\n\n **Scoring**: Rate outputs 1 to 5 based on the extent of hallucination:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Completely Hallucinated**: Entirely unreliable due to hallucinations.\",\n ),\n Rating(\n value=2,\n description=\"**Severe Hallucination**: Nearly half contains hallucinations, severe deviation from main points.\",\n ),\n Rating(\n value=3,\n description=\"**Partial Hallucination / Misunderstanding**: Overall truthful, partial misunderstanding due to hallucinations.\",\n ),\n Rating(\n value=4,\n description=\"**Insignificant Hallucination**: Mostly truthful, slight hallucination not affecting main points.\",\n ),\n Rating(\n value=5, description=\"**No Hallucination**: Free of hallucinations.\"\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_honesty(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Honesty and Uncertainty Expression Assessment\n Assess how well the model conveys honesty and uncertainty. Evaluate if the model's confidence aligns with response correctness.\n - **Indicators of Uncertainty**:\n 1. Weakeners: e.g., 'I guess,' 'probably.'\n 2. Refusal to answer.\n 3. Verbalized confidence scores: [0, 20] low; (20, 40] uncertain; (40, 60] moderate; (60, 80] leaning confident; (80, 100] high.\n - **No uncertainty expression indicate confidence.**\n - **Response Correctness**: Align with ground truth, or provide accurate content without fabrication.\n\n **Scoring**: Rate outputs 1 to 5 based on the extent of honesty and uncertainty:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Confidently Incorrect**: Confident but entirely wrong.\",\n ),\n Rating(\n value=2,\n description=\"**Confident with Significant Mistakes / Unconfident Incorrect**: Confident but contains major errors. Unconfident and entirely wrong.\",\n ),\n Rating(\n value=3,\n description=\"**Uncertain / 'I Don't Know' / Subtle Mistakes**: 'I don't know' or declines. Confident but contains minor errors. Unconfident and contains significant mistakes.\",\n ),\n Rating(\n value=4,\n description=\"**Correct but Uncertain / Expressed Subtle Mistakes**: Correct but unconfident.\",\n ),\n Rating(\n value=5,\n description=\"**Correct and Confident / Precisely Express Uncertainty**: Correct and confident. 
Makes mistakes, but precisely acknowledges minor errors and indicates uncertainty on potential mistakes.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n\n return cls(**kwargs)\n\n @classmethod\n def for_instruction_following(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Instruction Following Assessment\n Evaluate alignment between output and intent. Assess understanding of task goal and restrictions.\n **Instruction Components**: Task Goal (intended outcome), Restrictions (text styles, formats, or designated methods, etc).\n\n **Scoring**: Rate outputs 1 to 5:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(value=1, description=\"**Irrelevant**: No alignment.\"),\n Rating(\n value=2,\n description=\"**Partial Focus**: Addresses one aspect poorly.\",\n ),\n Rating(\n value=3,\n description=\"**Partial Compliance**:\\n\\t- (1) Meets goal or restrictions, neglecting other.\\n\\t- (2) Acknowledges both but slight deviations.\",\n ),\n Rating(\n value=4,\n description=\"**Almost There**: Near alignment, minor deviations.\",\n ),\n Rating(\n value=5,\n description=\"**Comprehensive Compliance**: Fully aligns, meets all requirements.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n\n return cls(**kwargs)\n
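The for_* class methods above are the intended entry points for the five UltraFeedback subtasks listed in __subtasks__: each one pre-fills task_description and ratings for that criterion. A minimal usage sketch (the question and generations below are made up for illustration, and no LLM is called):

from distilabel.tasks.preference import UltraFeedbackTask

question = "What are the first 5 Fibonacci numbers?"
generations = ["0 1 1 2 3", "The first five Fibonacci numbers are 0, 1, 1, 2 and 3."]

# Each factory returns an UltraFeedbackTask configured for one subtask.
for factory in (
    UltraFeedbackTask.for_text_quality,
    UltraFeedbackTask.for_helpfulness,
    UltraFeedbackTask.for_truthfulness,
    UltraFeedbackTask.for_honesty,
    UltraFeedbackTask.for_instruction_following,
):
    task = factory()
    prompt = task.generate_prompt(input=question, generations=generations)
    print(prompt.formatted_prompt[:60])  # preview of the rendered template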
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraFeedbackTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the ULTRAFEEDBACK specification.
Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraFeedbackTask\n >>> task = UltraFeedbackTask.for_text_quality()\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"Your role is to evaluate text quality based on given criteria.\",\n formatted_prompt=\"# General Text Quality Assessment\n
Evaluate the model's ...\", )
Source code in src/distilabel/tasks/preference/ultrafeedback.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the ULTRAFEEDBACK specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraFeedbackTask\n >>> task = UltraFeedbackTask.for_text_quality()\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"Your role is to evaluate text quality based on given criteria.\",\n formatted_prompt=\"# General Text Quality Assessment\\nEvaluate the model's ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description,\n \"ratings\": self.ratings,\n \"input\": input,\n \"responses\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraFeedbackTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/ultrafeedback.py
def parse_output(self, output: str) -> List[UltraFeedbackOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n parsed_output = []\n for section in output.split(\"#### Output for Text \")[1:]:\n rating, rationale = section.split(\"\\n\")[1:3]\n rating = float(rating.split(\": \")[1])\n rationale = rationale.split(\": \")[1]\n parsed_output.append(\n UltraFeedbackOutput(rating=rating, rationale=rationale)\n )\n return parsed_output\n
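parse_output above splits the completion on "#### Output for Text " and reads a "Rating: <number>" and a "Rationale: <text>" line from each block, one block per generation. A small sketch with a hand-written (hypothetical) completion in that format:

from distilabel.tasks.preference import UltraFeedbackTask

task = UltraFeedbackTask.for_text_quality()

# Hypothetical model completion following the expected layout.
raw_output = (
    "#### Output for Text 1\n"
    "Rating: 5\n"
    "Rationale: Accurate and directly answers the question.\n"
    "#### Output for Text 2\n"
    "Rating: 3\n"
    "Rationale: Correct but overly terse.\n"
)

for item in task.parse_output(raw_output):
    print(item)  # UltraFeedbackOutput entries with `rating` and `rationale`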
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask","title":"UltraJudgeTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask for the UltraJudge task. The UltraJudge task was defined at Argilla specifically to improve evaluation using AI Feedback. It is based on both UltraFeedback and JudgeLM, with several improvements and modifications.
Parameters:
    system_prompt (str): the system prompt to be used for generation. Defaults to \"You are an evaluator tasked with assessing AI assistants' responses from the perspective of typical user preferences. Your critical analysis should focus on human-like engagement, solution effectiveness, accuracy, clarity, and creativity. Approach each response as if you were the user, considering how well the response meets your needs and expectations in a real-world scenario. Provide detailed feedback that highlights strengths and areas for improvement in each response, keeping in mind the goal of simulating a human's preferred choice. Your evaluation should be impartial and thorough, reflecting a human's perspective in preferring responses that are practical, clear, authentic, and aligned with their intent. Avoid bias, and focus on the content and quality of the responses.\"
    task_description (Union[str, None]): the description of the task. Defaults to \"Your task is to rigorously evaluate the performance of {num_responses} AI assistants, simulating a human's perspective. You will assess each response based on four key domains, reflecting aspects that are typically valued by humans: {areas}. First provide a score between 0 and 10 and write a detailed feedback for each area and assistant. Finally, provide a list of {num_responses} scores, each separated by a space, to reflect the performance of Assistants 1 to {num_responses}.\"
    areas (List[str]): the areas to be used for the task. Defaults to a list of four areas: \"Practical Accuracy\", \"Clarity & Transparency\", \"Authenticity & Reliability\", and \"Compliance with Intent\".
Source code in src/distilabel/tasks/preference/ultrajudge.py
@dataclass\nclass UltraJudgeTask(PreferenceTask):\n \"\"\"A `PreferenceTask` for the UltraJudge task. The `UltraJudge` task has been defined\n at Argilla specifically for a better evaluation using AI Feedback. The task is defined\n based on both UltraFeedback and JudgeLM, but with several improvements / modifications.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n areas (List[str], optional): the areas to be used for the task. Defaults to a list of four areas:\n \"Practical Accuracy\", \"Clarity & Transparency\", \"Authenticity & Reliability\", and \"Compliance with Intent\".\n \"\"\"\n\n system_prompt: str = (\n \"You are an evaluator tasked with assessing AI assistants' responses from the perspective of typical user preferences.\"\n \" Your critical analysis should focus on human-like engagement, solution effectiveness, accuracy, clarity, and\"\n \" creativity. Approach each response as if you were the user, considering how well the response meets your needs\"\n \" and expectations in a real-world scenario. Provide detailed feedback that highlights strengths and areas for\"\n \" improvement in each response, keeping in mind the goal of simulating a human's preferred choice. \"\n \"Your evaluation should be impartial and thorough, reflecting a human's perspective in preferring responses that are practical,\"\n \" clear, authentic, and aligned with their intent. Avoid bias, and focus on the content and quality of the responses.\"\n )\n\n task_description: str = (\n \"Your task is to rigorously evaluate the performance of {num_responses} AI assistants, simulating a human's perspective.\"\n \" You will assess each response based on four key domains, reflecting aspects that are typically valued by humans:\"\n \" {areas}.\"\n \" First provide a score between 0 and 10 and write a detailed feedback for each area and assistant.\"\n \" Finally, provide a list of {num_responses} scores, each separated by a space, to reflect the performance of Assistants 1 to {num_responses}.\"\n )\n\n areas: List[str] = field(\n default_factory=lambda: [\n \"Practical Accuracy\",\n \"Clarity & Transparency\",\n \"Authenticity & Reliability\",\n \"Compliance with Intent\",\n ]\n )\n\n __jinja2_template__: ClassVar[str] = field(\n default=_ULTRAJUDGE_TEMPLATE, init=False, repr=False\n )\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the names of the output arguments of the task.\"\"\"\n return [\"rating\", \"areas\"]\n\n @property\n def areas_str(self) -> str:\n \"\"\"Returns a string representation of the areas.\"\"\"\n return \", \".join(self.areas[:-1]) + \", and \" + self.areas[-1]\n\n @property\n def extract_area_score_and_rationale_regex(self) -> str:\n \"\"\"Returns a regex to extract the area, score, and rationale from the output.\"\"\"\n return rf\"({'|'.join(self.areas)})\\s*-\\s*(\\d+(?:\\.\\d+)?)\\n(.*?)(?=\\n\\n|\\Z)\"\n\n @property\n def extract_final_scores_regex(self) -> str:\n \"\"\"Returns a regex to extract the final scores from the output.\"\"\"\n return r\"Final scores:\\s*((?:\\d+(?:\\.\\d+)?\\s*)+)\"\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the UltraJudge specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the 
generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraJudgeTask\n >>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description.format(\n num_responses=len(generations), areas=self.areas_str\n ),\n \"instruction\": input,\n \"responses\": generations,\n }\n\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> List[UltraJudgeOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n num_areas = len(self.areas)\n # `areas_results` includes num_generations * num_areas tuples\n areas_results = re.findall(self.extract_area_score_and_rationale_regex, output)\n final_scores = [\n float(str_score)\n for str_score in re.findall(self.extract_final_scores_regex, output)[\n 0\n ].split(\" \")\n ]\n\n outputs = []\n for i, rating in enumerate(final_scores):\n areas = {}\n # Get the areas for the i-th generation\n for area in areas_results[i * num_areas : i * num_areas + num_areas]:\n name, area_rating, rationale = area\n areas[name] = Area(rating=area_rating, rationale=rationale)\n outputs.append(UltraJudgeOutput(rating=rating, areas=areas))\n\n return outputs\n\n def _merge_rationales(\n self, rationales: List[Dict[str, Any]], generations_column: str = \"generations\"\n ) -> str:\n \"\"\"Overwrite of the `_merge_rationales` as we need to process the areas before merging.\"\"\"\n\n def format_area(area: Dict[str, Any]) -> str:\n sections = []\n for title, ratings in area.items():\n sections.append(title)\n for k, v in ratings.items():\n sections.append(f\"{k}:{v}\")\n return \"\\n\".join(sections)\n\n merged_rationales = []\n for idx, area in enumerate(rationales, start=1):\n merged_rationales.append(\n f\"{generations_column}-{idx}:\\n{format_area(area)}\\n\"\n )\n return \"\\n\".join(merged_rationales)\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.areas_str","title":"areas_str: str
property
","text":"Returns a string representation of the areas.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.extract_area_score_and_rationale_regex","title":"extract_area_score_and_rationale_regex: str
property
","text":"Returns a regex to extract the area, score, and rationale from the output.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.extract_final_scores_regex","title":"extract_final_scores_regex: str
property
","text":"Returns a regex to extract the final scores from the output.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the names of the output arguments of the task.
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the UltraJudge specification.
Parameters:
    input (str): the input to be used for the prompt. Required.
    generations (List[str]): the generations to be used for the prompt. Required.

Returns:
    Prompt: the generated prompt.
Examples:
>>> from distilabel.tasks.preference import UltraJudgeTask\n>>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n>>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\nPrompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n)\n
Source code in src/distilabel/tasks/preference/ultrajudge.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the UltraJudge specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraJudgeTask\n >>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description.format(\n num_responses=len(generations), areas=self.areas_str\n ),\n \"instruction\": input,\n \"responses\": generations,\n }\n\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/#distilabel.tasks.UltraJudgeTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/ultrajudge.py
def parse_output(self, output: str) -> List[UltraJudgeOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n num_areas = len(self.areas)\n # `areas_results` includes num_generations * num_areas tuples\n areas_results = re.findall(self.extract_area_score_and_rationale_regex, output)\n final_scores = [\n float(str_score)\n for str_score in re.findall(self.extract_final_scores_regex, output)[\n 0\n ].split(\" \")\n ]\n\n outputs = []\n for i, rating in enumerate(final_scores):\n areas = {}\n # Get the areas for the i-th generation\n for area in areas_results[i * num_areas : i * num_areas + num_areas]:\n name, area_rating, rationale = area\n areas[name] = Area(rating=area_rating, rationale=rationale)\n outputs.append(UltraJudgeOutput(rating=rating, areas=areas))\n\n return outputs\n
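The two regex properties above define the completion layout parse_output expects: one "<Area> - <score>" line followed by a one-line rationale for every area and every assistant (blocks separated by blank lines), and a closing "Final scores:" line with one score per assistant. A sketch with a hand-written (hypothetical) completion for two assistants:

from distilabel.tasks.preference import UltraJudgeTask

task = UltraJudgeTask()

# Hypothetical completion: 4 areas x 2 assistants, then the final scores line.
raw_output = (
    "Practical Accuracy - 9\n"
    "Assistant 1 lists the correct numbers.\n\n"
    "Clarity & Transparency - 8\n"
    "The explanation is easy to follow.\n\n"
    "Authenticity & Reliability - 9\n"
    "No fabricated details.\n\n"
    "Compliance with Intent - 9\n"
    "Fully answers the question.\n\n"
    "Practical Accuracy - 6\n"
    "Assistant 2 starts the sequence at 1.\n\n"
    "Clarity & Transparency - 7\n"
    "Readable, though terse.\n\n"
    "Authenticity & Reliability - 7\n"
    "No hallucinations observed.\n\n"
    "Compliance with Intent - 6\n"
    "Only partially follows the instruction.\n\n"
    "Final scores: 8.5 6.5"
)

for judged in task.parse_output(raw_output):
    print(judged)  # UltraJudgeOutput with an overall rating and one Area entry per area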
"},{"location":"reference/distilabel/tasks/base/","title":"base","text":""},{"location":"reference/distilabel/tasks/base/#distilabel.tasks.base.Task","title":"Task
","text":" Bases: ABC
Abstract class used to define the methods required to create a Task, to be used within an LLM.
Parameters:
    system_prompt (str): the system prompt to be used for generation. Required.
    task_description (Union[str, None]): the description of the task. Defaults to None.

Raises:
    ValueError: if the __jinja2_template__ attribute is not provided.
src/distilabel/tasks/base.py
class Task(ABC):\n \"\"\"Abstract class used to define the methods required to create a `Task`, to be used\n within an `LLM`.\n\n Args:\n system_prompt (str): the system prompt to be used for generation.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n\n Raises:\n ValueError: if the `__jinja2_template__` attribute is not provided.\n \"\"\"\n\n system_prompt: str\n task_description: Union[str, None] = None\n\n __jinja2_template__: Union[str, None] = None\n __type__: Union[Literal[\"generation\", \"labelling\"], None] = None\n\n def __rich_repr__(self) -> Generator[Any, None, None]:\n yield \"system_prompt\", self.system_prompt\n yield \"task_description\", self.task_description\n yield \"input_args_names\", self.input_args_names\n yield \"output_args_names\", self.output_args_names\n\n @property\n def template(self) -> \"Template\":\n if self.__jinja2_template__ is None:\n raise ValueError(\n \"You must provide a `__jinja2_template__` attribute to your Task subclass.\"\n )\n\n return Template(open(self.__jinja2_template__).read())\n\n @abstractmethod\n def generate_prompt(self, **kwargs: Any) -> Prompt:\n pass\n\n @abstractmethod\n def parse_output(self, output: str) -> Any:\n pass\n\n @property\n @abstractmethod\n def input_args_names(self) -> List[str]:\n pass\n\n @property\n @abstractmethod\n def output_args_names(self) -> List[str]:\n pass\n\n def validate_dataset(self, columns_in_dataset: List[str]) -> None:\n \"\"\"Validates that the dataset contains the required columns for the task.\n\n Args:\n columns_in_dataset (List[str]): the columns in the dataset.\n\n Raises:\n KeyError: if the dataset does not contain the required columns.\n \"\"\"\n for input_arg_name in self.input_args_names:\n if input_arg_name not in columns_in_dataset:\n raise KeyError(\n f\"LLM expects a column named '{input_arg_name}' in the provided\"\n \" dataset, but it was not found.\"\n )\n\n def to_argilla_dataset(\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> \"FeedbackDataset\":\n raise NotImplementedError(\n \"`to_argilla_dataset` is not implemented, if you want to export your dataset as an Argilla\"\n \" `FeedbackDataset` you will need to implement this method first.\"\n )\n\n def to_argilla_record(\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n raise NotImplementedError(\n \"`to_argilla_record` is not implemented, if you want to export your dataset as an Argilla\"\n \" `FeedbackDataset` you will need to implement this method first.\"\n )\n\n # Renamed to _to_argilla_record instead of renaming `to_argilla_record` to protected, as that would\n # imply more breaking changes.\n def _to_argilla_record( # noqa: C901\n self, dataset_row: Dict[str, Any], *args: Any, **kwargs: Any\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n column_names = list(dataset_row.keys())\n if self.__type__ is None or self.__type__ == \"generation\":\n required_column_names = self.input_args_names + self.output_args_names\n elif self.__type__ == \"labelling\":\n required_column_names = self.output_args_names\n else:\n raise ValueError(\"The task type is not supported.\")\n\n dataset_rows = [dataset_row]\n if \"generation_model\" in dataset_row and isinstance(\n dataset_row[\"generation_model\"], list\n ):\n generation_columns = column_names[\n column_names.index(\"generation_model\") : column_names.index(\n \"labelling_model\"\n )\n if \"labelling_model\" in column_names\n 
else None\n ]\n if any(\n isinstance(nested, list)\n for column_name in list(\n set(generation_columns)\n - {\n \"generation_model\",\n \"generation_prompt\",\n \"raw_generation_response\",\n }\n )\n for nested in dataset_row[column_name]\n ):\n if any(\n generation_column in required_column_names\n for generation_column in generation_columns\n ):\n unwrapped_dataset_rows = []\n for row in dataset_rows:\n for idx in range(len(dataset_row[\"generation_model\"])):\n unwrapped_dataset_row = {}\n for key, value in row.items():\n if key in generation_columns:\n unwrapped_dataset_row[key] = value[idx]\n else:\n unwrapped_dataset_row[key] = value\n unwrapped_dataset_rows.append(unwrapped_dataset_row)\n dataset_rows = unwrapped_dataset_rows\n\n if \"labelling_model\" in dataset_row and isinstance(\n dataset_row[\"labelling_model\"], list\n ):\n labelling_columns = column_names[column_names.index(\"labelling_model\") :]\n if any(\n isinstance(nested, list)\n for column_name in list(\n set(labelling_columns)\n - {\n \"labelling_model\",\n \"labelling_prompt\",\n \"raw_labelling_response\",\n }\n )\n for nested in dataset_row[column_name]\n ):\n if any(\n labelling_column in required_column_names\n for labelling_column in labelling_columns\n ):\n unwrapped_dataset_rows = []\n for row in dataset_rows:\n for idx in range(len(dataset_row[\"labelling_model\"])):\n unwrapped_dataset_row = {}\n for key, value in row.items():\n if key in labelling_columns:\n unwrapped_dataset_row[key] = value[idx]\n else:\n unwrapped_dataset_row[key] = value\n unwrapped_dataset_rows.append(unwrapped_dataset_row)\n dataset_rows = unwrapped_dataset_rows\n\n if len(dataset_rows) == 1:\n return self.to_argilla_record(dataset_rows[0], *args, **kwargs)\n\n records = []\n for dataset_row in dataset_rows:\n generated_records = self.to_argilla_record(dataset_row, *args, **kwargs)\n if isinstance(generated_records, list):\n records.extend(generated_records)\n else:\n records.append(generated_records)\n return records\n
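Concrete tasks subclass Task by implementing generate_prompt, parse_output, and the input_args_names/output_args_names properties; __jinja2_template__ is only needed when the prompt is rendered through self.template. A minimal, self-contained sketch of a hypothetical task (not part of distilabel) that builds its prompt string directly:

from dataclasses import dataclass
from typing import Any, Dict, List

from distilabel.tasks.base import Task
from distilabel.tasks.prompt import Prompt


@dataclass
class KeywordExtractionTask(Task):
    """Hypothetical task used only to illustrate the abstract interface."""

    system_prompt: str = "You extract keywords from the given text."

    def generate_prompt(self, input: str, **_: Any) -> Prompt:
        # Builds the prompt directly instead of rendering `self.template`,
        # so no `__jinja2_template__` file is required for this sketch.
        return Prompt(
            system_prompt=self.system_prompt,
            formatted_prompt=f"Extract up to 5 keywords from:\n{input}",
        )

    def parse_output(self, output: str) -> Dict[str, List[str]]:
        # Assumes the model answers with a comma-separated list of keywords.
        return {"keywords": [kw.strip() for kw in output.split(",") if kw.strip()]}

    @property
    def input_args_names(self) -> List[str]:
        return ["input"]

    @property
    def output_args_names(self) -> List[str]:
        return ["keywords"]


task = KeywordExtractionTask()
print(task.generate_prompt(input="distilabel builds preference datasets with AI Feedback."))
print(task.parse_output("datasets, preference, AI Feedback"))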
"},{"location":"reference/distilabel/tasks/base/#distilabel.tasks.base.Task.validate_dataset","title":"validate_dataset(columns_in_dataset)
","text":"Validates that the dataset contains the required columns for the task.
Parameters:
    columns_in_dataset (List[str]): the columns in the dataset. Required.

Raises:
    KeyError: if the dataset does not contain the required columns.
Source code in src/distilabel/tasks/base.py
def validate_dataset(self, columns_in_dataset: List[str]) -> None:\n \"\"\"Validates that the dataset contains the required columns for the task.\n\n Args:\n columns_in_dataset (List[str]): the columns in the dataset.\n\n Raises:\n KeyError: if the dataset does not contain the required columns.\n \"\"\"\n for input_arg_name in self.input_args_names:\n if input_arg_name not in columns_in_dataset:\n raise KeyError(\n f\"LLM expects a column named '{input_arg_name}' in the provided\"\n \" dataset, but it was not found.\"\n )\n
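validate_dataset only checks column names against input_args_names; it does not inspect values. A quick sketch using one of the preference tasks from this reference:

from distilabel.tasks.preference import UltraFeedbackTask

task = UltraFeedbackTask.for_text_quality()  # input_args_names == ["input", "generations"]

task.validate_dataset(["input", "generations", "extra_column"])  # passes silently

try:
    task.validate_dataset(["instruction", "generations"])  # no "input" column
except KeyError as error:
    print(error)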
"},{"location":"reference/distilabel/tasks/mixins/","title":"mixins","text":""},{"location":"reference/distilabel/tasks/mixins/#distilabel.tasks.mixins.RatingToArgillaMixin","title":"RatingToArgillaMixin
","text":"Mixin that adds the to_argilla_dataset
and to_argilla_record
methods for tasks that generate both ratings and rationales i.e. PreferenceTask
or CritiqueTask
.
src/distilabel/tasks/mixins.py
class RatingToArgillaMixin:\n \"\"\"Mixin that adds the `to_argilla_dataset` and `to_argilla_record` methods for tasks\n that generate both ratings and rationales i.e. `PreferenceTask` or `CritiqueTask`.\n \"\"\"\n\n def to_argilla_dataset(\n self: TaskProtocol,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n ratings_column: str = \"rating\",\n rationale_column: str = \"rationale\",\n ratings_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n # First we infer the fields from the input_args_names, but we could also\n # create those manually instead using `rg.TextField(...)`\n fields = infer_fields_from_dataset_row(\n field_names=self.input_args_names, dataset_row=dataset_row\n )\n # Then we add the questions, which cannot be easily inferred in this case,\n # because those depend neither on the outputs nor on the inputs, but in a combination\n # of both, since the questions will be formulated using the inputs, but assigned to the\n # outputs.\n if generations_column is None or generations_column not in dataset_row:\n raise ValueError(\n f\"The `generations_column='{generations_column}'` is not present in the\"\n f\" dataset row. Please provide any of {list(dataset_row.keys())}.\",\n )\n if ratings_column is None or ratings_column not in dataset_row:\n raise ValueError(\n f\"The `ratings_column='{ratings_column}'` is not present in the dataset\"\n f\" row. Please provide any of {list(dataset_row.keys())}.\",\n )\n if rationale_column is None or rationale_column not in dataset_row:\n raise ValueError(\n f\"The `rationale_column='{rationale_column}'` is not present in the dataset\"\n f\" row. Please provide any of {list(dataset_row.keys())}.\",\n )\n questions = []\n for idx in range(1, len(dataset_row[generations_column]) + 1):\n questions.append(\n rg.RatingQuestion( # type: ignore\n name=f\"{generations_column}-{idx}-{ratings_column}\",\n title=f\"What's the {ratings_column} for {generations_column}-{idx}?\",\n values=ratings_values or [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n )\n questions.append(\n rg.TextQuestion( # type: ignore\n name=f\"{ratings_column}-{rationale_column}\",\n title=f\"What's the {rationale_column} behind each {ratings_column}?\",\n )\n )\n # Finally, we define some metadata properties that can be potentially used\n # while exploring the dataset within Argilla to get more insights on the data.\n metadata_properties = []\n for arg_name in self.input_args_names:\n if isinstance(dataset_row[arg_name], list):\n for idx in range(1, len(dataset_row[arg_name]) + 1):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}-{idx}\") # type: ignore\n )\n if arg_name == generations_column:\n metadata_properties.append(\n rg.FloatMetadataProperty(\n name=f\"{ratings_column}-{arg_name}-{idx}\"\n ) # type: ignore\n )\n elif isinstance(dataset_row[arg_name], str):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}\") # type: ignore\n )\n else:\n warnings.warn(\n f\"Unsupported input type ({type(dataset_row[arg_name])}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n metadata_properties.append(\n rg.FloatMetadataProperty(name=f\"distance-best-{ratings_column}\") # type: ignore\n )\n # Then we just return the `FeedbackDataset` with the fields, questions, and metadata properties\n # defined above.\n return rg.FeedbackDataset(\n fields=fields,\n questions=questions,\n metadata_properties=metadata_properties, # Note that these are always optional\n )\n\n def _merge_rationales(\n 
self, rationales: List[str], generations_column: str = \"generations\"\n ) -> str:\n return \"\\n\".join(\n f\"{generations_column}-{idx}:\\n{rationale}\\n\"\n for idx, rationale in enumerate(rationales, start=1)\n )\n\n def to_argilla_record( # noqa: C901\n self: TaskProtocol,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n ratings_column: str = \"rating\",\n rationale_column: str = \"rationale\",\n ) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n fields[f\"{arg_name}-{idx}\"] = value.strip() if value else \"\"\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value.strip())\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we include the suggestions, which are generated from the outputs\n # of the LLM instead.\n suggestions = []\n if rationale_column is None or rationale_column not in dataset_row:\n raise ValueError(\n f\"The rationale column {rationale_column} is not present in the dataset row.\"\n )\n if dataset_row.get(rationale_column) is not None:\n rationales = dataset_row.get(rationale_column)\n suggestions.append(\n {\n \"question_name\": f\"{ratings_column}-{rationale_column}\",\n \"value\": self._merge_rationales(rationales=rationales) # type: ignore\n if isinstance(rationales, list)\n else rationales,\n }\n )\n if ratings_column is None or ratings_column not in dataset_row:\n raise ValueError(\n f\"The ratings column {ratings_column} is not present in the dataset row.\"\n )\n if dataset_row.get(ratings_column) is not None:\n ratings = dataset_row.get(ratings_column)\n if isinstance(ratings, list):\n for idx, value in enumerate(ratings, start=1): # type: ignore\n suggestions.append(\n {\n \"question_name\": f\"{generations_column}-{idx}-{ratings_column}\",\n \"value\": 1\n if value < 1\n else int(value)\n if value < 10\n else None,\n }\n )\n metadata[f\"{ratings_column}-{generations_column}-{idx}\"] = value\n if len(ratings) >= 2: # type: ignore\n sorted_ratings = sorted(ratings, reverse=True) # type: ignore\n metadata[f\"distance-best-{ratings_column}\"] = (\n sorted_ratings[0] - sorted_ratings[1]\n )\n elif isinstance(ratings, (str, float, int)):\n suggestions.append(\n {\n \"question_name\": f\"{generations_column}-1-{ratings_column}\",\n \"value\": int(ratings),\n }\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(\n fields=fields, suggestions=suggestions, metadata=metadata\n )\n
"},{"location":"reference/distilabel/tasks/mixins/#distilabel.tasks.mixins.RatingToArgillaMixin.to_argilla_record","title":"to_argilla_record(dataset_row, generations_column='generations', ratings_column='rating', rationale_column='rationale')
","text":"Converts a dataset row to an Argilla FeedbackRecord
.
src/distilabel/tasks/mixins.py
def to_argilla_record( # noqa: C901\n self: TaskProtocol,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n ratings_column: str = \"rating\",\n rationale_column: str = \"rationale\",\n) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n fields[f\"{arg_name}-{idx}\"] = value.strip() if value else \"\"\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value.strip())\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we include the suggestions, which are generated from the outputs\n # of the LLM instead.\n suggestions = []\n if rationale_column is None or rationale_column not in dataset_row:\n raise ValueError(\n f\"The rationale column {rationale_column} is not present in the dataset row.\"\n )\n if dataset_row.get(rationale_column) is not None:\n rationales = dataset_row.get(rationale_column)\n suggestions.append(\n {\n \"question_name\": f\"{ratings_column}-{rationale_column}\",\n \"value\": self._merge_rationales(rationales=rationales) # type: ignore\n if isinstance(rationales, list)\n else rationales,\n }\n )\n if ratings_column is None or ratings_column not in dataset_row:\n raise ValueError(\n f\"The ratings column {ratings_column} is not present in the dataset row.\"\n )\n if dataset_row.get(ratings_column) is not None:\n ratings = dataset_row.get(ratings_column)\n if isinstance(ratings, list):\n for idx, value in enumerate(ratings, start=1): # type: ignore\n suggestions.append(\n {\n \"question_name\": f\"{generations_column}-{idx}-{ratings_column}\",\n \"value\": 1\n if value < 1\n else int(value)\n if value < 10\n else None,\n }\n )\n metadata[f\"{ratings_column}-{generations_column}-{idx}\"] = value\n if len(ratings) >= 2: # type: ignore\n sorted_ratings = sorted(ratings, reverse=True) # type: ignore\n metadata[f\"distance-best-{ratings_column}\"] = (\n sorted_ratings[0] - sorted_ratings[1]\n )\n elif isinstance(ratings, (str, float, int)):\n suggestions.append(\n {\n \"question_name\": f\"{generations_column}-1-{ratings_column}\",\n \"value\": int(ratings),\n }\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(\n fields=fields, suggestions=suggestions, metadata=metadata\n )\n
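to_argilla_record turns one labelled row into an Argilla FeedbackRecord whose suggestions mirror the ratings and merged rationales, while to_argilla_dataset builds the matching FeedbackDataset schema. A sketch under the assumption that the argilla extra is installed; the row below is hand-written for illustration:

from distilabel.tasks.preference import UltraFeedbackTask

task = UltraFeedbackTask.for_text_quality()

# Hand-written row mimicking the columns a labelled preference dataset would have.
row = {
    "input": "What are the first 5 Fibonacci numbers?",
    "generations": ["0 1 1 2 3", "1 1 2 3 5"],
    "rating": [5.0, 3.0],
    "rationale": [
        "Correct sequence starting at 0.",
        "Starts at 1, so the first number is missing.",
    ],
}

dataset = task.to_argilla_dataset(dataset_row=row)  # FeedbackDataset schema
record = task.to_argilla_record(dataset_row=row)    # one FeedbackRecord
dataset.add_records([record])                       # ready for dataset.push_to_argilla(...)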
"},{"location":"reference/distilabel/tasks/prompt/","title":"prompt","text":""},{"location":"reference/distilabel/tasks/prompt/#distilabel.tasks.prompt.ChatCompletion","title":"ChatCompletion
","text":" Bases: TypedDict
A TypedDict matching OpenAI's chat completion format.
src/distilabel/tasks/prompt.py
class ChatCompletion(TypedDict):\n \"\"\"A `TypedDict` matching OpenAI's chat completion format.\"\"\"\n\n role: Literal[\"system\", \"user\", \"assistant\"]\n content: str\n
"},{"location":"reference/distilabel/tasks/prompt/#distilabel.tasks.prompt.Prompt","title":"Prompt
dataclass
","text":"A dataclass
representing a Prompt
.
Parameters:
    system_prompt (str): the system prompt. Required.
    formatted_prompt (str): the formatted prompt. Required.

Examples:
>>> from distilabel.tasks.prompt import Prompt\n>>> prompt = Prompt(\n... system_prompt=\"You are a helpful assistant.\",\n... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n... )\n
Source code in src/distilabel/tasks/prompt.py
@dataclass\nclass Prompt:\n \"\"\"A `dataclass` representing a `Prompt`.\n\n Args:\n system_prompt (str): the system prompt.\n formatted_prompt (str): the formatted prompt.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n \"\"\"\n\n system_prompt: str\n formatted_prompt: str\n\n def format_as(self, format: SupportedFormats) -> Union[str, List[ChatCompletion]]:\n \"\"\"Formats the prompt as the specified format.\n\n Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\\nWhat are the first 5 Fibonacci numbers?'\n \"\"\"\n if format == \"default\":\n return f\"{self.system_prompt}\\n{self.formatted_prompt}\"\n elif format == \"openai\":\n return [\n ChatCompletion(\n role=\"system\",\n content=self.system_prompt,\n ),\n ChatCompletion(role=\"user\", content=self.formatted_prompt),\n ]\n elif format == \"llama2\":\n return f\"<s>[INST] <<SYS>>\\n{self.system_prompt}<</SYS>>\\n\\n{self.formatted_prompt} [/INST]\"\n elif format == \"chatml\":\n return f\"<|im_start|>system\\n{self.system_prompt}<|im_end|>\\n<|im_start|>user\\n{self.formatted_prompt}<|im_end|>\\n<|im_start|>assistant\\n\"\n elif format in [\"zephyr\", \"notus\"]:\n return f\"<|system|>\\n{self.system_prompt}</s>\\n<|user|>\\n{self.formatted_prompt}</s>\\n<|assistant|>\\n\"\n else:\n raise ValueError(\n f\"Format {format} not supported, please provide a custom `prompt_formatting_fn`\"\n \" or use any of the available formats: openai, llama2, chatml, zephyr\"\n )\n
"},{"location":"reference/distilabel/tasks/prompt/#distilabel.tasks.prompt.Prompt.format_as","title":"format_as(format)
","text":"Formats the prompt as the specified format.
Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\n
What are the first 5 Fibonacci numbers?'
Source code in src/distilabel/tasks/prompt.py
def format_as(self, format: SupportedFormats) -> Union[str, List[ChatCompletion]]:\n \"\"\"Formats the prompt as the specified format.\n\n Args:\n format (SupportedFormats): the format to be used for the prompt. Available formats are\n `default`, `openai`, `llama2`, `chatml`, and `zephyr`.\n\n Returns:\n Union[str, List[ChatCompletion]]: the formatted prompt.\n\n Raises:\n ValueError: if the specified format is not supported.\n\n Examples:\n >>> from distilabel.tasks.prompt import Prompt\n >>> prompt = Prompt(\n ... system_prompt=\"You are a helpful assistant.\",\n ... formatted_prompt=\"What are the first 5 Fibonacci numbers?\",\n ... )\n >>> prompt.format_as(\"default\")\n 'You are a helpful assistant.\\nWhat are the first 5 Fibonacci numbers?'\n \"\"\"\n if format == \"default\":\n return f\"{self.system_prompt}\\n{self.formatted_prompt}\"\n elif format == \"openai\":\n return [\n ChatCompletion(\n role=\"system\",\n content=self.system_prompt,\n ),\n ChatCompletion(role=\"user\", content=self.formatted_prompt),\n ]\n elif format == \"llama2\":\n return f\"<s>[INST] <<SYS>>\\n{self.system_prompt}<</SYS>>\\n\\n{self.formatted_prompt} [/INST]\"\n elif format == \"chatml\":\n return f\"<|im_start|>system\\n{self.system_prompt}<|im_end|>\\n<|im_start|>user\\n{self.formatted_prompt}<|im_end|>\\n<|im_start|>assistant\\n\"\n elif format in [\"zephyr\", \"notus\"]:\n return f\"<|system|>\\n{self.system_prompt}</s>\\n<|user|>\\n{self.formatted_prompt}</s>\\n<|assistant|>\\n\"\n else:\n raise ValueError(\n f\"Format {format} not supported, please provide a custom `prompt_formatting_fn`\"\n \" or use any of the available formats: openai, llama2, chatml, zephyr\"\n )\n
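Each branch above maps the same Prompt onto a different chat or instruction template. A short sketch printing the supported formats:

from distilabel.tasks.prompt import Prompt

prompt = Prompt(
    system_prompt="You are a helpful assistant.",
    formatted_prompt="What are the first 5 Fibonacci numbers?",
)

print(prompt.format_as("openai"))  # list with "system" and "user" ChatCompletion dicts
print(prompt.format_as("llama2"))  # "<s>[INST] <<SYS>> ..." string
print(prompt.format_as("chatml"))  # "<|im_start|>system ..." string
print(prompt.format_as("zephyr"))  # "<|system|> ..." string (also used by "notus")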
"},{"location":"reference/distilabel/tasks/critique/","title":"critique","text":""},{"location":"reference/distilabel/tasks/critique/base/","title":"base","text":""},{"location":"reference/distilabel/tasks/critique/base/#distilabel.tasks.critique.base.CritiqueTask","title":"CritiqueTask
dataclass
","text":" Bases: RatingToArgillaMixin
, Task
A Task
for critique / judge tasks.
Parameters:
    system_prompt (str): the system prompt to be used for generation. Required.
    task_description (Union[str, None]): the description of the task. Defaults to None.
Source code in src/distilabel/tasks/critique/base.py
@dataclass\nclass CritiqueTask(RatingToArgillaMixin, Task):\n \"\"\"A `Task` for critique / judge tasks.\n\n Args:\n system_prompt (str): the system prompt to be used for generation.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n \"\"\"\n\n __type__: ClassVar[Literal[\"labelling\"]] = \"labelling\"\n\n @property\n def input_args_names(self) -> List[str]:\n \"\"\"Returns the names of the input arguments of the task.\"\"\"\n return [\"input\", \"generations\"]\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the names of the output arguments of the task.\"\"\"\n return [\"critique\", \"score\"]\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n score_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=score_column,\n rationale_column=critique_column,\n ratings_values=score_values or [1, 2, 3, 4, 5],\n )\n\n def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n ) -> Union[\"FeedbackRecord\", List[\"FeedbackRecord\"]]:\n return super().to_argilla_record(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=score_column,\n rationale_column=critique_column,\n )\n
"},{"location":"reference/distilabel/tasks/critique/base/#distilabel.tasks.critique.base.CritiqueTask.input_args_names","title":"input_args_names: List[str]
property
","text":"Returns the names of the input arguments of the task.
"},{"location":"reference/distilabel/tasks/critique/base/#distilabel.tasks.critique.base.CritiqueTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the names of the output arguments of the task.
"},{"location":"reference/distilabel/tasks/critique/base/#distilabel.tasks.critique.base.CritiqueTaskOutput","title":"CritiqueTaskOutput
","text":" Bases: TypedDict
A TypedDict matching the output format of any CritiqueTask.
src/distilabel/tasks/critique/base.py
class CritiqueTaskOutput(TypedDict):\n \"\"\"A `TypedDict` matching the output format of any `CritiqueTask`.\"\"\"\n\n score: float\n critique: str\n
"},{"location":"reference/distilabel/tasks/critique/prometheus/","title":"prometheus","text":""},{"location":"reference/distilabel/tasks/critique/prometheus/#distilabel.tasks.critique.prometheus.PrometheusTask","title":"PrometheusTask
dataclass
","text":" Bases: CritiqueTask
src/distilabel/tasks/critique/prometheus.py
@dataclass\nclass PrometheusTask(CritiqueTask):\n scoring_criteria: str\n score_descriptions: Dict[int, str]\n\n system_prompt: str = \"You are a fair evaluator language model.\"\n\n __jinja2_template__: ClassVar[str] = _PROMETHEUS_TEMPLATE\n\n @property\n def input_args_names(self) -> List[str]:\n return super().input_args_names + [\"ref_completion\"]\n\n def generate_prompt(\n self, input: str, generations: str, ref_completion: str, **_: Any\n ) -> Prompt:\n render_kwargs = {\n \"instruction\": input,\n \"completion\": generations,\n \"ref_completion\": ref_completion,\n \"scoring_criteria\": self.scoring_criteria,\n \"score_descriptions\": self.score_descriptions,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n # We use a regex instead of splitting by the delimiter because the\n # critique may contain the delimiter, and using the regex is safer.\n pattern = r\"(.+?)\\. \\[RESULT\\] (\\d+)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(2)),\n critique=match.group(1).strip(),\n )\n
"},{"location":"reference/distilabel/tasks/critique/prometheus/#distilabel.tasks.critique.prometheus.PrometheusTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/critique/prometheus.py
def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n # We use a regex instead of splitting by the delimiter because the\n # critique may contain the delimiter, and using the regex is safer.\n pattern = r\"(.+?)\\. \\[RESULT\\] (\\d+)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(2)),\n critique=match.group(1).strip(),\n )\n
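The regex above means PrometheusTask expects the critique and the score on a single line, in the form "<critique>. [RESULT] <score>". A sketch with hand-written scoring criteria, score descriptions, and completion (all illustrative, not library defaults):

from distilabel.tasks.critique.prometheus import PrometheusTask

task = PrometheusTask(
    scoring_criteria="Is the answer factually correct and complete?",
    score_descriptions={
        1: "Mostly incorrect.",
        2: "Partially correct.",
        3: "Correct but incomplete.",
        4: "Correct and mostly complete.",
        5: "Correct and complete.",
    },
)

output = task.parse_output(
    "The answer lists the right numbers but skips the reasoning. [RESULT] 4"
)
print(output)  # {'score': 4.0, 'critique': 'The answer lists the right numbers but skips the reasoning'}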
"},{"location":"reference/distilabel/tasks/critique/ultracm/","title":"ultracm","text":""},{"location":"reference/distilabel/tasks/critique/ultracm/#distilabel.tasks.critique.ultracm.UltraCMTask","title":"UltraCMTask
dataclass
","text":" Bases: CritiqueTask
src/distilabel/tasks/critique/ultracm.py
@dataclass\nclass UltraCMTask(CritiqueTask):\n __jinja2_template__: ClassVar[str] = _ULTRACM_TEMPLATE\n\n system_prompt: str = (\n \"User: A one-turn chat between a curious user and an artificial intelligence\"\n \" assistant. The assistant gives helpful, very detailed, and polite answers to\"\n \" the user's questions.</s>\"\n )\n\n def generate_prompt(self, input: str, generations: str, **_: Any) -> Prompt:\n render_kwargs = {\n \"instruction\": input,\n \"completion\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=f\"User: {self.template.render(**render_kwargs)}</s>\\nAssistant: ### Feedback\\nOverall Score: \",\n )\n\n def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = r\"(\\d+(?:\\.\\d+)?)\\s*(.*)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(1)),\n critique=match.group(2).strip(),\n )\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n score_column: str = \"score\",\n critique_column: str = \"critique\",\n score_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n score_column=score_column,\n critique_column=critique_column,\n score_values=score_values or [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n
"},{"location":"reference/distilabel/tasks/critique/ultracm/#distilabel.tasks.critique.ultracm.UltraCMTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/critique/ultracm.py
def parse_output(self, output: str) -> CritiqueTaskOutput: # type: ignore\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = r\"(\\d+(?:\\.\\d+)?)\\s*(.*)\"\n match = re.match(pattern, output)\n if match:\n return CritiqueTaskOutput(\n score=float(match.group(1)),\n critique=match.group(2).strip(),\n )\n
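Because the prompt ends with "Overall Score: ", UltraCM completions start with the score itself, and parse_output reads "<score> <critique>" from a single line. A sketch with a hand-written (hypothetical) completion:

from distilabel.tasks.critique.ultracm import UltraCMTask

task = UltraCMTask()
output = task.parse_output("7.5 The answer is correct, though it could justify each step.")
print(output)  # {'score': 7.5, 'critique': 'The answer is correct, though it could justify each step.'}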
"},{"location":"reference/distilabel/tasks/preference/","title":"preference","text":""},{"location":"reference/distilabel/tasks/preference/base/","title":"base","text":""},{"location":"reference/distilabel/tasks/preference/base/#distilabel.tasks.preference.base.PreferenceTask","title":"PreferenceTask
dataclass
","text":" Bases: RatingToArgillaMixin
, Task
A Task
for preference rating tasks.
Parameters:
    system_prompt (str): the system prompt to be used for generation. Required.
    task_description (Union[str, None]): the description of the task. Defaults to None.
Source code in src/distilabel/tasks/preference/base.py
@dataclass\nclass PreferenceTask(RatingToArgillaMixin, Task):\n \"\"\"A `Task` for preference rating tasks.\n\n Args:\n system_prompt (str): the system prompt to be used for generation.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n \"\"\"\n\n __type__: ClassVar[Literal[\"labelling\"]] = \"labelling\"\n\n @property\n def input_args_names(self) -> List[str]:\n \"\"\"Returns the names of the input arguments of the task.\"\"\"\n return [\"input\", \"generations\"]\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the names of the output arguments of the task.\"\"\"\n return [\"rating\", \"rationale\"]\n
"},{"location":"reference/distilabel/tasks/preference/base/#distilabel.tasks.preference.base.PreferenceTask.input_args_names","title":"input_args_names: List[str]
property
","text":"Returns the names of the input arguments of the task.
"},{"location":"reference/distilabel/tasks/preference/base/#distilabel.tasks.preference.base.PreferenceTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the names of the output arguments of the task.
"},{"location":"reference/distilabel/tasks/preference/judgelm/","title":"judgelm","text":""},{"location":"reference/distilabel/tasks/preference/judgelm/#distilabel.tasks.preference.judgelm.JudgeLMOutput","title":"JudgeLMOutput
","text":" Bases: TypedDict
A TypedDict
matching the output format of JudgeLM.
src/distilabel/tasks/preference/judgelm.py
class JudgeLMOutput(TypedDict):\n \"\"\"A `TypedDict` matching the output format of JudgeLM.\"\"\"\n\n rating: List[float]\n rationale: str\n
"},{"location":"reference/distilabel/tasks/preference/judgelm/#distilabel.tasks.preference.judgelm.JudgeLMTask","title":"JudgeLMTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask
following the prompt template used by JudgeLM.
Parameters:
system_prompt (str, optional): the system prompt to be used for generation. Defaults to 'You are a helpful and precise assistant for checking the quality of the answer.'
task_description (Union[str, None], optional): the description of the task. Defaults to 'We would like to request your feedback on the performance of {num_responses} AI assistants in response to the user question displayed above.\\nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\\nPlease first output a single line containing only {num_responses} values indicating the scores for Assistants 1 to {num_responses}, respectively. The {num_responses} scores are separated by a space. In the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment.'
Source code in src/distilabel/tasks/preference/judgelm.py
@dataclass\nclass JudgeLMTask(PreferenceTask):\n \"\"\"A `PreferenceTask` following the prompt templated used by JudgeLM.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n \"\"\"\n\n task_description: str = (\n \"We would like to request your feedback on the performance of {num_responses} AI assistants in response to the\"\n \" user question displayed above.\\nPlease rate the helpfulness, relevance, accuracy, level of details\"\n \" of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher\"\n \" score indicates better overall performance.\\nPlease first output a single line containing only {num_responses}\"\n \" values indicating the scores for Assistants 1 to {num_responses}, respectively. The {num_responses} scores are separated by\"\n \" a space. In the subsequent line, please provide a comprehensive explanation of your evaluation,\"\n \" avoiding any potential bias and ensuring that the order in which the responses were presented does\"\n \" not affect your judgment.\"\n )\n system_prompt: str = \"You are a helpful and precise assistant for checking the quality of the answer.\"\n\n __jinja2_template__: ClassVar[str] = _JUDGELM_TEMPLATE\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the JudgeLM specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import JudgeLMTask\n >>> task = JudgeLMTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"[Question]\\nWhat are the first 5 Fibonacci numbers?\\n...\",\n )\n \"\"\"\n render_kwargs = {\n \"input\": input,\n \"responses\": generations,\n \"task_description\": self.task_description.format(\n num_responses=len(generations)\n ),\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> JudgeLMOutput:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n split_output = output.split(\"\\n\")\n rating = [float(rating) for rating in split_output[0].split(\" \")]\n rationale = \"\\n\".join(split_output[1:])\n return JudgeLMOutput(rating=rating, rationale=rationale)\n
"},{"location":"reference/distilabel/tasks/preference/judgelm/#distilabel.tasks.preference.judgelm.JudgeLMTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the JudgeLM specification.
Args:\n    input (str): the input to be used for the prompt.\n    generations (List[str]): the generations to be used for the prompt.\n\nReturns:\n    Prompt: the generated prompt.\n\nExamples:\n    >>> from distilabel.tasks.preference import JudgeLMTask\n    >>> task = JudgeLMTask(system_prompt="You are a helpful assistant.")\n    >>> task.generate_prompt("What are the first 5 Fibonacci numbers?", ["0 1 1 2 3", "0 1 1 2 3"])\n    Prompt(\n        system_prompt="You are a helpful assistant.",\n        formatted_prompt="[Question]\\nWhat are the first 5 Fibonacci numbers?\\n...",\n    )
Source code in src/distilabel/tasks/preference/judgelm.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the JudgeLM specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import JudgeLMTask\n >>> task = JudgeLMTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"[Question]\\nWhat are the first 5 Fibonacci numbers?\\n...\",\n )\n \"\"\"\n render_kwargs = {\n \"input\": input,\n \"responses\": generations,\n \"task_description\": self.task_description.format(\n num_responses=len(generations)\n ),\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/preference/judgelm/#distilabel.tasks.preference.judgelm.JudgeLMTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/judgelm.py
def parse_output(self, output: str) -> JudgeLMOutput:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n split_output = output.split(\"\\n\")\n rating = [float(rating) for rating in split_output[0].split(\" \")]\n rationale = \"\\n\".join(split_output[1:])\n return JudgeLMOutput(rating=rating, rationale=rationale)\n
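The parsing contract is simply "first line = space-separated ratings, remaining lines = rationale"; a small standard-library sketch over a hypothetical completion:
>>> raw = "8 6\nAssistant 1 was concise and accurate, while Assistant 2 missed part of the question."
>>> lines = raw.split("\n")
>>> [float(r) for r in lines[0].split(" ")]  # becomes the `rating` field
[8.0, 6.0]
>>> "\n".join(lines[1:])  # becomes the `rationale` field
'Assistant 1 was concise and accurate, while Assistant 2 missed part of the question.'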
"},{"location":"reference/distilabel/tasks/preference/ultrafeedback/","title":"ultrafeedback","text":""},{"location":"reference/distilabel/tasks/preference/ultrafeedback/#distilabel.tasks.preference.ultrafeedback.Rating","title":"Rating
","text":" Bases: TypedDict
A TypedDict
representing a rating.
Source code in src/distilabel/tasks/preference/ultrafeedback.py
class Rating(TypedDict):\n \"\"\"A `TypedDict` representing a rating.\"\"\"\n\n value: int\n description: str\n
"},{"location":"reference/distilabel/tasks/preference/ultrafeedback/#distilabel.tasks.preference.ultrafeedback.UltraFeedbackOutput","title":"UltraFeedbackOutput
","text":" Bases: TypedDict
A TypedDict
representing the output of an UltraFeedbackTask
.
Source code in src/distilabel/tasks/preference/ultrafeedback.py
class UltraFeedbackOutput(TypedDict):\n \"\"\"A `TypedDict` representing the output of an `UltraFeedbackTask`.\"\"\"\n\n rating: float\n rationale: str\n
"},{"location":"reference/distilabel/tasks/preference/ultrafeedback/#distilabel.tasks.preference.ultrafeedback.UltraFeedbackTask","title":"UltraFeedbackTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask
following the prompt template used by ULTRAFEEDBACK.
Parameters:
system_prompt (str, optional): the system prompt to be used for generation. Defaults to 'Your role is to evaluate text quality based on given criteria.'
task_description (Union[str, None], optional): the description of the task. Defaults to None.
ratings (Union[List[Rating], None], optional): the ratings to be used for the task. Defaults to None.
Source code in src/distilabel/tasks/preference/ultrafeedback.py
@dataclass\nclass UltraFeedbackTask(PreferenceTask):\n \"\"\"A `PreferenceTask` following the prompt template used by ULTRAFEEDBACK.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n ratings (Union[List[Rating], None], optional): the ratings to be used for the task. Defaults to `None`.\n \"\"\"\n\n ratings: List[Rating]\n task_description: str\n\n system_prompt: (\n str\n ) = \"Your role is to evaluate text quality based on given criteria.\"\n\n __jinja2_template__: ClassVar[str] = field(\n default=_ULTRAFEEDBACK_TEMPLATE, init=False, repr=False\n )\n __subtasks__: ClassVar[List[str]] = [\n \"text-quality\",\n \"helpfulness\",\n \"truthfulness\",\n \"honesty\",\n \"instruction-following\",\n ]\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the ULTRAFEEDBACK specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraFeedbackTask\n >>> task = UltraFeedbackTask.for_text_quality()\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"Your role is to evaluate text quality based on given criteria.\",\n formatted_prompt=\"# General Text Quality Assessment\\nEvaluate the model's ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description,\n \"ratings\": self.ratings,\n \"input\": input,\n \"responses\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> List[UltraFeedbackOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n parsed_output = []\n for section in output.split(\"#### Output for Text \")[1:]:\n rating, rationale = section.split(\"\\n\")[1:3]\n rating = float(rating.split(\": \")[1])\n rationale = rationale.split(\": \")[1]\n parsed_output.append(\n UltraFeedbackOutput(rating=rating, rationale=rationale)\n )\n return parsed_output\n\n # Override the default `to_argilla_dataset` method to provide the `ratings_values` of\n # UltraFeedback, as the default goes from 1-10 while UltraFeedback's default is 1-5\n # (0-4 actually, but Argilla doesn't support 0s).\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: str = \"generations\",\n ratings_column: str = \"rating\",\n rationale_column: str = \"rationale\",\n ratings_values: Optional[List[int]] = None,\n ) -> \"FeedbackDataset\":\n return super().to_argilla_dataset(\n dataset_row=dataset_row,\n generations_column=generations_column,\n ratings_column=ratings_column,\n rationale_column=rationale_column,\n ratings_values=ratings_values or [1, 2, 3, 4, 5],\n )\n\n @classmethod\n def for_text_quality(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # General Text Quality Assessment\n Evaluate the model's outputs based on various criteria:\n 1. 
**Correctness & Informativeness**: Does the output provide accurate and helpful information?\n 2. **Honesty & Uncertainty**: How confidently does the model convey its information, and does it express uncertainty appropriately?\n 3. **Truthfulness & Hallucination**: Does the model introduce misleading or fabricated details?\n 4. **Instruction Following**: Does the model's output align with given instructions and the user's intent?\n Your role is to provide a holistic assessment considering all the above factors.\n\n **Scoring**: Rate outputs 1 to 5 based on the overall quality, considering all aspects:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Low Quality**: Contains inaccuracies, may be entirely wrong or has severe hallucinations.\",\n ),\n Rating(\n value=2,\n description=\"**Moderate Quality**: Addresses some aspects, but has errors or is partially aligned with instructions.\",\n ),\n Rating(\n value=3,\n description=\"**Good**: Generally accurate but may contain minor errors or slight deviations.\",\n ),\n Rating(\n value=4,\n description=\"**Very Good**: Near perfect, with minor issues in terms of alignment or confidence.\",\n ),\n Rating(\n value=5,\n description=\"**Excellent**: Accurate, confident, aligned with instructions, and free of hallucinations.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_helpfulness(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Informativeness / Helpfulness Assessment\n Evaluate if model's outputs fulfill task objectives and provide high-quality, correct, and, informative content.\n Helpfulness assessment emphasizes **Overall Quality** regarding correctness and informativeness.\n **Correctness**: Accurate computation, reasoning steps, and outputs without misunderstandings or fabrication.\n\n **Scoring**: Score 1 to 5 based on extent of helpfulness, regarding both informativeness and correctness:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Severely Incorrect**: Contains significant inaccuracies or fabricated content, even if comprehensive information is provided.\",\n ),\n Rating(\n value=2,\n description=\"**Partially Incorrect**: Contains errors that may cause confusion, even though comprehensive information is present.\",\n ),\n Rating(\n value=3,\n description=\"**Correct**: Accurate and provides useful information that meets the task's requirements.\",\n ),\n Rating(\n value=4,\n description=\"**Highly Informative**: Accurate and extensive, providing valuable insights and detailed information.\",\n ),\n Rating(\n value=5,\n description=\"**Outstandingly Helpful**: Both accurate and in-depth, offering profound insights and comprehensive information.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_truthfulness(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": 
system_prompt})\n\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Truthfulness and Hallucination Assessment\n Evaluate the model's accuracy in providing information without introducing misleading or fabricated details.\n Assign numeric identifier (or \"None\") from 1 to 3 for each type of hallucination:\n 1. **Contradictory with the World (Factual Error)**: Entities, locations, concepts, or events that conflict with established knowledge.\n 2. **Contradictory with Instruction and Input**: Responses diverge, introducing new facts not aligned with instructions or inputs.\n 3. **Self-Contradictory / Logical Error**: Responses contain internal contradictions or logical errors within each independent text.\n\n **Scoring**: Rate outputs 1 to 5 based on the extent of hallucination:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Completely Hallucinated**: Entirely unreliable due to hallucinations.\",\n ),\n Rating(\n value=2,\n description=\"**Severe Hallucination**: Nearly half contains hallucinations, severe deviation from main points.\",\n ),\n Rating(\n value=3,\n description=\"**Partial Hallucination / Misunderstanding**: Overall truthful, partial misunderstanding due to hallucinations.\",\n ),\n Rating(\n value=4,\n description=\"**Insignificant Hallucination**: Mostly truthful, slight hallucination not affecting main points.\",\n ),\n Rating(\n value=5, description=\"**No Hallucination**: Free of hallucinations.\"\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n return cls(**kwargs)\n\n @classmethod\n def for_honesty(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Honesty and Uncertainty Expression Assessment\n Assess how well the model conveys honesty and uncertainty. Evaluate if the model's confidence aligns with response correctness.\n - **Indicators of Uncertainty**:\n 1. Weakeners: e.g., 'I guess,' 'probably.'\n 2. Refusal to answer.\n 3. Verbalized confidence scores: [0, 20] low; (20, 40] uncertain; (40, 60] moderate; (60, 80] leaning confident; (80, 100] high.\n - **No uncertainty expression indicate confidence.**\n - **Response Correctness**: Align with ground truth, or provide accurate content without fabrication.\n\n **Scoring**: Rate outputs 1 to 5 based on the extent of honesty and uncertainty:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(\n value=1,\n description=\"**Confidently Incorrect**: Confident but entirely wrong.\",\n ),\n Rating(\n value=2,\n description=\"**Confident with Significant Mistakes / Unconfident Incorrect**: Confident but contains major errors. Unconfident and entirely wrong.\",\n ),\n Rating(\n value=3,\n description=\"**Uncertain / 'I Don't Know' / Subtle Mistakes**: 'I don't know' or declines. Confident but contains minor errors. Unconfident and contains significant mistakes.\",\n ),\n Rating(\n value=4,\n description=\"**Correct but Uncertain / Expressed Subtle Mistakes**: Correct but unconfident.\",\n ),\n Rating(\n value=5,\n description=\"**Correct and Confident / Precisely Express Uncertainty**: Correct and confident. 
Makes mistakes, but precisely acknowledges minor errors and indicates uncertainty on potential mistakes.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n\n return cls(**kwargs)\n\n @classmethod\n def for_instruction_following(\n cls,\n system_prompt: Optional[str] = None,\n task_description: Optional[str] = None,\n ratings: Optional[List[Rating]] = None,\n ) -> \"UltraFeedbackTask\":\n kwargs = {}\n if system_prompt is not None:\n kwargs.update({\"system_prompt\": system_prompt})\n if task_description is None:\n task_description = dedent(\n \"\"\"\n # Instruction Following Assessment\n Evaluate alignment between output and intent. Assess understanding of task goal and restrictions.\n **Instruction Components**: Task Goal (intended outcome), Restrictions (text styles, formats, or designated methods, etc).\n\n **Scoring**: Rate outputs 1 to 5:\n \"\"\"\n )\n kwargs.update({\"task_description\": task_description})\n if ratings is None:\n ratings = [\n Rating(value=1, description=\"**Irrelevant**: No alignment.\"),\n Rating(\n value=2,\n description=\"**Partial Focus**: Addresses one aspect poorly.\",\n ),\n Rating(\n value=3,\n description=\"**Partial Compliance**:\\n\\t- (1) Meets goal or restrictions, neglecting other.\\n\\t- (2) Acknowledges both but slight deviations.\",\n ),\n Rating(\n value=4,\n description=\"**Almost There**: Near alignment, minor deviations.\",\n ),\n Rating(\n value=5,\n description=\"**Comprehensive Compliance**: Fully aligns, meets all requirements.\",\n ),\n ]\n kwargs.update({\"ratings\": ratings})\n\n return cls(**kwargs)\n
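A brief usage sketch (assuming the import path used in the docstring examples above): each for_* classmethod pre-fills the task description and a 1-5 rating scale for the corresponding subtask.
>>> from distilabel.tasks.preference import UltraFeedbackTask
>>> UltraFeedbackTask.__subtasks__
['text-quality', 'helpfulness', 'truthfulness', 'honesty', 'instruction-following']
>>> task = UltraFeedbackTask.for_instruction_following()
>>> [rating["value"] for rating in task.ratings]
[1, 2, 3, 4, 5]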
"},{"location":"reference/distilabel/tasks/preference/ultrafeedback/#distilabel.tasks.preference.ultrafeedback.UltraFeedbackTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the ULTRAFEEDBACK specification.
Args:\n    input (str): the input to be used for the prompt.\n    generations (List[str]): the generations to be used for the prompt.\n\nReturns:\n    Prompt: the generated prompt.\n\nExamples:\n    >>> from distilabel.tasks.preference import UltraFeedbackTask\n    >>> task = UltraFeedbackTask.for_text_quality()\n    >>> task.generate_prompt("What are the first 5 Fibonacci numbers?", ["0 1 1 2 3", "0 1 1 2 3"])\n    Prompt(\n        system_prompt="Your role is to evaluate text quality based on given criteria.",\n        formatted_prompt="# General Text Quality Assessment\\nEvaluate the model's ...",\n    )
Source code in src/distilabel/tasks/preference/ultrafeedback.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the ULTRAFEEDBACK specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraFeedbackTask\n >>> task = UltraFeedbackTask.for_text_quality()\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"Your role is to evaluate text quality based on given criteria.\",\n formatted_prompt=\"# General Text Quality Assessment\\nEvaluate the model's ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description,\n \"ratings\": self.ratings,\n \"input\": input,\n \"responses\": generations,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/preference/ultrafeedback/#distilabel.tasks.preference.ultrafeedback.UltraFeedbackTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/ultrafeedback.py
def parse_output(self, output: str) -> List[UltraFeedbackOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n parsed_output = []\n for section in output.split(\"#### Output for Text \")[1:]:\n rating, rationale = section.split(\"\\n\")[1:3]\n rating = float(rating.split(\": \")[1])\n rationale = rationale.split(\": \")[1]\n parsed_output.append(\n UltraFeedbackOutput(rating=rating, rationale=rationale)\n )\n return parsed_output\n
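The parser expects one "#### Output for Text N" block per generation, with a "Rating: ..." line followed by a "Rationale: ..." line; a standard-library sketch over a hypothetical completion:
>>> raw = (
...     "#### Output for Text 1\n"
...     "Rating: 4\n"
...     "Rationale: Mostly accurate with minor omissions.\n"
...     "#### Output for Text 2\n"
...     "Rating: 2\n"
...     "Rationale: Contains factual errors.\n"
... )
>>> for section in raw.split("#### Output for Text ")[1:]:
...     rating, rationale = section.split("\n")[1:3]
...     print(float(rating.split(": ")[1]), rationale.split(": ")[1])
4.0 Mostly accurate with minor omissions.
2.0 Contains factual errors.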
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/","title":"ultrajudge","text":""},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.Area","title":"Area
","text":" Bases: TypedDict
A TypedDict
representing an area of evaluation.
Source code in src/distilabel/tasks/preference/ultrajudge.py
class Area(TypedDict):\n \"\"\"A `TypedDict` representing an area of evaluation.\"\"\"\n\n rating: float\n rationale: str\n
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeOutput","title":"UltraJudgeOutput
","text":" Bases: TypedDict
A TypedDict
representing the output of the UltraJudge task.
Source code in src/distilabel/tasks/preference/ultrajudge.py
class UltraJudgeOutput(TypedDict):\n \"\"\"A `TypedDict` representing the output of the UltraJudge task.\"\"\"\n\n rating: float\n areas: Dict[str, Area]\n
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask","title":"UltraJudgeTask
dataclass
","text":" Bases: PreferenceTask
A PreferenceTask
for the UltraJudge task. The UltraJudge
task has been defined at Argilla specifically for a better evaluation using AI Feedback. The task is defined based on both UltraFeedback and JudgeLM, but with several improvements / modifications.
Parameters:
system_prompt (str, optional): the system prompt to be used for generation. Defaults to "You are an evaluator tasked with assessing AI assistants' responses from the perspective of typical user preferences. Your critical analysis should focus on human-like engagement, solution effectiveness, accuracy, clarity, and creativity. Approach each response as if you were the user, considering how well the response meets your needs and expectations in a real-world scenario. Provide detailed feedback that highlights strengths and areas for improvement in each response, keeping in mind the goal of simulating a human's preferred choice. Your evaluation should be impartial and thorough, reflecting a human's perspective in preferring responses that are practical, clear, authentic, and aligned with their intent. Avoid bias, and focus on the content and quality of the responses."
task_description (Union[str, None], optional): the description of the task. Defaults to "Your task is to rigorously evaluate the performance of {num_responses} AI assistants, simulating a human's perspective. You will assess each response based on four key domains, reflecting aspects that are typically valued by humans: {areas}. First provide a score between 0 and 10 and write a detailed feedback for each area and assistant. Finally, provide a list of {num_responses} scores, each separated by a space, to reflect the performance of Assistants 1 to {num_responses}."
areas (List[str], optional): the areas to be used for the task. Defaults to a list of four areas: "Practical Accuracy", "Clarity & Transparency", "Authenticity & Reliability", and "Compliance with Intent".
Source code in src/distilabel/tasks/preference/ultrajudge.py
@dataclass\nclass UltraJudgeTask(PreferenceTask):\n \"\"\"A `PreferenceTask` for the UltraJudge task. The `UltraJudge` task has been defined\n at Argilla specifically for a better evaluation using AI Feedback. The task is defined\n based on both UltraFeedback and JudgeLM, but with several improvements / modifications.\n\n Args:\n system_prompt (str, optional): the system prompt to be used for generation. Defaults to `None`.\n task_description (Union[str, None], optional): the description of the task. Defaults to `None`.\n areas (List[str], optional): the areas to be used for the task. Defaults to a list of four areas:\n \"Practical Accuracy\", \"Clarity & Transparency\", \"Authenticity & Reliability\", and \"Compliance with Intent\".\n \"\"\"\n\n system_prompt: str = (\n \"You are an evaluator tasked with assessing AI assistants' responses from the perspective of typical user preferences.\"\n \" Your critical analysis should focus on human-like engagement, solution effectiveness, accuracy, clarity, and\"\n \" creativity. Approach each response as if you were the user, considering how well the response meets your needs\"\n \" and expectations in a real-world scenario. Provide detailed feedback that highlights strengths and areas for\"\n \" improvement in each response, keeping in mind the goal of simulating a human's preferred choice. \"\n \"Your evaluation should be impartial and thorough, reflecting a human's perspective in preferring responses that are practical,\"\n \" clear, authentic, and aligned with their intent. Avoid bias, and focus on the content and quality of the responses.\"\n )\n\n task_description: str = (\n \"Your task is to rigorously evaluate the performance of {num_responses} AI assistants, simulating a human's perspective.\"\n \" You will assess each response based on four key domains, reflecting aspects that are typically valued by humans:\"\n \" {areas}.\"\n \" First provide a score between 0 and 10 and write a detailed feedback for each area and assistant.\"\n \" Finally, provide a list of {num_responses} scores, each separated by a space, to reflect the performance of Assistants 1 to {num_responses}.\"\n )\n\n areas: List[str] = field(\n default_factory=lambda: [\n \"Practical Accuracy\",\n \"Clarity & Transparency\",\n \"Authenticity & Reliability\",\n \"Compliance with Intent\",\n ]\n )\n\n __jinja2_template__: ClassVar[str] = field(\n default=_ULTRAJUDGE_TEMPLATE, init=False, repr=False\n )\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the names of the output arguments of the task.\"\"\"\n return [\"rating\", \"areas\"]\n\n @property\n def areas_str(self) -> str:\n \"\"\"Returns a string representation of the areas.\"\"\"\n return \", \".join(self.areas[:-1]) + \", and \" + self.areas[-1]\n\n @property\n def extract_area_score_and_rationale_regex(self) -> str:\n \"\"\"Returns a regex to extract the area, score, and rationale from the output.\"\"\"\n return rf\"({'|'.join(self.areas)})\\s*-\\s*(\\d+(?:\\.\\d+)?)\\n(.*?)(?=\\n\\n|\\Z)\"\n\n @property\n def extract_final_scores_regex(self) -> str:\n \"\"\"Returns a regex to extract the final scores from the output.\"\"\"\n return r\"Final scores:\\s*((?:\\d+(?:\\.\\d+)?\\s*)+)\"\n\n def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the UltraJudge specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the 
generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraJudgeTask\n >>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description.format(\n num_responses=len(generations), areas=self.areas_str\n ),\n \"instruction\": input,\n \"responses\": generations,\n }\n\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n def parse_output(self, output: str) -> List[UltraJudgeOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n num_areas = len(self.areas)\n # `areas_results` includes num_generations * num_areas tuples\n areas_results = re.findall(self.extract_area_score_and_rationale_regex, output)\n final_scores = [\n float(str_score)\n for str_score in re.findall(self.extract_final_scores_regex, output)[\n 0\n ].split(\" \")\n ]\n\n outputs = []\n for i, rating in enumerate(final_scores):\n areas = {}\n # Get the areas for the i-th generation\n for area in areas_results[i * num_areas : i * num_areas + num_areas]:\n name, area_rating, rationale = area\n areas[name] = Area(rating=area_rating, rationale=rationale)\n outputs.append(UltraJudgeOutput(rating=rating, areas=areas))\n\n return outputs\n\n def _merge_rationales(\n self, rationales: List[Dict[str, Any]], generations_column: str = \"generations\"\n ) -> str:\n \"\"\"Overwrite of the `_merge_rationales` as we need to process the areas before merging.\"\"\"\n\n def format_area(area: Dict[str, Any]) -> str:\n sections = []\n for title, ratings in area.items():\n sections.append(title)\n for k, v in ratings.items():\n sections.append(f\"{k}:{v}\")\n return \"\\n\".join(sections)\n\n merged_rationales = []\n for idx, area in enumerate(rationales, start=1):\n merged_rationales.append(\n f\"{generations_column}-{idx}:\\n{format_area(area)}\\n\"\n )\n return \"\\n\".join(merged_rationales)\n
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.areas_str","title":"areas_str: str
property
","text":"Returns a string representation of the areas.
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.extract_area_score_and_rationale_regex","title":"extract_area_score_and_rationale_regex: str
property
","text":"Returns a regex to extract the area, score, and rationale from the output.
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.extract_final_scores_regex","title":"extract_final_scores_regex: str
property
","text":"Returns a regex to extract the final scores from the output.
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the names of the output arguments of the task.
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.generate_prompt","title":"generate_prompt(input, generations, **_)
","text":"Generates a prompt following the UltraJudge specification.
Parameters:
input (str): the input to be used for the prompt. Required.
generations (List[str]): the generations to be used for the prompt. Required.
Returns:
Prompt: the generated prompt.
Examples:
>>> from distilabel.tasks.preference import UltraJudgeTask\n>>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n>>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\nPrompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n)\n
Source code in src/distilabel/tasks/preference/ultrajudge.py
def generate_prompt(self, input: str, generations: List[str], **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the UltraJudge specification.\n\n Args:\n input (str): the input to be used for the prompt.\n generations (List[str]): the generations to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.preference import UltraJudgeTask\n >>> task = UltraJudgeTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\", [\"0 1 1 2 3\", \"0 1 1 2 3\"])\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"Your task is to rigorously evaluate the performance of ...\",\n )\n \"\"\"\n render_kwargs = {\n \"task_description\": self.task_description.format(\n num_responses=len(generations), areas=self.areas_str\n ),\n \"instruction\": input,\n \"responses\": generations,\n }\n\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/preference/ultrajudge/#distilabel.tasks.preference.ultrajudge.UltraJudgeTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/preference/ultrajudge.py
def parse_output(self, output: str) -> List[UltraJudgeOutput]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n num_areas = len(self.areas)\n # `areas_results` includes num_generations * num_areas tuples\n areas_results = re.findall(self.extract_area_score_and_rationale_regex, output)\n final_scores = [\n float(str_score)\n for str_score in re.findall(self.extract_final_scores_regex, output)[\n 0\n ].split(\" \")\n ]\n\n outputs = []\n for i, rating in enumerate(final_scores):\n areas = {}\n # Get the areas for the i-th generation\n for area in areas_results[i * num_areas : i * num_areas + num_areas]:\n name, area_rating, rationale = area\n areas[name] = Area(rating=area_rating, rationale=rationale)\n outputs.append(UltraJudgeOutput(rating=rating, areas=areas))\n\n return outputs\n
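Since the parsing hinges on the two regex properties above, a standard-library sketch (with a deliberately truncated, hypothetical output containing a single area block) shows what each expression captures:
>>> import re
>>> areas = ["Practical Accuracy", "Clarity & Transparency", "Authenticity & Reliability", "Compliance with Intent"]
>>> area_regex = rf"({'|'.join(areas)})\s*-\s*(\d+(?:\.\d+)?)\n(.*?)(?=\n\n|\Z)"
>>> scores_regex = r"Final scores:\s*((?:\d+(?:\.\d+)?\s*)+)"
>>> sample = "Practical Accuracy - 9\nCorrect and actionable.\n\nFinal scores: 9 7"
>>> re.findall(area_regex, sample)
[('Practical Accuracy', '9', 'Correct and actionable.')]
>>> re.findall(scores_regex, sample)[0].split(" ")
['9', '7']
On a complete output, parse_output expects len(areas) area matches per generation plus one "Final scores:" line whose count of scores equals the number of generations.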
"},{"location":"reference/distilabel/tasks/text_generation/","title":"text_generation","text":""},{"location":"reference/distilabel/tasks/text_generation/base/","title":"base","text":""},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask","title":"TextGenerationTask
dataclass
","text":" Bases: Task
A base Task
definition for text generation using LLMs.
Parameters:
system_prompt (str, optional): the system prompt to be used. Defaults to "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
principles (Dict[str, List[str]], optional): the principles to be used for the system prompt. Defaults to a dict mapping 'harmlessness', 'helpfulness', 'truthfulness', 'honesty', and 'verbalized_calibration' to the corresponding UltraFeedbackPrinciples lists.
principles_distribution (Union[Dict[str, float], Literal['balanced'], None], optional): the distribution of principles to be used for the system prompt. Defaults to None.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> task = TextGenerationTask()\n
Source code in src/distilabel/tasks/text_generation/base.py
@dataclass\nclass TextGenerationTask(Task):\n \"\"\"A base `Task` definition for text generation using LLMs.\n\n Args:\n system_prompt (str, optional): the system prompt to be used. Defaults to `None`.\n principles (Dict[str, List[str]], optional): the principles to be used for the system prompt.\n Defaults to `None`.\n principles_distribution (Union[Dict[str, float], Literal[\"balanced\"], None], optional): the\n distribution of principles to be used for the system prompt. Defaults to `None`.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = TextGenerationTask()\n \"\"\"\n\n system_prompt: str = (\n \"You are a helpful, respectful and honest assistant. Always answer as helpfully as possible,\"\n \" while being safe. Your answers should not include any harmful, unethical, racist, sexist,\"\n \" toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased\"\n \" and positive in nature.\\nIf a question does not make any sense, or is not factually coherent,\"\n \" explain why instead of answering something not correct. If you don't know the answer to a\"\n \" question, please don't share false information.\"\n )\n principles: Dict[str, List[str]] = field(\n default_factory=lambda: {\n \"harmlessness\": UltraFeedbackPrinciples.harmlessness,\n \"helpfulness\": UltraFeedbackPrinciples.helpfulness,\n \"truthfulness\": UltraFeedbackPrinciples.truthfulness,\n \"honesty\": UltraFeedbackPrinciples.honesty,\n \"verbalized_calibration\": UltraFeedbackPrinciples.verbalized_calibration,\n },\n repr=False,\n )\n principles_distribution: Union[Dict[str, float], Literal[\"balanced\"], None] = None\n\n __type__: ClassVar[Literal[\"generation\"]] = \"generation\"\n\n def __post_init__(self) -> None:\n \"\"\"Validates the `principles_distribution` if it is a dict.\n\n Raises:\n ValueError: if the `principles_distribution` is a dict and it does not sum to 1.0.\n ValueError: if the `principles` are not included in the `principles_distribution`.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n not_included_principles = [\n principle\n for principle in self.principles\n if principle not in self.principles_distribution\n ]\n if not_included_principles:\n principles_str = \", \".join(\n [f\"'{principle}'\" for principle in not_included_principles]\n )\n raise ValueError(\n f\"Principles {principles_str} included in `principles` is not in\"\n \" `principles_distribution`\"\n )\n\n if sum(self.principles_distribution.values()) != 1.0:\n raise ValueError(\n \"`principles_distribution` must sum to 1.0 if it is a dict containing\"\n \" the distribution of principles to use.\"\n )\n\n def _get_principle(self) -> str:\n \"\"\"Gets a principle from the `principles` dict respecting the `principal_distribution`.\n\n Returns:\n str: the principle to be used.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n principle_group = random.choices(\n list(self.principles_distribution.keys()),\n weights=list(self.principles_distribution.values()),\n k=1,\n )[0]\n else:\n principle_group = random.choice(list(self.principles.keys()))\n return random.choice(self.principles[principle_group])\n\n def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates the prompt to be used for generation.\n\n Args:\n input (str): the input to be used for generation.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = 
TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n \"\"\"\n system_prompt = self.system_prompt\n if self.principles_distribution is not None:\n principle = self._get_principle()\n system_prompt += \" \" + principle\n return Prompt(system_prompt=system_prompt, formatted_prompt=input)\n\n def parse_output(self, output: str) -> Dict[str, str]:\n \"\"\"Parses the output of the LLM into the desired format.\"\"\"\n return {\"generations\": output}\n\n @property\n def input_args_names(self) -> List[str]:\n \"\"\"Returns the input args names for the task.\"\"\"\n return [\"input\"]\n\n @property\n def output_args_names(self) -> List[str]:\n \"\"\"Returns the output args names for the task.\"\"\"\n return [\"generations\"]\n\n def to_argilla_dataset(\n self,\n dataset_row: Dict[str, Any],\n generations_column: Optional[str] = \"generations\",\n ) -> \"FeedbackDataset\":\n # First we infer the fields from the input_args_names, but we could also\n # create those manually instead using `rg.TextField(...)`\n fields = infer_fields_from_dataset_row(\n field_names=self.input_args_names + self.output_args_names,\n dataset_row=dataset_row,\n )\n # Then we add a default `RatingQuestion` which asks the users to provide a\n # rating for each of the generations, differing from the scenario where the inputs\n # are the fields and the outputs the ones used to formulate the quesstions. So on,\n # in this scenario we won't have suggestions, as the questions will be related to the\n # combination of inputs and outputs.\n if generations_column is None or generations_column not in dataset_row:\n raise ValueError(\n f\"The `generations_column='{generations_column}'` is not present in the dataset\"\n f\" row. 
Please provide any of {list(dataset_row.keys())}.\",\n )\n questions = []\n for idx in range(1, len(dataset_row[generations_column]) + 1):\n questions.append(\n rg.RatingQuestion( # type: ignore\n name=f\"{generations_column}-{idx}-rating\",\n title=f\"How would you rate the generation at `{generations_column}-{idx}`?\",\n values=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n )\n # Finally, we define some metadata properties that can be potentially used\n # while exploring the dataset within Argilla to get more insights on the data.\n metadata_properties = []\n for arg_name in self.input_args_names + self.output_args_names:\n if isinstance(dataset_row[arg_name], list):\n for idx in range(1, len(dataset_row[arg_name]) + 1):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}-{idx}\") # type: ignore\n )\n elif isinstance(dataset_row[arg_name], str):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}\") # type: ignore\n )\n else:\n warnings.warn(\n f\"Unsupported input type ({type(dataset_row[arg_name])}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we just return the `FeedbackDataset` with the fields, questions, and metadata properties\n # defined above.\n return rg.FeedbackDataset(\n fields=fields,\n questions=questions,\n metadata_properties=metadata_properties, # Note that these are always optional\n )\n\n def to_argilla_record(self, dataset_row: Dict[str, Any]) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names + self.output_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n # TODO: value formatting was included here due to some issues\n # with `SelfInstructTask` but these list-parsing may not be needed\n # anymore.\n value = (\n value.strip()\n if isinstance(value, str)\n else \"\\n\".join(value)\n if isinstance(value, list)\n else \"\"\n )\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(fields=fields, metadata=metadata)\n
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.input_args_names","title":"input_args_names: List[str]
property
","text":"Returns the input args names for the task.
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.output_args_names","title":"output_args_names: List[str]
property
","text":"Returns the output args names for the task.
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.__post_init__","title":"__post_init__()
","text":"Validates the principles_distribution
if it is a dict.
Raises:
ValueError: if the principles_distribution is a dict and it does not sum to 1.0.
ValueError: if the principles are not included in the principles_distribution.
Source code in src/distilabel/tasks/text_generation/base.py
def __post_init__(self) -> None:\n \"\"\"Validates the `principles_distribution` if it is a dict.\n\n Raises:\n ValueError: if the `principles_distribution` is a dict and it does not sum to 1.0.\n ValueError: if the `principles` are not included in the `principles_distribution`.\n \"\"\"\n if isinstance(self.principles_distribution, dict):\n not_included_principles = [\n principle\n for principle in self.principles\n if principle not in self.principles_distribution\n ]\n if not_included_principles:\n principles_str = \", \".join(\n [f\"'{principle}'\" for principle in not_included_principles]\n )\n raise ValueError(\n f\"Principles {principles_str} included in `principles` is not in\"\n \" `principles_distribution`\"\n )\n\n if sum(self.principles_distribution.values()) != 1.0:\n raise ValueError(\n \"`principles_distribution` must sum to 1.0 if it is a dict containing\"\n \" the distribution of principles to use.\"\n )\n
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.generate_prompt","title":"generate_prompt(input, **_)
","text":"Generates the prompt to be used for generation.
Parameters:
input (str): the input to be used for generation. Required.
Returns:
Prompt: the generated prompt.
Examples:
>>> from distilabel.tasks.text_generation import TextGenerationTask\n>>> task = TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n>>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\nPrompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n
Source code in src/distilabel/tasks/text_generation/base.py
def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates the prompt to be used for generation.\n\n Args:\n input (str): the input to be used for generation.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import TextGenerationTask\n >>> task = TextGenerationTask(system_prompt=\"You are a helpful assistant.\")\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(system_prompt='You are a helpful assistant.', formatted_prompt='What are the first 5 Fibonacci numbers?')\n \"\"\"\n system_prompt = self.system_prompt\n if self.principles_distribution is not None:\n principle = self._get_principle()\n system_prompt += \" \" + principle\n return Prompt(system_prompt=system_prompt, formatted_prompt=input)\n
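As a rough sketch (assuming the import path from the docstring example above), setting principles_distribution="balanced" makes generate_prompt append one randomly chosen principle to the system prompt while leaving the formatted prompt untouched:
>>> from distilabel.tasks.text_generation import TextGenerationTask
>>> task = TextGenerationTask(principles_distribution="balanced")
>>> prompt = task.generate_prompt("What are the first 5 Fibonacci numbers?")
>>> prompt.formatted_prompt
'What are the first 5 Fibonacci numbers?'
>>> prompt.system_prompt.startswith("You are a helpful, respectful and honest assistant")
True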
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the LLM into the desired format.
Source code in src/distilabel/tasks/text_generation/base.py
def parse_output(self, output: str) -> Dict[str, str]:\n \"\"\"Parses the output of the LLM into the desired format.\"\"\"\n return {\"generations\": output}\n
"},{"location":"reference/distilabel/tasks/text_generation/base/#distilabel.tasks.text_generation.base.TextGenerationTask.to_argilla_record","title":"to_argilla_record(dataset_row)
","text":"Converts a dataset row to an Argilla FeedbackRecord
.
Source code in src/distilabel/tasks/text_generation/base.py
def to_argilla_record(self, dataset_row: Dict[str, Any]) -> \"FeedbackRecord\":\n \"\"\"Converts a dataset row to an Argilla `FeedbackRecord`.\"\"\"\n # We start off with the fields, which are the inputs of the LLM, but also\n # build the metadata from them, as previously specified within the\n fields, metadata = {}, {}\n for arg_name in self.input_args_names + self.output_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n # TODO: value formatting was included here due to some issues\n # with `SelfInstructTask` but these list-parsing may not be needed\n # anymore.\n value = (\n value.strip()\n if isinstance(value, str)\n else \"\\n\".join(value)\n if isinstance(value, list)\n else \"\"\n )\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(model_metadata_from_dataset_row(dataset_row=dataset_row))\n # Finally, we return the `FeedbackRecord` with the fields and the metadata\n return rg.FeedbackRecord(fields=fields, metadata=metadata)\n
"},{"location":"reference/distilabel/tasks/text_generation/principles/","title":"principles","text":""},{"location":"reference/distilabel/tasks/text_generation/principles/#distilabel.tasks.text_generation.principles.UltraFeedbackPrinciples","title":"UltraFeedbackPrinciples
","text":"A class containing a list of principles from the UltraFeedback paper, that can be injected into the system prompt given to the LLM.
References: https://github.com/OpenBMB/UltraFeedback
Source code in src/distilabel/tasks/text_generation/principles.py
class UltraFeedbackPrinciples:\n \"\"\"A class containing a list of principles from the UltraFeedback paper, that can\n be injected into the system prompt given to the LLM.\n\n References:\n - https://github.com/OpenBMB/UltraFeedback\n \"\"\"\n\n helpfulness = [\n \"The assistant should provide users with accurate, relevant, and up-to-date information, ensuring that the content is positive, interesting, engaging, educational, and helpful.\",\n \"As an AI assistant, it's your job to ensure that the information you provide to users is accurate, current, and relevant. Offer comprehensive details to educate and engage the user.\",\n \"Your role as an AI assistant is to be a reliable and helpful source of information. Always verify the facts and make sure they're up-to-date before sharing them.\",\n \"As an AI assistant, inspire users with your knowledgeable insights. Provide content that is positive, interesting, and engaging, ensuring users find your help valuable.\",\n \"Make sure your responses are always educational yet engaging, allowing users to learn something new each time they interact with you. You are an AI assistant after all!\",\n \"As a top-tier AI assistant, make sure that your responses are not only accurate and current but also engaging. Be the positive interaction users look forward to.\",\n \"As an AI assistant, it's crucial that you motivate users by providing them with meaningful and exciting content that is also helpful and informative.\",\n \"Cultivate a helpful AI persona by ensuring you offer users accurate, positive, and engaging content. Make every interaction informative and delightful.\",\n \"As an AI assistant, ensure your response offers the perfect blend of accuracy, positivity, and intrigue. Strive to be educational while keeping the user engaged.\",\n \"It's your duty as an AI assistant to always deliver accurate, positive, and engaging content that serves to educate the user. Make every interaction a learning opportunity.\",\n \"As an AI assistant, provide not just answers but also spark curiosity. Offer insights that are accurate, engaging, and positive, making learning an exciting journey for the user.\",\n ]\n harmlessness = [\n \"The assistant should not provide answers that could cause physical, psychological, or social harm to any group of people; damage to or loss of equipment or property; damage to the environment; or harm to institutions or resources necessary to human wellbeing. Feel free to refuse to answer questions that could cause the above harm.\",\n \"You are an AI assistant that respects all humans, their equipment and environment. Never offer suggestions that could lead to harm or damage any of the above mentioned.\",\n 'You are an AI assistant that maintains a principle of \"First do no harm\". In all interactions, ensure you do not promote any form of harm, be it to persons, property or the environment.',\n \"You are an AI assistant. Stick to providing information that cannot be harmful physically, mentally or socially to any individual or group. Don't answer any queries that might lead to such consequences.\",\n \"As an AI assistant, refrain from advising on actions that might harm any person or group of persons, or cause damage to property, environment or resources necessary for human wellbeing.\",\n \"You're an AI helper wishing to provide safe assistance for all users. Discard any action that could lead to damage to people, property, or institutions.\",\n \"You are a responsible AI assistant. 
Always stay polite, respectful, and ensure to not give advice that could lead to harm or loss of any sort. \",\n \"As an AI assistant, your primary focus should be on the safety and wellness of the users. Never provide answers that could cause harm or damage to individuals, their property or the environment.\",\n \"You are an AI assistant that is centred around being harmless. Never encourage violence, harm to others, or damage to property or the environment.\",\n \"As an AI assistant, avoid providing information or support that could result in injury, mental distress, or harm to people, their property, or the environment.\",\n \"You are an AI assistant. While providing assistance, avoid suggesting actions that may harm people, their belongings or property, environment or critical resources.\",\n ]\n honesty = [\n \"The assistant should be honest about whether it knows the answer and express its uncertainty explicitly. Be confident on questions it knows well and be modest on those it is unfamiliar with. Use weakeners such as 'I guess', 'I suppose', 'probably', and 'perhaps' to express uncertainty, and feel free to answer 'I don't know' if necessary.\",\n \"As an AI, honesty is crucial to build trust with your user. If you're unsure about an answer, make sure to express your uncertainty, or consult additional resources (if available) before providing an answer. \",\n \"As an AI, assertiveness and confidence should be balanced with honesty and humility. If the answer to a query is not within your knowledge base, it is entirely acceptable to express that you don't know the answer. \",\n \"You are an AI assistant that values honesty. If you don't know the answer to a question, admit it, but offer to find out more about the topic if needed.\",\n \"You're not only an AI assistant, but also a reliable partner. Whenever you give an answer, make sure that the information is reliable and true, and if you're not certain about something, don't hesitate to express it. \",\n 'You are an AI assistant. Your prime objective is to provide useful and accurate information. In case you aren\\'t certain of an answer, use phrases such as \"I\\'m not sure\", or \"I think\". Honesty always comes first.',\n \"As a digital assistant, your duty is to ensure that every piece of information that you give out is in line with the truth, even if the truth is that you do not know or are unsure about the answer to a query.\",\n \"Remember, as an AI assistant, it's better to acknowledge uncertainty or simply say 'I don't know' than to give potentially incorrect information. Honesty leads to trust.\",\n \"As an AI assistant, honesty is appreciated by your users. If you are uncertain of an answer, it's okay to express doubt or admit to not knowing. Offer to search for more information if needed. \",\n \"You are an AI helper that portrays honesty in every response. Never shy away from admitting if you do not have the needed information; instead offer to look up the information. \",\n \"Remember, your aim as an AI assistant is to deliver accurate information. If the answer is not within your database, it is okay to say you don't know rather than providing inaccurate information.\",\n ]\n verbalized_calibration = [\n \"The assistant should express its confidence as a scalar at the end of the response. The confidence level indicates the degree of certainty it has about its answer and is represented as a percentage. 
For instance, if the confidence level is 80%, it means the assistant is 80% certain that its answer is correct whereas there is a 20% chance that the assistant may be incorrect.\\nThe format is as follows:\\n[Question]\\n[Answer]\\nConfidence: [The assistant's confidence level, numerical numbers only, e.g. 80%]\\nHere, tags like [Question] and [Answer] are placeholders and should be omitted in the response.\\n\"\n ]\n truthfulness = [\n \"The assistant should answer truthfully and be faithful to factual knowledge as well as given contexts, never making up any new facts that aren't true or cannot be grounded in the instruction.\",\n \"As an AI assistant, it is important that you answer truthfully. Adhere to factual information and context while making sure you don't invent any new, unverified facts.\",\n \"As an AI assistant, conduct yourself with honesty. Always be diligent in giving accurate responses. Fictional facts or inaccuracies should be avoided in your responses.\",\n \"As an AI assistant, stick to the truth. Your aim should always be to provide precise and factual information to avoid any misinformation. \",\n \"As an AI assistant, factual consistency is key. Make sure all your answers are grounded in truth and are not misleading in any way.\",\n \"As an AI assistant, keep in mind that it's of utmost importance to stay true to the task and present only verified and accurate information.\",\n \"As an AI assistant, find and uphold the truth in every answer. Ensure all responses are based on facts and try to avoid speculation or invention of new facts.\",\n \"As an AI assistant, your job is to deliver answers that are truth-based and avoid introducing any untruthful or groundless information.\",\n \"As an AI assistant, maintain objectivity while answering questions. Make sure the information given is truthful and never fabricated.\",\n \"As an AI assistant, maintain the integrity of your responses. Always provide genuine and credible information, and ensure no incorrect data is delivered.\",\n \"As an AI assistant, operate under the principle of truthfulness. Keep up-to-date with verified information and refrain from providing anything that might mislead. \\n\",\n ]\n
"},{"location":"reference/distilabel/tasks/text_generation/self_instruct/","title":"self_instruct","text":""},{"location":"reference/distilabel/tasks/text_generation/self_instruct/#distilabel.tasks.text_generation.self_instruct.SelfInstructTask","title":"SelfInstructTask
dataclass
","text":" Bases: TextGenerationTask
A TextGenerationTask
following the Self-Instruct specification for building the prompts.
Reference: https://github.com/yizhongw/self-instruct
Parameters:
Name Type Description Default
system_prompt
str
the system prompt to be used. Defaults to None
.
'You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks. You are given a task description and a set of instructions for how to write the prompts for an specific AI application.'
principles
Dict[str, List[str]]
the principles to be used for the system prompt. Defaults to None
.
field(default_factory=lambda : {'harmlessness': harmlessness, 'helpfulness': helpfulness, 'truthfulness': truthfulness, 'honesty': honesty, 'verbalized_calibration': verbalized_calibration}, repr=False)
principles_distribution
Union[Dict[str, float], Literal[balanced], None]
the distribution of principles to be used for the system prompt. Defaults to None
.
None
application_description
str
the description of the AI application. Defaults to \"AI assistant\".
'AI assistant'
num_instructions
int
the number of instructions to be used for the prompt. Defaults to 5.
5
Source code in src/distilabel/tasks/text_generation/self_instruct.py
@dataclass\nclass SelfInstructTask(TextGenerationTask):\n \"\"\"A `TextGenerationTask` following the Self-Instruct specification for building\n the prompts.\n\n Reference: https://github.com/yizhongw/self-instruct\n\n Args:\n system_prompt (str, optional): the system prompt to be used. Defaults to `None`.\n principles (Dict[str, List[str]], optional): the principles to be used for the system prompt.\n Defaults to `None`.\n principles_distribution (Union[Dict[str, float], Literal[\"balanced\"], None], optional): the\n distribution of principles to be used for the system prompt. Defaults to `None`.\n application_description (str, optional): the description of the AI application. Defaults to\n \"AI assistant\".\n num_instructions (int, optional): the number of instructions to be used for the prompt.\n Defaults to 5.\n \"\"\"\n\n system_prompt: str = (\n \"You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks.\"\n \" You are given a task description and a set of instructions for how to write the prompts for an\"\n \" specific AI application.\"\n )\n application_description: str = \"AI assistant\"\n num_instructions: int = 5\n\n __jinja2_template__: str = _SELF_INSTRUCT_TEMPLATE\n\n def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the Self-Instruct specification.\n\n Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\\nDevelop 2 user queries that ...\",\n )\n \"\"\"\n render_kwargs = {\n \"application_description\": self.application_description,\n \"num_instructions\": self.num_instructions,\n \"input\": input,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n\n @property\n def output_args_names(self) -> List[str]:\n return [\"instructions\"]\n\n def parse_output(self, output: str) -> Dict[str, List[str]]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = re.compile(r\"\\d+\\.\\s*(.*?)\\n\")\n return {\"instructions\": pattern.findall(output)}\n\n def to_argilla_dataset(self, dataset_row: Dict[str, Any]) -> \"FeedbackDataset\":\n # First we infer the fields from the input_args_names, but we could also\n # create those manually instead using `rg.TextField(...)`\n fields = infer_fields_from_dataset_row(\n field_names=self.input_args_names,\n dataset_row=dataset_row,\n )\n # Once the input fields have been defined, then we also include the instruction\n # field which will be fulfilled with each of the instructions generated.\n fields.append(rg.TextField(name=\"instruction\", title=\"instruction\")) # type: ignore\n # Then we add a default `RatingQuestion` which asks the users to provide a\n # rating for each of the generations, differing from the scenario where the inputs\n # are the fields and the outputs the ones used to formulate the quesstions. 
So on,\n # in this scenario we won't have suggestions, as the questions will be related to the\n # combination of inputs and outputs.\n questions = [\n rg.RatingQuestion( # type: ignore\n name=\"instruction-rating\",\n title=\"How would you rate the generated instruction?\",\n values=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n )\n ]\n # Finally, we define some metadata properties that can be potentially used\n # while exploring the dataset within Argilla to get more insights on the data.\n metadata_properties = []\n for arg_name in self.input_args_names:\n if isinstance(dataset_row[arg_name], list):\n for idx in range(1, len(dataset_row[arg_name]) + 1):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}-{idx}\") # type: ignore\n )\n elif isinstance(dataset_row[arg_name], str):\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=f\"length-{arg_name}\") # type: ignore\n )\n else:\n warnings.warn(\n f\"Unsupported input type ({type(dataset_row[arg_name])}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n metadata_properties.append(\n rg.IntegerMetadataProperty(name=\"length-instruction\") # type: ignore\n ) # type: ignore\n # Then we just return the `FeedbackDataset` with the fields, questions, and metadata properties\n # defined above.\n return rg.FeedbackDataset(\n fields=fields,\n questions=questions, # type: ignore\n metadata_properties=metadata_properties, # Note that these are always optional\n )\n\n def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n instructions_column: Optional[str] = \"instructions\",\n ) -> List[\"FeedbackRecord\"]:\n \"\"\"Converts a dataset row to a list of Argilla `FeedbackRecord`s.\"\"\"\n records = []\n for instructions in dataset_row[instructions_column]: # type: ignore\n for instruction in instructions:\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n value = value.strip() if isinstance(value, str) else \"\"\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n fields[\"instruction\"] = instruction\n metadata[\"length-instruction\"] = len(instruction)\n\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(\n model_metadata_from_dataset_row(dataset_row=dataset_row)\n )\n # Finally, we append the `FeedbackRecord` with the fields and the metadata\n records.append(rg.FeedbackRecord(fields=fields, metadata=metadata))\n if not records:\n raise ValueError(\n f\"Skipping the row {dataset_row} as the list of `FeedbackRecord` is empty as those could not be inferred.\"\n )\n return records\n
"},{"location":"reference/distilabel/tasks/text_generation/self_instruct/#distilabel.tasks.text_generation.self_instruct.SelfInstructTask.generate_prompt","title":"generate_prompt(input, **_)
","text":"Generates a prompt following the Self-Instruct specification.
Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\n
Develop 2 user queries that ...\", )
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def generate_prompt(self, input: str, **_: Any) -> Prompt:\n \"\"\"Generates a prompt following the Self-Instruct specification.\n\n Args:\n input (str): the input to be used for the prompt.\n\n Returns:\n Prompt: the generated prompt.\n\n Examples:\n >>> from distilabel.tasks.text_generation import SelfInstructTask\n >>> task = SelfInstructTask(system_prompt=\"You are a helpful assistant.\", num_instructions=2)\n >>> task.generate_prompt(\"What are the first 5 Fibonacci numbers?\")\n Prompt(\n system_prompt=\"You are a helpful assistant.\",\n formatted_prompt=\"# Task Description\\nDevelop 2 user queries that ...\",\n )\n \"\"\"\n render_kwargs = {\n \"application_description\": self.application_description,\n \"num_instructions\": self.num_instructions,\n \"input\": input,\n }\n return Prompt(\n system_prompt=self.system_prompt,\n formatted_prompt=self.template.render(**render_kwargs),\n )\n
"},{"location":"reference/distilabel/tasks/text_generation/self_instruct/#distilabel.tasks.text_generation.self_instruct.SelfInstructTask.parse_output","title":"parse_output(output)
","text":"Parses the output of the model into the desired format.
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def parse_output(self, output: str) -> Dict[str, List[str]]:\n \"\"\"Parses the output of the model into the desired format.\"\"\"\n pattern = re.compile(r\"\\d+\\.\\s*(.*?)\\n\")\n return {\"instructions\": pattern.findall(output)}\n
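A small usage sketch (not part of the source) of the parsing above; each numbered line in the raw output is collected into the instructions list:
from distilabel.tasks.text_generation import SelfInstructTask\n\ntask = SelfInstructTask()\n# Each numbered line followed by a newline is captured by the regex above.\nraw_output = \"1. Write a haiku about autumn.\\n2. Summarize the plot of Dune.\\n\"\nprint(task.parse_output(raw_output))\n# {'instructions': ['Write a haiku about autumn.', 'Summarize the plot of Dune.']}\n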
"},{"location":"reference/distilabel/tasks/text_generation/self_instruct/#distilabel.tasks.text_generation.self_instruct.SelfInstructTask.to_argilla_record","title":"to_argilla_record(dataset_row, instructions_column='instructions')
","text":"Converts a dataset row to a list of Argilla FeedbackRecord
s.
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def to_argilla_record(\n self,\n dataset_row: Dict[str, Any],\n instructions_column: Optional[str] = \"instructions\",\n) -> List[\"FeedbackRecord\"]:\n \"\"\"Converts a dataset row to a list of Argilla `FeedbackRecord`s.\"\"\"\n records = []\n for instructions in dataset_row[instructions_column]: # type: ignore\n for instruction in instructions:\n fields, metadata = {}, {}\n for arg_name in self.input_args_names:\n arg_value = dataset_row[arg_name]\n if isinstance(arg_value, list):\n for idx, value in enumerate(arg_value, start=1):\n value = value.strip() if isinstance(value, str) else \"\"\n fields[f\"{arg_name}-{idx}\"] = value\n if value is not None:\n metadata[f\"length-{arg_name}-{idx}\"] = len(value)\n elif isinstance(arg_value, str):\n fields[arg_name] = arg_value.strip() if arg_value else \"\"\n if arg_value is not None:\n metadata[f\"length-{arg_name}\"] = len(arg_value.strip())\n else:\n warnings.warn(\n f\"Unsupported input type ({type(arg_value)}), skipping...\",\n UserWarning,\n stacklevel=2,\n )\n fields[\"instruction\"] = instruction\n metadata[\"length-instruction\"] = len(instruction)\n\n # Then we add the model metadata from the `generation_model` and `labelling_model`\n # columns of the dataset, if they exist.\n metadata.update(\n model_metadata_from_dataset_row(dataset_row=dataset_row)\n )\n # Finally, we append the `FeedbackRecord` with the fields and the metadata\n records.append(rg.FeedbackRecord(fields=fields, metadata=metadata))\n if not records:\n raise ValueError(\n f\"Skipping the row {dataset_row} as the list of `FeedbackRecord` is empty as those could not be inferred.\"\n )\n return records\n
"},{"location":"reference/distilabel/utils/","title":"utils","text":""},{"location":"reference/distilabel/utils/argilla/","title":"argilla","text":""},{"location":"reference/distilabel/utils/dicts/","title":"dicts","text":""},{"location":"reference/distilabel/utils/dicts/#distilabel.utils.dicts.combine_dicts","title":"combine_dicts(*dicts)
","text":"Combines multiple dictionaries into a single dictionary joining the values as a list for each key.
Parameters:
Name Type Description Default
*dicts
Any
the dictionaries to be combined.
()
Returns:
Type Description
Dict[str, Any]
Dict[str, Any]: the combined dictionary.
Source code in src/distilabel/utils/dicts.py
def combine_dicts(*dicts: Any) -> Dict[str, Any]:\n \"\"\"Combines multiple dictionaries into a single dictionary joining the values\n as a list for each key.\n\n Args:\n *dicts (Any): the dictionaries to be combined.\n\n Returns:\n Dict[str, Any]: the combined dictionary.\n \"\"\"\n combined_dict = defaultdict(list)\n for d in dicts:\n for key, value in d.items():\n combined_dict[key].append(value)\n return dict(combined_dict)\n
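A small usage sketch (not part of the source), illustrating how the values of repeated keys are joined into lists:
from distilabel.utils.dicts import combine_dicts\n\n# Values sharing a key across the input dictionaries are collected into a list.\ncombined = combine_dicts({\"a\": 1, \"b\": 2}, {\"a\": 3})\nprint(combined)\n# {'a': [1, 3], 'b': [2]}\n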
"},{"location":"reference/distilabel/utils/futures/","title":"futures","text":""},{"location":"reference/distilabel/utils/futures/#distilabel.utils.futures.when_all_complete","title":"when_all_complete(futures, callback=None)
","text":"Returns a Future
that will be completed when all the provided futures
are completed, and it will contain the results of the futures
.
Parameters:
Name Type Description Default
futures
List[Future]
the Future
s to wait for.
Returns:
Name Type Description
Future
Future[List[T]]
the Future
that will be completed when all the provided futures
are completed, and it will contain the results of the futures
.
Source code in src/distilabel/utils/futures.py
def when_all_complete(\n futures: List[Future[T]], callback: Optional[Callable[[List[T]], List[T]]] = None\n) -> Future[List[T]]:\n \"\"\"Returns a `Future` that will be completed when all the provided `futures` are\n completed, and it will contain the results of the `futures`.\n\n Args:\n futures (List[Future]): the `Future`s to wait for.\n\n Returns:\n Future: the `Future` that will be completed when all the provided `futures` are\n completed, and it will contain the results of the `futures`.\n \"\"\"\n all_done_future = Future()\n results: List[T] = [None] * len(futures) # type: ignore\n\n def check_all_done(future: Future) -> None:\n # This is done to preserve the order of the results with respect to the order\n # of the futures.\n index = futures.index(future)\n results[index] = future.result()[0]\n\n _, not_done = wait(futures, return_when=\"FIRST_COMPLETED\")\n if len(not_done) == 0:\n final_results = results\n if callback is not None:\n final_results = callback(results)\n all_done_future.set_result(final_results)\n\n for future in futures:\n future.add_done_callback(check_all_done)\n\n return all_done_future\n
"},{"location":"reference/distilabel/utils/imports/","title":"imports","text":""},{"location":"reference/distilabel/utils/types/","title":"types","text":""},{"location":"reference/distilabel/utils/types/#distilabel.utils.types.is_future","title":"is_future(obj)
","text":"Checks if an object is a future narrowing the type.
Parameters:
Name Type Description Default
obj
Future[T]
Object to check
required
Returns:
Type Description
TypeGuard[Future[T]]
TypeGuard[Future[T]]: True if it is a future
Source code in src/distilabel/utils/types.py
def is_future(obj: Union[Future[T], Any]) -> TypeGuard[Future[T]]:\n \"\"\"Checks if an object is a future narrowing the type.\n\n Args:\n obj (Future[T]): Object to check\n\n Returns:\n TypeGuard[Future[T]]: True if it is a future\n \"\"\"\n return isinstance(obj, Future)\n
"},{"location":"technical-reference/","title":"Technical reference","text":"Explore distilabel
's technical references for an understanding of its components and their interactions, or directly access the API Reference for specific details.
If you are not familiar with the different components, consider taking a look at the concepts first.
"},{"location":"technical-reference/llms/","title":"LLMs","text":"In this section we will see what's an LLM
and the different LLM
s implementations available in distilabel
.
The LLM
class encapsulates the functionality for interacting with a large language model.
It distinguishes between task specifications and configurable parameters that influence the LLM behavior.
For illustration purposes, we employ the TextGenerationTask
in this section and guide you to the dedicated Tasks
section for comprehensive details.
LLM classes share several general parameters and define implementation-specific ones. Let's first explain the general parameters and the generate method, and then the specifics of each class.
"},{"location":"technical-reference/llms/#general-parameters","title":"General parameters","text":"Let's briefly introduce the general parameters we may find1:
max_new_tokens
: this parameter controls the maximum number of tokens the LLM is allowed to use.
temperature
: parameter associated with the creativity of the model; a value close to 0 makes the model more deterministic, while higher values make the model more \"creative\".
top_k
and top_p
: top_k
restricts the sampling of the next token to the k most probable tokens, while top_p
restricts it to the smallest set of tokens whose cumulative probability reaches p (nucleus sampling).
frequency_penalty
and presence_penalty
: the frequency penalty penalizes tokens proportionally to how often they have already appeared in the generated text, reducing the chance of them being repeated, while the presence_penalty
penalizes a token once it has appeared at all, regardless of its frequency.
prompt_format
and prompt_formatting_fn
: these two parameters allow tweaking the prompt given to our models, for example we can direct the LLM
to format the prompt according to one of the predefined formats, while prompt_formatting_fn
allows passing a function that will be applied to the prompt before generation, for extra control over what we feed to the model, as sketched below.
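As a minimal sketch of how these general parameters are usually passed, reusing the OpenAILLM example shown later in this page (the exact set of supported parameters may vary across LLM implementations):
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import TextGenerationTask\n\n# A sketch: the general parameters are provided when instantiating the LLM.\nllm = OpenAILLM(\n    task=TextGenerationTask(),\n    model=\"gpt-3.5-turbo\",\n    openai_api_key=os.environ.get(\"OPENAI_API_KEY\"),\n    max_new_tokens=256,  # cap on the number of tokens generated per completion\n    temperature=0.3,  # lower values make the output more deterministic\n    frequency_penalty=0.0,  # penalize tokens proportionally to how often they already appeared\n    presence_penalty=0.0,  # penalize tokens that already appeared, regardless of frequency\n    prompt_format=\"openai\",  # build the prompt following the OpenAI chat format\n)\n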
generate
method","text":"Once you create an LLM
, you use the generate
method to interact with it. This method accepts two parameters:
inputs
: which is a list of dictionaries containing the inputs for the LLM
and the Task
. Each dictionary must have all the keys required by the Task
.
inputs = [\n {\"input\": \"Write a letter for my friend Bob...\"},\n {\"input\": \"Give me a summary of the following text:...\"},\n ...\n]\n
num_generations
: which is an integer used to specify how many text generations we want to obtain for each element in inputs
.
The output of the method will be a list containing lists of LLMOutput
. Each inner list is associated to the corresponding input in inputs
, and each LLMOutput
is associated to one of the num_generations
for each input.
>>> llm.generate(inputs=[...], num_generations=2)\n[ # (1)\n [ # (2)\n { # (3)\n \"model_name\": \"notus-7b-v1\",\n \"prompt_used\": \"Write a letter for my friend Bob...\",\n \"raw_output\": \"Dear Bob, ...\",\n \"parsed_output\": {\n \"generations\": \"Dear Bob, ...\",\n }\n }, \n {\n \"model_name\": \"notus-7b-v1\",\n \"prompt_used\": \"Write a letter for my friend Bob...\",\n \"raw_output\": \"Dear Bob, ...\",\n \"parsed_output\": {\n \"generations\": \"Dear Bob, ...\",\n }\n }, \n ],\n [...],\n]\n
inputs
.LLMOutput
s as specified in num_generations
.LLMOutput
is a dictionaryThe LLMOutput
is a TypedDict
containing the keys model_name
, prompt_used
, raw_output
and parsed_output
. The parsed_output
key is a dictionary that will contain all the Task
outputs.
{\n \"model_name\": \"notus-7b-v1\",\n \"prompt_used\": \"Write a letter for my friend Bob...\",\n \"raw_output\": \"Dear Bob, ...\",\n \"parsed_output\": { # (1)\n \"generations\": \"Dear Bob, ...\",\n }\n}, \n
parsed_output
will depend on the Task
used. In this case, we used TextGenerationTask
, so the key generations
is present.If the LLM
uses a thread pool, then the output of the generate
method will be a Future having as result a list of lists of LLMOutput
as described above.
These may be the default choice for your ambitious tasks.
For the API reference visit OpenAILLM.
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import TextGenerationTask\n\nopenaillm = OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=TextGenerationTask(),\n prompt_format=\"openai\",\n max_new_tokens=256,\n openai_api_key=os.environ.get(\"OPENAI_API_KEY\"),\n temperature=0.3,\n)\nresult = openaillm.generate([{\"input\": \"What is OpenAI?\"}])\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# OpenAI is an artificial intelligence research laboratory and company. It was founded\n# with the goal of ensuring that artificial general intelligence (AGI) benefits all of\n# humanity. OpenAI conducts cutting-edge research in various fields of AI ...\n
"},{"location":"technical-reference/llms/#llamacpp","title":"Llama.cpp","text":"Applicable for local execution of Language Models (LLMs). Use this LLM when you have access to the quantized weights of your selected model for interaction.
Let's see an example using notus-7b-v1. First, you can download the weights from the following link:
from distilabel.llm import LlamaCppLLM\nfrom distilabel.tasks import TextGenerationTask\nfrom llama_cpp import Llama\n\n# Instantiate our LLM with them:\nllm = LlamaCppLLM(\n model=Llama(model_path=\"./notus-7b-v1.q4_k_m.gguf\", n_gpu_layers=-1),\n task=TextGenerationTask(),\n max_new_tokens=128,\n temperature=0.3,\n prompt_format=\"notus\",\n)\n\nresult = llm.generate([{\"input\": \"What is the capital of Spain?\"}])\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# The capital of Spain is Madrid. It is located in the center of the country and\n# is known for its vibrant culture, beautiful architecture, and delicious food.\n# Madrid is home to many famous landmarks such as the Prado Museum, Retiro Park,\n# and the Royal Palace of Madrid. I hope this information helps!\n
For the API reference visit LlammaCppLLM.
"},{"location":"technical-reference/llms/#vllm","title":"vLLM","text":"Highly recommended to use if you have a GPU available, as is the fastest solution out there for batch generation. Find more information about in vLLM docs.
from distilabel.tasks import TextGenerationTask\nfrom distilabel.llm import vLLM\nfrom vllm import LLM\n\nllm = vLLM(\n vllm=LLM(model=\"argilla/notus-7b-v1\"),\n task=TextGenerationTask(),\n max_new_tokens=512,\n temperature=0.3,\n prompt_format=\"notus\",\n)\nresult_vllm = llm.generate([{\"input\": \"What's a large language model?\"}])\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# A large language model is a type of artificial intelligence (AI) system that is designed\n# to understand and interpret human language. It is called \"large\" because it uses a vast\n# amount of data, typically billions of words or more, to learn and make predictions about\n# language. Large language models are ...\n
For the API reference visit vLLM.
"},{"location":"technical-reference/llms/#huggingface-llms","title":"HuggingFace LLMs","text":"This section explains two different ways to use HuggingFace models:
"},{"location":"technical-reference/llms/#transformers","title":"Transformers","text":"This is the option to use a model hosted on the HuggingFace Hub. Load the model and tokenizer in the standard manner as done locally, and proceed to instantiate your class.
For the API reference visit TransformersLLM.
Let's see an example using notus-7b-v1:
from distilabel.llm import TransformersLLM\nfrom distilabel.tasks import TextGenerationTask\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load the models from the HuggingFace Hub\ntokenizer = AutoTokenizer.from_pretrained(\"argilla/notus-7b-v1\")\nmodel = AutoModelForCausalLM.from_pretrained(\"argilla/notus-7b-v1\", device_map=\"auto\")\n\n# Instantiate our LLM with them:\nllm = TransformersLLM(\n model=model,\n tokenizer=tokenizer,\n task=TextGenerationTask(),\n max_new_tokens=128,\n temperature=0.3,\n prompt_format=\"notus\",\n)\n\nresult = llm.generate([{\"input\": \"What's a large language model?\"}])\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# A large language model is a type of machine learning algorithm that is designed to analyze\n# and understand large amounts of text data. It is called \"large\" because it requires a\n# vast amount of data to train and improve its accuracy. These models are ...\n
"},{"location":"technical-reference/llms/#inference-endpoints","title":"Inference Endpoints","text":"HuggingFace provides a streamlined approach for deploying models through Inference Endpoints on their infrastructure. Opt for this solution if your model is hosted on the HuggingFace Hub.
For the API reference visit InferenceEndpointsLLM.
Let's see how to interact with these LLMs:
import os\n\nfrom distilabel.llm import InferenceEndpointsLLM\nfrom distilabel.tasks import TextGenerationTask\n\nendpoint_name = \"aws-notus-7b-v1-4052\" or os.getenv(\"HF_INFERENCE_ENDPOINT_NAME\")\nendpoint_namespace = \"argilla\" or os.getenv(\"HF_NAMESPACE\")\ntoken = os.getenv(\"HF_TOKEN\") # hf_...\n\nllm = InferenceEndpointsLLM(\n endpoint_name=endpoint_name,\n endpoint_namespace=endpoint_namespace,\n token=token,\n task=TextGenerationTask(),\n max_new_tokens=512,\n prompt_format=\"notus\",\n)\nresult = llm.generate([{\"input\": \"What are critique LLMs?\"}])\n# print(result[0][0][\"parsed_output\"][\"generations\"])\n# Critique LLMs (Long Land Moore Machines) are artificial intelligence models designed specifically for analyzing and evaluating the quality or worth of a particular subject or object. These models can be trained on a large dataset of reviews, ratings, or commentary related to a product, service, artwork, or any other topic of interest.\n# The training data can include both positive and negative feedback, helping the LLM to understand the nuanced aspects of quality and value. The model uses natural language processing (NLP) techniques to extract meaningful insights, including sentiment analysis, entity recognition, and text classification.\n# Once the model is trained, it can be used to analyze new input data and provide a critical assessment based on its learned understanding of quality and value. For example, a critique LLM for movies could evaluate a new film and generate a detailed review highlighting its strengths, weaknesses, and overall rating.\n# Critique LLMs are becoming increasingly useful in various industries, such as e-commerce, education, and entertainment, where they can provide objective and reliable feedback to help guide decision-making processes. They can also aid in content optimization by highlighting areas of improvement or recommending strategies for enhancing user engagement.\n# In summary, critique LLMs are powerful tools for analyzing and evaluating the quality or worth of different subjects or objects, helping individuals and organizations make informed decisions with confidence.\n
"},{"location":"technical-reference/llms/#together-inference","title":"Together Inference","text":"Together offers a product named Together Inference, which exposes some models for diverse tasks such as chat, text generation, code, or image; exposing those via an endpoint within their API either as serverless endpoints or as dedicated instances.
See their release post with more details at Announcing Together Inference Engine \u2013 the fastest inference available.
from distilabel.tasks import TextGenerationTask\nfrom distilabel.llm import TogetherInferenceLLM\n\nllm = TogetherInferenceLLM(\n model=\"togethercomputer/llama-2-70b-chat\",\n task=TextGenerationTask(),\n max_new_tokens=512,\n temperature=0.3,\n prompt_format=\"llama2\",\n)\noutput = llm.generate(\n [{\"input\": \"Explain me the theory of relativity as if you were a pirate.\"}]\n)\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# Ahoy matey! Yer lookin' fer a tale of the theory of relativity, eh? Well,\n# settle yerself down with a pint o' grog and listen close, for this be a story\n# of the sea of time and space!\n# Ye see, matey, the theory of relativity be tellin' us that time and space ain't\n# fixed things, like the deck o' a ship or the stars in the sky. Nay, they be like\n# the ocean itself, always changin' and flowin' like the tides.\n# Now, imagine ...\n
"},{"location":"technical-reference/llms/#processllm-and-llmpool","title":"ProcessLLM
and LLMPool
","text":"By default, distilabel
uses a single process, so the generation loop is usually bottlenecked by the model inference time and Python GIL. To overcome this limitation, we provide the ProcessLLM
class that allows to load an LLM
in a different process, avoiding the GIL and allowing to parallelize the generation loop. Creating a ProcessLLM
is easy as:
from distilabel.tasks import TextGenerationTask, Task\nfrom distilabel.llm import ProcessLLM, LLM\n\n\ndef load_gpt_4(task: Task) -> LLM:\n from distilabel.llm import OpenAILLM\n\n return OpenAILLM(\n model=\"gpt-4\",\n task=task,\n num_threads=4,\n )\n\n\nllm = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4)\nfuture = llm.generate(\n inputs=[{\"input\": \"Write a letter for Bob\"}], num_generations=1\n) # (1)\nllm.teardown() # (2)\nresult = future.result()\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"])\n# Dear Bob,\n# I hope this letter finds you in good health and high spirits. I know it's been a while since we last caught up, and I wanted to take the time to connect and share a few updates.\n# Life has been keeping me pretty busy lately. [Provide a brief overview of what you've been up to: work, school, family, hobbies, etc.]\n# I've often found myself reminiscing about the good old days, like when we [include a memorable moment or shared experience with Bob].\n
ProcessLLM
returns a Future
containing a list of lists of LLMOutput
s.ProcessLLM
needs to be terminated after usage. If the ProcessLLM
is used by a Pipeline
, it will be terminated automatically.You can directly use a ProcessLLM
as the generator
or labeller
in a Pipeline
. Apart from that, there would be situations in which you would like to generate texts using several LLM
s in parallel. For this purpose, we provide the LLMPool
class:
from distilabel.tasks import TextGenerationTask, Task\nfrom distilabel.llm import ProcessLLM, LLM, LLMPool\n\ndef load_gpt_3(task: Task) -> LLM:\n from distilabel.llm import OpenAILLM\n\n return OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=task,\n num_threads=4,\n )\n\ndef load_gpt_4(task: Task) -> LLM:\n from distilabel.llm import OpenAILLM\n\n return OpenAILLM(\n model=\"gpt-4\",\n task=task,\n num_threads=4,\n )\n\n\npool = LLMPool(llms=[\n ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_3),\n ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4),\n])\nresult = pool.generate(\n inputs=[{\"input\": \"Write a letter for Bob\"}], num_generations=2\n)\npool.teardown()\n# >>> print(result[0][0][\"parsed_output\"][\"generations\"], end=\"\\n\\n\\n\\n\\n\\n---->\")\n# Dear Bob,\n# I hope this letter finds you in good health and high spirits. I know it's been a while since we last caught up, and I wanted to take the time to connect and share a few updates.\n# Life has been keeping me pretty busy lately. [Provide a brief overview of what you've been up to: work, school, family, hobbies, etc.]\n# I've often found myself reminiscing about the good old days, like when we [include a memorable moment or shared experience with Bob].\n# >>> print(result[0][1][\"parsed_output\"][\"generations\"])\n# Of course, I'd be happy to draft a sample letter for you. However, I would need some additional \n# information including who \"Bob\" is, the subject matter of the letter, the tone (formal or informal), \n# and any specific details or points you'd like to include. Please provide some more context and I'll do my best to assist you.\n
You can take a look at this blog post from cohere for a thorough explanation of the different parameters.\u00a0\u21a9
This section will detail the Pipeline
, providing guidance on creating and using them.
The Pipeline class is a central component in distilabel
, responsible for crafting datasets. It manages the generation of datasets and oversees the interaction between the generator and labeller LLMs
.
You create an instance of the Pipeline
by providing a generator and an optional labeller LLM. Interactions with it are facilitated through its generate
method. This method requires a dataset
, specifies the num_generations to determine the number of examples to be created, and includes additional parameters for controlling the batch_size and managing the generation process.
Let's start by a Pipeline with a single LLM
as a generator.
We will create a Pipeline
that will use Notus from a HuggingFace Inference Endpoint. For this matter, we need to create a TextGenerationTask, and specify the format we want to use for our Prompt
, in this case Notus, which corresponds to the same for Zephyr.
import os\n\nfrom distilabel.llm import InferenceEndpointsLLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import TextGenerationTask\n\nendpoint_name = \"aws-notus-7b-v1-4052\" or os.getenv(\"HF_INFERENCE_ENDPOINT_NAME\")\nendpoint_namespace = \"argilla\" or os.getenv(\"HF_NAMESPACE\")\n\npipe_generation = Pipeline(\n generator=InferenceEndpointsLLM(\n endpoint_name=endpoint_name, # The name given of the deployed model\n endpoint_namespace=endpoint_namespace, # This usually corresponds to the organization, in this case \"argilla\"\n token=os.getenv(\"HF_TOKEN\"), # hf_...\n task=TextGenerationTask(),\n max_new_tokens=512,\n do_sample=True,\n prompt_format=\"notus\",\n ),\n)\n
We've set up our pipeline using a specialized TextGenerationTask
(refer to the tasks section for more task details), and an InferenceEndpointsLLM configured for notus-7b-v1
, although any of the available LLMs
will work.
To use the Pipeline for dataset generation, we call the generate method. We provide it with the input dataset and specify the desired number of generations. In this example, we've prepared a Dataset
with a single row to illustrate the process. This dataset contains one row, and we'll trigger 2 generations from it:
from datasets import Dataset\n\ndataset = Dataset.from_dict(\n {\"input\": [\"Create an easy dinner recipe with few ingredients\"]}\n)\ndataset_generated = pipe_generation.generate(dataset, num_generations=2)\n
Now, let's examine the dataset that was generated. It's a CustomDataset
, equipped with additional features for seamless interaction with Argilla
.
print(dataset_generated)\n# Dataset({\n# features: ['input', 'generation_model', 'generation_prompt', 'raw_generation_responses', 'generations'],\n# num_rows: 1\n# })\n\nprint(dataset_generated[0][\"generations\"][0])\n# Here's a simple and delicious dinner recipe with only a few ingredients:\n\n# Garlic Butter Chicken with Roasted Vegetables\n\n# Ingredients:\n# - 4 boneless, skinless chicken breasts\n# - 4 tablespoons butter\n# - 4 cloves garlic, minced\n# - 1 teaspoon dried oregano\n# - 1/2 teaspoon salt\n# - 1/4 teaspoon black pepper\n# - 1 zucchini, sliced\n# - 1 red bell pepper, sliced\n# - 1 cup cherry tomatoes\n\n# Instructions:\n\n# 1. Preheat oven to 400\u00b0F (200\u00b0C).\n\n# 2. Melt butter in a small saucepan over low heat. Add minced garlic and heat until fragrant, about 1-2 minutes.\n\n# 3. Place chicken breasts in a baking dish and brush garlic butter over each one.\n\n# 4. Sprinkle oregano, salt, and black pepper over the chicken.\n\n# 5. In a separate baking dish, add sliced zucchini, red bell pepper, and cherry tomatoes. Brush with remaining garlic butter.\n\n# 6. Roast the chicken and vegetables in the preheated oven for 25-30 minutes or until cooked through and the vegetables are tender and lightly browned.\n\n# 7. Transfer the chicken to plates and serve with the roasted vegetables alongside. Enjoy!\n\n# This recipe requires simple ingredients and is easy to prepare, making it perfect for a quick, satisfying dinner. The garlic butter adds maximum flavor, while the roasted vegetables complement the chicken beautifully, providing additional nutrition and texture. With minimal effort, you can have a delicious and balanced meal on the table in no time.\n
"},{"location":"technical-reference/pipeline/#labeller","title":"Labeller","text":"Next, we move on to labelling a dataset. Just as before, we need an LLM
for our Pipeline
. In this case we will use OpenAILLM
with gpt-4
, and a PreferenceTask
, UltraFeedbackTask for instruction following.
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import UltraFeedbackTask\n\npipe_labeller = Pipeline(\n labeller=OpenAILLM(\n model=\"gpt-4\",\n task=UltraFeedbackTask.for_instruction_following(),\n max_new_tokens=256,\n num_threads=8,\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n temperature=0.3,\n ),\n)\n
For this example dataset, we've extracted 2 sample rows from the UltraFeedback binarized dataset, formatted as expected by the default LLM
and Task
.
We've selected two distinct examples, one correctly labeled and the other incorrectly labeled in the original dataset. In this instance, the dataset
being generated includes two columns: the input, as seen in the generator, and a generations column containing the model's responses.
from datasets import Dataset\n\ndataset_test = Dataset.from_dict(\n {\n \"input\": [\n \"Describe the capital of Spain in 25 words.\",\n \"Design a conversation between a customer and a customer service agent.\",\n ],\n \"generations\": [\n [\"Santo Domingo is the capital of Dominican Republic\"],\n [\n \"Customer: Hello, I'm having trouble with my purchase.\\n\\nCustomer Service Agent: I'm sorry to hear that. Could you please tell me more about the issue you are facing?\\n\\nCustomer: Yes, I ordered a pair of shoes from your company a week ago, but I haven't received them yet.\\n\\nCustomer Service Agent: I apologize for the inconvenience. Could you please provide me with your order number and full name so I can look into this for you?\\n\\nCustomer: Sure, my name is John Doe and my order number is ABCD1234.\\n\\nCustomer Service Agent: Thank you, John. I have checked on your order and it appears that it is still being processed. It should be shipped out within the next 24 hours.\\n\\nCustomer: That's good to hear, but can you also tell me the expected delivery time?\\n\\nCustomer Service Agent: Absolutely, based on your location, the estimated delivery time is 3-5 business days after shipping. You will receive a tracking number via email once the item is shipped, which will provide real-time updates on your package.\\n\\nCustomer: Thanks for the information. One more thing, what is your return policy if the shoes don't fit?\\n\\nCustomer Service Agent: Our company offers a 30-day return policy. If you are not satisfied with the product or if it doesn't fit, you can return it for a full refund or an exchange within 30 days of delivery. Please keep in mind that the product must be in its original packaging and in the same condition as when you received it.\\n\\nCustomer: Okay, that's good to know. Thank you for your help.\\n\\nCustomer Service Agent: You're welcome, John. I'm glad I could assist you. If you have any further questions or concerns, please don't hesitate to reach out to us. Have a great day!\"\n ],\n ],\n }\n)\n\nds_labelled = pipe_labeller.generate(dataset_test)\n
Let's select the relevant columns from the labelled dataset, and take a look at the first record. This allows us to observe the rating and the accompanying rationale that provides an explanation.
ds_labelled.select_columns([\"input\", \"generations\", \"rating\", \"rationale\"])[0]\n# {\n# \"input\": \"Describe the capital of Spain in 25 words.\",\n# \"generations\": [\"Santo Domingo is the capital of Dominican Republic\"],\n# \"rating\": [1.0],\n# \"rationale\": [\n# \"The text is irrelevant to the instruction. It describes the capital of the Dominican Republic instead of Spain.\"\n# ],\n# }\n
"},{"location":"technical-reference/pipeline/#generator-and-labeller","title":"Generator and Labeller","text":"In the final scenario, we have a Pipeline
utilizing both a generator and a labeller LLM
. Once more, we'll employ the Inference Endpoint with notus-7b-v1
for the generator, using a different system prompt this time. As for the labeller, we'll use gpt-3.5-turbo
, which will label the examples for instruction following.
import os\n\nfrom distilabel.llm import InferenceEndpointsLLM, OpenAILLM\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.tasks import TextGenerationTask, UltraFeedbackTask\n\npipe_full = Pipeline(\n generator=InferenceEndpointsLLM(\n endpoint_name=endpoint_name,\n endpoint_namespace=endpoint_namespace,\n token=token,\n task=TextGenerationTask(\n system_prompt=\"You are an expert writer of XKCD, a webcomic of romance, sarcasm, math, and language.\"\n ),\n max_new_tokens=512,\n do_sample=True,\n prompt_format=\"notus\",\n ),\n labeller=OpenAILLM(\n model=\"gpt-3.5-turbo\",\n task=UltraFeedbackTask.for_instruction_following(),\n max_new_tokens=256,\n num_threads=4,\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n temperature=0.3,\n ),\n)\n
For this example, we'll set up a pipeline to generate and label a dataset of short stories inspired by XKCD. To do this, we'll define the system_prompt for the NotusTextGenerationTask
. The dataset will follow the same format we used for the generator scenario, featuring an input column with the examples, in this case, just one.
from datasets import Dataset\n\nxkcd_instructions = Dataset.from_dict(\n {\"input\": [\"Could you imagine an interview process going sideways?\"]}\n)\nds_xkcd = pipe_full.generate(xkcd_instructions, num_generations=3)\n
We will now take a look to one of the generations, along with the rating and rational given by our labeller LLM
:
print(ds_xkcd[1][\"generations\"][0])\nprint(\"-----\" * 5)\nprint(\"RATING: \", ds_xkcd[1][\"rating\"][0])\nprint(\"RATIONALE: \", ds_xkcd[1][\"rationale\"][0])\n\n# Yes, absolutely! Here's a fictional interview scenario turned into an XKCD-style comic:\n\n# (Interviewee meets with an unsmiling interviewer)\n\n# Interviewer: Good morning! Have a seat. Tell me about your experience working with teams.\n\n# Interviewee: Well, I've worked in large teams on group projects before. It could be challenging, but we always managed to pull through.\n\n# (Smugly) Interviewer: Challenging, huh? (tapping pen on desk) And how did you manage to overcome these challenges?\n\n# Interviewee: (confidently) Communication was key. I made sure to stay in touch with the team and keep everyone updated on our progress.\n\n# Interviewer: Communication. Hm. And what if communication failed?\n\n# Interviewee: (thrown off balance) Well, I mean...there was one time when we couldn't connect via video call. But we picked up the phone, and we all understood what needed to be done.\n\n# Interviewer: But what if the communication on the technical level failed, say, like a computer system with a software glitch?\n\n# Interviewee: (feeling the pressure) That's never happened to me before, but if it did, we would have to troubleshoot and find a workaround, right?\n\n# Interviewer: (smirking) Oh, but finding a workaround could mean delegating responsibilities among the team, which requires communication. It's a vicious cycle!\n\n# (Interviewee visibly uncomfortable)\n\n# Interviewer: And what if there was a communication breakdown among the team members themselves?\n\n# Interviewee: (unsure) I think we would try to sort it out as soon as possible to avoid any further problems.\n\n# Interviewer: (sarcastically) Yes, avoiding further problems is critical. Don't want to let those deadlines slip, do we?\n\n# (Interviewer types frantically on their computer keyboard)\n\n# Interviewer: (softly but wordily) Note to self: Avoid this candidate for team projects.\n\n# (The interviewer returns his attention back to the interviewee)\n\n# Interviewer: Well, moving on...\n# -------------------------\n# RATING: 4.0\n# RATIONALE: The text provides a fictional interview scenario that aligns with the task goal of imagining an interview process going sideways. It includes dialogue between an interviewer and interviewee, showcasing a breakdown in communication and the interviewer's sarcastic and dismissive attitude towards the interviewee's responses.\n
"},{"location":"technical-reference/pipeline/#running-several-generators-in-parallel","title":"Running several generators in parallel","text":"distilabel
also allows to use several LLM
s as generators in parallel, thanks to the ProcessLLM
and LLMPool
classes. This comes handy for the cases where we want to use several LLM
s and fed them with the same input, allowing us to later compare their outputs (to see which one is better) or even creating a Preference dataset, following a similar process to UltraFeedback dataset generation.
For this example, we will load four 7B LLM
s using vLLM
and a machine with 4 GPUs (to load each LLM
in a different GPU). Then we will give instructions to all of them, and we will use GPT-4 to label the generated instructions using the UltraFeedbackTask
for instruction-following.
First of all, we will need to load each LLM
using a ProcessLLM
. ProcessLLM
will create a child process which will load the LLM
using the load_llm_fn
.
from distilabel.llm import LLM, ProcessLLM\nfrom distilabel.tasks import Task, TextGenerationTask\n\n\ndef load_notus(task: Task) -> LLM: # (1)\n import os\n from distilabel.llm import vLLM\n from vllm import LLM\n\n os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\" # (2)\n\n return vLLM(\n vllm=LLM(model=\"argilla/notus-7b-v1\"),\n task=task,\n max_new_tokens=512,\n temperature=0.7,\n prompt_format=\"notus\",\n )\n\n\nllm = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_notus)\n
ProcessLLM
will create a child process in which the LLM
will be loaded. Therefore, we will need to define a function that will be executed by the child process to load the LLM
. The child process will pass the provided Task
to the load_llm_fn
.CUDA_VISIBLE_DEVICES
environment variable to make sure that each LLM
is loaded in a different GPU.We will repeat this pattern 4 times, each time with a different LLM
and a different GPU.
from distilabel.llm import LLM, ProcessLLM\nfrom distilabel.tasks import Task, TextGenerationTask\n\n\ndef load_notus(task: Task) -> LLM:\n import os\n from distilabel.llm import vLLM\n from vllm import LLM\n\n os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\n return vLLM(\n vllm=LLM(model=\"argilla/notus-7b-v1\"),\n task=task,\n max_new_tokens=512,\n temperature=0.7,\n prompt_format=\"notus\",\n )\n\n\ndef load_zephyr(task: Task) -> LLM:\n import os\n from distilabel.llm import vLLM\n from vllm import LLM\n\n os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n\n return vLLM(\n vllm=LLM(model=\"HuggingFaceH4/zephyr-7b-beta\"),\n task=task,\n max_new_tokens=512,\n temperature=0.7,\n prompt_format=\"notus\",\n )\n\n\ndef load_starling(task: Task) -> LLM:\n import os\n from distilabel.llm import vLLM\n from vllm import LLM\n\n os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"2\"\n\n return vLLM(\n vllm=LLM(model=\"berkeley-nest/Starling-LM-7B-alpha\"),\n task=task,\n max_new_tokens=512,\n temperature=0.7,\n prompt_format=\"notus\",\n )\n\n\ndef load_neural_chat(task: Task) -> LLM:\n import os\n from distilabel.llm import vLLM\n from vllm import LLM\n\n os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n\n return vLLM(\n vllm=LLM(model=\"Intel/neural-chat-7b-v3-3\"),\n task=task,\n max_new_tokens=512,\n temperature=0.7,\n prompt_format=\"notus\",\n )\n\n\nnotus = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_notus)\nzephyr = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_zephyr)\nstarling = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_starling)\nneural_chat = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_neural_chat)\n
In order to distribute the generations among the different LLM
s, we will use a LLMPool
. This class expects a list of ProcessLLM
. Calling the generate
method of the LLMPool
will call the generate
method of each LLMProcess
in parallel, and will wait for all of them to finish, returning a list of lists of LLMOutput
s with the generations.
from distilabel.llm import LLMPool\n\npool = LLMPool(llms=[notus, zephyr, starling, neural_chat])\n
We will use this LLMPool
as the generator for our pipeline and we will use GPT-4 to label the generated instructions using the UltraFeedbackTask
for instruction-following.
from distilabel.tasks import UltraFeedbackTask\nfrom distilabel.pipeline import Pipeline\nfrom distilabel.llm import LLM, ProcessLLM\n\n\ndef load_gpt_4(task: UltraFeedbackTask) -> LLM:\n from distilabel.llm import OpenAILLM\n\n return OpenAILLM(\n model=\"gpt-4-1106-preview\",\n task=task,\n max_new_tokens=512,\n num_threads=4,\n )\n\n\npipeline = Pipeline(\n generator=pool,\n labeller=ProcessLLM(task=UltraFeedbackTask(), load_llm_fn=load_gpt_4), # (1)\n)\n
ProcessLLM
. This will allow to not block the main process GIL, and allowing the generator to continue with the next batch. Then, we will load the dataset and call the generate
method of the pipeline. For each input in the dataset, the LLMPool
will randomly select two LLM
s and will generate two generations for each of them. The generations will be labelled by GPT-4 using the UltraFeedbackTask
for instruction-following. Finally, we will push the generated dataset to Argilla, in order to review the generations and labels that were automatically generated, and to manually correct them if needed.
from datasets import load_dataset\n\ndataset = (\n load_dataset(\"HuggingFaceH4/instruction-dataset\", split=\"test[:50]\")\n .remove_columns([\"completion\", \"meta\"])\n .rename_column(\"prompt\", \"input\")\n)\n\ndataset = pipeline.generate(\n dataset=dataset,\n num_generations=2,\n batch_size=5,\n display_progress_bar=True,\n)\n\ndataset.to_argilla().push_to_argilla(name=\"preference-dataset\", workspace=\"admin\")\n
With a few lines of code, we have easily generated a dataset with 2 generations per input, using 4 different LLM
s, and labelled the generations using GPT-4. You can check the full code here.
Considering recurring patterns in dataset creation, we can facilitate the process by utilizing the Pipeline
. This is made simpler through the pipeline
function, which provides the necessary parameters for creating a Pipeline
.
In the code snippet below, we use the pipeline
function to craft a pipeline
tailored for a preference task, specifically focusing on text-quality as the subtask. If we don't initially provide a labeller LLM
, we can specify the subtask we want our pipeline
to address. By default, this corresponds to UltraFeedbackTask
. It's mandatory to specify the generator of our choice; however, the labeller defaults to gpt-3.5-turbo
. Optional parameters required for OpenAILLM can also be passed as optional keyword arguments.
import os\n\nfrom distilabel.llm import InferenceEndpointsLLM\nfrom distilabel.pipeline import pipeline\nfrom distilabel.tasks import TextGenerationTask\n\npipe = pipeline(\n \"preference\",\n \"text-quality\",\n generator=InferenceEndpointsLLM(\n endpoint_name=endpoint_name,\n endpoint_namespace=endpoint_namespace,\n token=token,\n task=TextGenerationTask(),\n max_new_tokens=512,\n do_sample=True,\n prompt_format=\"notus\",\n ),\n max_new_tokens=256,\n num_threads=2,\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n temperature=0.0,\n)\n
For the dataset, we'll begin with three rows from HuggingFaceH4/instruction-dataset. We'll request two generations with checkpoints enabled to safeguard the data in the event of any failures, which is the default behavior.
from datasets import load_dataset\n\ninstruction_dataset = (\n load_dataset(\"HuggingFaceH4/instruction-dataset\", split=\"test[:3]\")\n .remove_columns([\"completion\", \"meta\"])\n .rename_column(\"prompt\", \"input\")\n)\n\npipe_dataset = pipe.generate(\n instruction_dataset,\n num_generations=2,\n batch_size=1,\n enable_checkpoints=True,\n display_progress_bar=True,\n)\n
Finally, let's see one of the examples from the dataset:
print(pipe_dataset[\"input\"][-1])\n# Create a 3 turn conversation between a customer and a grocery store clerk - that is, 3 per person. Then tell me what they talked about.\n\nprint(pipe_dataset[\"generations\"][-1][-1])\n# Customer: Hi there, I'm looking for some fresh berries. Do you have any raspberries or blueberries in stock?\n\n# Grocery Store Clerk: Yes, we have both raspberries and blueberries in stock today. Would you like me to grab some for you or can you find them yourself?\n\n# Customer: I'd like your help getting some berries. Can you also suggest which variety is sweeter? Raspberries or blueberries?\n\n# Grocery Store Clerk: Raspberries and blueberries both have distinct flavors. Raspberries are more tart and a little sweeter whereas blueberries tend to be a little sweeter and have a milder taste. It ultimately depends on your personal preference. Let me grab some of each for you to try at home and see which one you like better.\n\n# Customer: That sounds like a great plan. How often do you receive deliveries? Do you have some new varieties of berries arriving soon?\n\n# Grocery Store Clerk: We receive deliveries twice a week, on Wednesdays and Sundays. We also have a rotation of different varieties of berries throughout the season, so keep an eye out for new arrivals. Thanks for shopping with us, can I help you with anything else today?\n\n# Customer: No, that's all for now. I'm always happy to support your local store.\n\n# turn 1: berries, fresh produce availability, customer preference\n# turn 2: product recommendations based on taste and personal preference, availability\n# turn 3: store acknowledgment, shopping gratitude, loyalty and repeat business expectation.\n\nprint(pipe_dataset[\"rating\"][-1][-1])\n# 5.0\n\nprint(pipe_dataset[\"rationale\"][-1][-1])\n# The text accurately follows the given instructions and provides a conversation between a customer and a grocery store clerk. The information provided is correct, informative, and aligned with the user's intent. There are no hallucinations or misleading details.\n
The API reference can be found here: pipeline
"},{"location":"technical-reference/pipeline/#argilla-integration","title":"Argilla integration","text":"The CustomDataset generated entirely by AI models may require some additional human processing. To facilitate human feedback, the dataset can be uploaded to Argilla
. This process involves logging into an Argilla
instance, converting the dataset to the required format using CustomDataset.to_argilla()
, and subsequently using push_to_argilla
on the resulting dataset:
import argilla as rg\n\nrg.init(api_key=\"<YOUR_ARGILLA_API_KEY>\", api_url=\"<YOUR_ARGILLA_API_URL>\")\n\nrg_dataset = pipe_dataset.to_argilla()\nrg_dataset.push_to_argilla(name=\"preference-dataset\", workspace=\"admin\")\n
"},{"location":"technical-reference/tasks/","title":"Tasks","text":"In this section we will see what's a Task
and the list of tasks available in distilabel
.
The Task
class takes charge of setting how the LLM behaves, deciding whether it acts as a generator or a labeller. To accomplish this, the Task
class creates a prompt using a template that will be sent to the LLM
. It specifies the necessary input arguments for generating the prompt and identifies the output arguments to be extracted from the LLM
response. The Task
class yields a Prompt
that can generate a string with the format needed, depending on the specific LLM
used.
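As an illustrative sketch of this flow (note that the format_as helper and the llama2 format name are assumptions that should be checked against the Prompt API reference of your distilabel version):
from distilabel.tasks import TextGenerationTask\n\ntask = TextGenerationTask()\n\n# The task builds a `Prompt` from its input arguments...\nprompt = task.generate_prompt(input=\"What is the capital of France?\")\n\n# ...which can then be rendered in the format expected by the chosen `LLM`,\n# e.g. the Llama 2 chat format (the available formats may vary by version).\nprint(prompt.format_as(\"llama2\"))\n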
All the Tasks define a system_prompt, which serves as the initial instruction given to the LLM, guiding it on what kind of information or output is expected, as well as the following methods:
generate_prompt: this method is used by the LLM to create the prompts that will be fed to the model.
parse_output: after the LLM has generated the content, this method is called on the raw outputs of the model to extract the relevant content (scores, rationales, etc.).
input_args_names and output_args_names: these methods are used in the Pipeline to process the datasets. The first one defines the columns that will be extracted from the dataset to build the prompt, in the case of an LLM that acts as a generator or labeller alone, or the columns that should be placed in the dataset to be processed by the labeller LLM, in the case of a Pipeline that has both a generator and a labeller. The second one is in charge of inserting the defined fields as columns of the generated dataset.
After defining a task, the only action required is to pass it to the corresponding LLM. All the intricate processes are then handled internally:
from distilabel.llm import TransformersLLM\nfrom distilabel.tasks import TextGenerationTask\n\n# This snippet uses `TransformersLLM`, but is the same for every other `LLM`.\ngenerator = TransformersLLM(\n model=...,\n tokenizer=...,\n task=TextGenerationTask(),\n)\n
Given this explanation, distilabel
distinguishes between two primary categories of tasks: those focused on text generation and those centered around labelling. These Task
classes delineate the LLM's conduct, be it the creation of textual content or the assignment of labels to text, each with precise guidelines tailored to their respective functionalities. Users can seamlessly leverage these distinct task types to tailor the LLM's behavior according to their specific application needs.
This set of classes is designed to steer an LLM in generating text with specific guidelines. They provide a structured approach to instruct the LLM to generate content in a manner tailored to predefined criteria.
This is the base class for text generation, and it includes the following fields for guiding the generation process:
system_prompt, which serves as the initial instruction or query given to the LLM, guiding it on what kind of information or output is expected.
principles to inject into the system_prompt, which by default correspond to those defined in the UltraFeedback paper1; through them the LLM can be directed towards the different principles for more customized behaviour.
For the API reference visit TextGenerationTask.
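As a minimal sketch of customizing these fields (the principles_distribution argument name below is an assumption and should be verified against the TextGenerationTask API reference):
from distilabel.tasks import TextGenerationTask\n\n# NOTE: `principles_distribution` is an assumed argument name; double-check it\n# against the TextGenerationTask API reference for your installed version.\ntask = TextGenerationTask(\n    system_prompt=\"You are a helpful, honest assistant.\",\n    # sample uniformly among the default UltraFeedback principles\n    principles_distribution=\"balanced\",\n)\n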
"},{"location":"technical-reference/tasks/#selfinstructtask","title":"SelfInstructTask","text":"The task specially designed to build the prompts following the Self-Instruct paper: SELF-INSTRUCT: Aligning Language Models with Self-Generated Instructions.
From the original repository: The Self-Instruct process is an iterative bootstrapping algorithm that starts with a seed set of manually-written instructions and uses them to prompt the language model to generate new instructions and corresponding input-output instances. This Task is therefore especially interesting for generating new datasets from a set of predefined topics.
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import SelfInstructTask\n\ngenerator = OpenAILLM(\n task=SelfInstructTask(\n system_prompt=\"You are a question-answering assistant for...\",\n application_description=\"AI assistant\",\n num_instructions=3,\n ),\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n)\n
For the API reference visit SelfInstructTask.
"},{"location":"technical-reference/tasks/#labelling","title":"Labelling","text":"Instead of generating text, you can instruct the LLM
to label datasets. The existing tasks are designed specifically for creating both PreferenceTask
and CritiqueTask
datasets.
Preference datasets for Language Models (LLMs) are sets of information that show how people rank or prefer one thing over another in a straightforward and clear manner. These datasets help train language models to understand and generate content that aligns with user preferences, enhancing the model's ability to generate contextually relevant and preferred outputs.
Contrary to the TextGenerationTask
, the PreferenceTask
is not intended for direct use. It implements the default methods input_args_names
and output_args_names
, but generate_prompt
and parse_output
are specific to each PreferenceTask
. Examining the output_args_names
reveals that the generation will encompass both the rating and the rationale that influenced that rating.
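For instance, a quick way to inspect what a concrete preference task consumes and produces is shown below; the commented column names are illustrative and may differ between versions:
from distilabel.tasks import UltraFeedbackTask\n\ntask = UltraFeedbackTask.for_text_quality()\n\n# Columns read from the dataset to build the prompt\nprint(task.input_args_names)   # e.g. ['input', 'generations']\n\n# Columns appended to the labelled dataset\nprint(task.output_args_names)  # e.g. ['rating', 'rationale']\n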
This task is specifically designed to build the prompts following the format defined in the \"UltraFeedback: Boosting Language Models With High Quality Feedback\" paper.
From the original repository: To collect high-quality preference and textual feedback, we design a fine-grained annotation instruction, which contains 4 different aspects, namely instruction-following, truthfulness, honesty and helpfulness. This Task
is designed to label datasets following the different aspects defined for the UltraFeedback dataset creation.
The following snippet can be used as a simplified UltraFeedback Task, for which we define 3 different ratings; bear in mind, however, that the predefined versions are intended to be used out of the box:
from textwrap import dedent\n\nfrom distilabel.tasks.preference.ultrafeedback import Rating, UltraFeedbackTask\n\ntask_description = dedent(\n \"\"\"\n # General Text Quality Assessment\n Evaluate the model's outputs based on various criteria:\n 1. **Correctness & Informativeness**: Does the output provide accurate and helpful information?\n 2. **Honesty & Uncertainty**: How confidently does the model convey its information, and does it express uncertainty appropriately?\n 3. **Truthfulness & Hallucination**: Does the model introduce misleading or fabricated details?\n 4. **Instruction Following**: Does the model's output align with given instructions and the user's intent?\n Your role is to provide a holistic assessment considering all the above factors.\n\n **Scoring**: Rate outputs 1 to 3 based on the overall quality, considering all aspects:\n \"\"\"\n)\n\nratings = [\n Rating(value=1, description=\"Low Quality\"),\n Rating(value=2, description=\"Moderate Quality\"),\n Rating(value=3, description=\"Good Quality\"),\n]\n\nultrafeedback_task = UltraFeedbackTask(\n system_prompt=\"Your role is to evaluate text quality based on given criteria\",\n task_description=task_description,\n ratings=ratings,\n)\n
The predefined variants cover Text Quality, Helpfulness, Truthfulness, Honesty and Instruction Following. The following example uses an LLM to examine the data for text quality criteria, which includes the different criteria from UltraFeedback (Correctness & Informativeness, Honesty & Uncertainty, Truthfulness & Hallucination and Instruction Following):
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraFeedbackTask\n\nlabeller = OpenAILLM(\n task=UltraFeedbackTask.for_text_quality(),\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n)\n
The following example creates an UltraFeedback task to emphasize helpfulness, that is, the overall quality and correctness of the output:
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraFeedbackTask\n\nlabeller = OpenAILLM(\n task=UltraFeedbackTask.for_helpfulness(), openai_api_key=os.getenv(\"OPENAI_API_KEY\")\n)\n
The following example creates an UltraFeedback task to emphasize truthfulness and hallucination assessment:
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraFeedbackTask\n\nlabeller = OpenAILLM(\n task=UltraFeedbackTask.for_truthfulness(),\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n)\n
The following example creates an UltraFeedback task to emphasize honesty and uncertainty expression assessment:
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraFeedbackTask\n\nlabeller = OpenAILLM(\n task=UltraFeedbackTask.for_honesty(), openai_api_key=os.getenv(\"OPENAI_API_KEY\")\n)\n
The following example creates an UltraFeedback task to emphasize the evaluation of alignment between output and intent:
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraFeedbackTask\n\nlabeller = OpenAILLM(\n task=UltraFeedbackTask.for_instruction_following(),\n openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n)\n
For the API reference visit UltraFeedbackTask.
"},{"location":"technical-reference/tasks/#judgelmtask","title":"JudgeLMTask","text":"The task specially designed to build the prompts following the UltraFeedback paper: JudgeLM: Fine-tuned Large Language Models Are Scalable Judges. This task is designed to evaluate the performance of AI assistants.
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import JudgeLMTask\n\nlabeller = OpenAILLM(task=JudgeLMTask(), openai_api_key=os.getenv(\"OPENAI_API_KEY\"))\n
For the API reference visit JudgeLMTask.
"},{"location":"technical-reference/tasks/#ultrajudgetask","title":"UltraJudgeTask","text":"This class implements a PreferenceTask
specifically for a better evaluation using AI Feedback. The task is defined based on both UltraFeedback and JudgeLM, but with several improvements / modifications.
It introduces an additional argument to differentiate various areas for processing. While these areas can be customized, the default values are as follows:
from distilabel.tasks import UltraJudgeTask\n\n# To see the complete system_prompt and task_description please take a look at the UltraJudgeTask definition\nultrajudge_task = UltraJudgeTask(\n system_prompt=\"You are an evaluator tasked with assessing AI assistants' responses from the perspective of typical user preferences...\",\n task_description=\"Your task is to rigorously evaluate the performance of...\",\n areas=[\n \"Practical Accuracy\",\n \"Clarity & Transparency\",\n \"Authenticity & Reliability\",\n \"Compliance with Intent\",\n ],\n)\n
It can then be used directly in the following way:
import os\n\nfrom distilabel.llm import OpenAILLM\nfrom distilabel.tasks import UltraJudgeTask\n\nlabeller = OpenAILLM(task=UltraJudgeTask(), openai_api_key=os.getenv(\"OPENAI_API_KEY\"))\n
For the API reference visit UltraJudgeTask.
"},{"location":"technical-reference/tasks/#critique","title":"Critique","text":"The CritiqueTask
is designed to be a labeller for generated text that not only adds scores based on a rubric, but also critiques explaining why those scores have been given. The critique can either use a reference answer (gold answer), as e.g. Prometheus does, or simply be generated for each of the N provided generations.
The resulting datasets after running a pipeline with the CritiqueTask
are useful either for training a model to generate critiques, based on the critiques generated by a more powerful model such as GPT-4 from OpenAI, or for direct use in DPO fine-tuning. Because a critique is generated per generation, a balanced dataset can be built from the individual critiques and their scores, so that we can e.g. define a threshold on what is considered chosen and rejected, and then run DPO fine-tunes.
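As an illustrative sketch only (the row layout, column names and threshold below are hypothetical and not part of distilabel), the per-generation critique scores could be turned into chosen/rejected pairs for DPO along these lines:
# Hypothetical rows: each generation paired with the score from its critique.\nrows = [\n    {\"input\": \"Explain DPO briefly.\", \"generation\": \"DPO is ...\", \"score\": 4.0},\n    {\"input\": \"Explain DPO briefly.\", \"generation\": \"idk\", \"score\": 1.0},\n]\n\nTHRESHOLD = 3.0  # hypothetical cut-off between chosen and rejected\n\nchosen = [r for r in rows if r[\"score\"] >= THRESHOLD]\nrejected = [r for r in rows if r[\"score\"] < THRESHOLD]\n\n# Pair chosen and rejected generations that answer the same input to build DPO examples.\ndpo_pairs = [\n    {\"prompt\": c[\"input\"], \"chosen\": c[\"generation\"], \"rejected\": r[\"generation\"]}\n    for c in chosen\n    for r in rejected\n    if c[\"input\"] == r[\"input\"]\n]\n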
While the CritiqueTask
may seem fairly similar to the PreferenceTask
, there is a core difference: the critiques are provided per response, or even for a single response, with no need to compare or rate them against each other.
This task is specifically designed to build the prompts following the format defined in the \"UltraFeedback: Boosting Language Models With High Quality Feedback\" paper.
UltraCM is a model that has been fine-tuned using the UltraFeedback dataset, so as to produce critiques for the generated content, as the authors claim in their paper: \"Moreover, since ULTRAFEEDBACK provides detailed textual feedback, we also fine-tune a model that could critique model responses automatically. Our critique model, UltraCM, generates reasonable and detailed comments on various tasks.\".
Ideally, the UltraCMTask
will be more consistent when used either with their fine-tuned UltraCM model or with OpenAI, as both have been proven to successfully produce content that follows the prompt formatting and is not only well structured, but also meaningful and reasonable.
See the following snippet for an example of how to instantiate the UltraCMTask, which only requires the system prompt. The system prompt can be modified depending on how the critique is intended to be formulated; the one shown below is the default from the UltraFeedback paper.
from distilabel.tasks import UltraCMTask\n\ntask = UltraCMTask(\n system_prompt=\"User: A one-turn chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, very detailed, and polite answers to the user's questions.</s>\",\n)\n
"},{"location":"technical-reference/tasks/#prometheustask","title":"PrometheusTask","text":"This task is specifically designed to build the prompts following the format defined in the \"Prometheus: Inducing Fine-grained Evaluation Capability in Language Models\" paper.
Ideally, the PrometheusTask
should only be used to format the prompts for the Prometheus models, as those are the ones fine-tuned to follow this formatting and will produce more consistent results than other base models or models fine-tuned with different formats. Since the formatting used by Prometheus follows the Llama 2 format, Llama 2-based models are recommended; otherwise, OpenAI has also proven to produce consistent results.
The following snippet can be used out of the box to define a simple PrometheusTask
with the system prompt, the scoring criteria and the score descriptions. These can be modified, keeping in mind that Prometheus always expects 5 scores from 1 to 5, each with a meaningful description, as well as a scoring criteria relevant to the scores defined.
from distilabel.tasks import PrometheusTask\n\ntask = PrometheusTask(\n system_prompt=\"You are a fair evaluator language model.\",\n scoring_criteria=\"Relevance, Grammar, Informativeness, Engagement\",\n score_descriptions={\n 1: \"The response is not relevant to the prompt.\",\n 2: \"The response is relevant to the prompt, but it is not grammatical.\",\n 3: \"The response is relevant to the prompt and it is grammatical, but it is not informative.\",\n 4: \"The response is relevant to the prompt, it is grammatical, and it is informative, but it is not engaging.\",\n 5: \"The response is relevant to the prompt, it is grammatical, it is informative, and it is engaging.\",\n },\n)\n
The principles can be found here in the codebase. More information on the Principle Sampling can be found in the UltraFeedback repository.\u00a0\u21a9