From bfbe2ecc23ca6267e2caa078d6e8e31da8b5150c Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 12:48:05 +0100 Subject: [PATCH 01/11] add magpie support llama-cpp --- src/distilabel/models/llms/llamacpp.py | 161 ++++++++++++++++++++++--- 1 file changed, 145 insertions(+), 16 deletions(-) diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 77e2707c1c..8ecfc9d286 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -20,13 +20,19 @@ from distilabel.models.llms.base import LLM from distilabel.models.llms.typing import GenerateOutput from distilabel.models.llms.utils import prepare_output +from distilabel.models.mixins.magpie import MagpieChatTemplateMixin from distilabel.steps.tasks.typing import FormattedInput, OutlinesStructuredOutputType if TYPE_CHECKING: from llama_cpp import CreateChatCompletionResponse, Llama, LogitsProcessorList + from distilabel.steps.tasks.typing import ( + FormattedInput, + StandardInput, + ) + -class LlamaCppLLM(LLM): +class LlamaCppLLM(LLM, MagpieChatTemplateMixin): """llama.cpp LLM implementation running the Python bindings for the C++ code. Attributes: @@ -44,6 +50,16 @@ class LlamaCppLLM(LLM): fine-grained control is needed, an instance of `OutlinesStructuredOutput`. Defaults to None. extra_kwargs: additional dictionary of keyword arguments that will be passed to the `Llama` class of `llama_cpp` library. Defaults to `{}`. + tokenizer_id: the tokenizer Hugging Face Hub repo id or a path to a directory containing + the tokenizer config files. If not provided, the one associated to the `model` + will be used. Defaults to `None`. + + use_magpie_template: a flag used to enable/disable applying the Magpie pre-query + template. Defaults to `False`. + magpie_pre_query_template: the pre-query template to be applied to the prompt or + sent to the LLM to generate an instruction or a follow up user message. Valid + values are "llama3", "qwen2" or another pre-query template provided. Defaults + to `None`. _model: the Llama model instance. This attribute is meant to be used internally and should not be accessed directly. It will be set in the `load` method. @@ -140,7 +156,21 @@ class User(BaseModel): default=None, description="The structured output format to use across all the generations.", ) - + tokenizer_id: Optional[RuntimeParameter[str]] = Field( + default=None, + description="The tokenizer Hugging Face Hub repo id or a path to a directory containing" + " the tokenizer config files. If not provided, the one associated to the `model`" + " will be used.", + ) + use_magpie_template: RuntimeParameter[bool] = Field( + default=False, + description="Whether to use the Magpie pre-query template or not.", + ) + magpie_pre_query_template: Optional[RuntimeParameter[str]] = Field( + default=None, + description="The pre-query template to use for the model. Valid values are " + "`llama3`, `qwen2` or another pre-query template provided.", + ) _logits_processor: Optional["LogitsProcessorList"] = PrivateAttr(default=None) _model: Optional["Llama"] = PrivateAttr(...) @@ -169,6 +199,24 @@ def load(self) -> None: self.structured_output ) + if self.use_magpie_template or self.magpie_pre_query_template: + if not self.tokenizer_id: + raise ValueError( + "The Hugging Face Hub repo id or a path to a directory containing" + " the tokenizer config files is required when using the `use_magpie_template`" + " or `magpie_pre_query_template` runtime parameters." 
+ ) + + if self.tokenizer_id: + try: + from transformers import AutoTokenizer + except ImportError as ie: + raise ImportError( + "Transformers is not installed. Please install it using `pip install transformers`." + ) from ie + + self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_id) + # NOTE: Here because of the custom `logging` interface used, since it will create the logging name # out of the model name, which won't be available until the `Llama` instance is created. super().load() @@ -178,6 +226,75 @@ def model_name(self) -> str: """Returns the model name used for the LLM.""" return self._model.model_path # type: ignore + def _generate_chat_completion( + self, + input: FormattedInput, + max_new_tokens: int = 128, + frequency_penalty: float = 0.0, + presence_penalty: float = 0.0, + temperature: float = 1.0, + top_p: float = 1.0, + extra_generation_kwargs: Optional[Dict[str, Any]] = None, + ) -> "CreateChatCompletionResponse": + return self._model.create_chat_completion( # type: ignore + messages=input, # type: ignore + max_tokens=max_new_tokens, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + temperature=temperature, + top_p=top_p, + logits_processor=self._logits_processor, + **(extra_generation_kwargs or {}), + ) + + def prepare_input(self, input: "StandardInput") -> str: + """Prepares the input (applying the chat template and tokenization) for the provided + input. + + Args: + input: the input list containing chat items. + + Returns: + The prompt to send to the LLM. + """ + prompt: str = ( + self._tokenizer.apply_chat_template( # type: ignore + conversation=input, # type: ignore + tokenize=False, + add_generation_prompt=True, + ) + if input + else "" + ) + return super().apply_magpie_pre_query_template(prompt, input) + + def _generate_with_text_generation( + self, + input: FormattedInput, + max_new_tokens: int = 128, + frequency_penalty: float = 0.0, + presence_penalty: float = 0.0, + temperature: float = 1.0, + top_p: float = 1.0, + extra_generation_kwargs: Optional[Dict[str, Any]] = None, + ) -> "CreateChatCompletionResponse": + import pdb + + prompt = self.prepare_input(input) + pdb.set_trace() + output = self._model.create_completion( + prompt=prompt, + max_tokens=max_new_tokens, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + temperature=temperature, + top_p=top_p, + logits_processor=self._logits_processor, + **(extra_generation_kwargs or {}), + ) + pdb.set_trace() + return output + @validate_call def generate( # type: ignore self, @@ -230,24 +347,36 @@ def generate( # type: ignore self._logits_processor = self._prepare_structured_output( structured_output ) - chat_completions: "CreateChatCompletionResponse" = ( - self._model.create_chat_completion( # type: ignore - messages=input, # type: ignore - max_tokens=max_new_tokens, - frequency_penalty=frequency_penalty, - presence_penalty=presence_penalty, - temperature=temperature, - top_p=top_p, - logits_processor=self._logits_processor, - **(extra_generation_kwargs or {}), + if self.tokenizer_id is None: + completion = self._generate_chat_completion( + input, + max_new_tokens, + frequency_penalty, + presence_penalty, + temperature, + top_p, + extra_generation_kwargs, ) - ) - outputs.append(chat_completions["choices"][0]["message"]["content"]) - output_tokens.append(chat_completions["usage"]["completion_tokens"]) + outputs.append(completion["choices"][0]["message"]["content"]) + output_tokens.append(completion["usage"]["completion_tokens"]) + else: + completion: 
"CreateChatCompletionResponse" = ( + self._generate_with_text_generation( # type: ignore + input, + max_new_tokens, + frequency_penalty, + presence_penalty, + temperature, + top_p, + extra_generation_kwargs, + ) + ) + outputs.append(completion["choices"][0]["text"]) + output_tokens.append(completion["usage"]["completion_tokens"]) batch_outputs.append( prepare_output( outputs, - input_tokens=[chat_completions["usage"]["prompt_tokens"]] + input_tokens=[completion["usage"]["prompt_tokens"]] * num_generations, output_tokens=output_tokens, ) From 0efe7a5f6e19e2d8a5a8d29edc8adc85bb6ef4b3 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 13:00:55 +0100 Subject: [PATCH 02/11] remove pdb trace --- src/distilabel/models/llms/llamacpp.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 8ecfc9d286..be232beb85 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -53,7 +53,6 @@ class LlamaCppLLM(LLM, MagpieChatTemplateMixin): tokenizer_id: the tokenizer Hugging Face Hub repo id or a path to a directory containing the tokenizer config files. If not provided, the one associated to the `model` will be used. Defaults to `None`. - use_magpie_template: a flag used to enable/disable applying the Magpie pre-query template. Defaults to `False`. magpie_pre_query_template: the pre-query template to be applied to the prompt or @@ -278,11 +277,8 @@ def _generate_with_text_generation( top_p: float = 1.0, extra_generation_kwargs: Optional[Dict[str, Any]] = None, ) -> "CreateChatCompletionResponse": - import pdb - prompt = self.prepare_input(input) - pdb.set_trace() - output = self._model.create_completion( + return self._model.create_completion( prompt=prompt, max_tokens=max_new_tokens, frequency_penalty=frequency_penalty, @@ -292,8 +288,6 @@ def _generate_with_text_generation( logits_processor=self._logits_processor, **(extra_generation_kwargs or {}), ) - pdb.set_trace() - return output @validate_call def generate( # type: ignore From d275152cbf5004bd38f8d0e888aced0d0ac23dc7 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 14:27:46 +0100 Subject: [PATCH 03/11] fix imports --- src/distilabel/models/llms/anthropic.py | 2 +- src/distilabel/models/llms/cohere.py | 2 +- src/distilabel/models/llms/groq.py | 2 +- src/distilabel/models/llms/mistral.py | 2 +- src/distilabel/models/llms/openai.py | 3 +-- src/distilabel/models/llms/vertexai.py | 2 +- src/distilabel/steps/tasks/evol_instruct/base.py | 2 +- src/distilabel/steps/tasks/evol_instruct/generator.py | 2 +- 8 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/distilabel/models/llms/anthropic.py b/src/distilabel/models/llms/anthropic.py index c6c79a9141..0eefc092dc 100644 --- a/src/distilabel/models/llms/anthropic.py +++ b/src/distilabel/models/llms/anthropic.py @@ -42,7 +42,7 @@ from anthropic import AsyncAnthropic from anthropic.types import Message - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics _ANTHROPIC_API_KEY_ENV_VAR_NAME = "ANTHROPIC_API_KEY" diff --git a/src/distilabel/models/llms/cohere.py b/src/distilabel/models/llms/cohere.py index 043ac4214c..8b081a762e 100644 --- a/src/distilabel/models/llms/cohere.py +++ b/src/distilabel/models/llms/cohere.py @@ -40,7 +40,7 @@ from pydantic import BaseModel from tokenizers import Tokenizer - from distilabel.llms.typing import LLMStatistics + from 
distilabel.models.llms.typing import LLMStatistics _COHERE_API_KEY_ENV_VAR_NAME = "COHERE_API_KEY" diff --git a/src/distilabel/models/llms/groq.py b/src/distilabel/models/llms/groq.py index 2977c513f3..8000211936 100644 --- a/src/distilabel/models/llms/groq.py +++ b/src/distilabel/models/llms/groq.py @@ -30,7 +30,7 @@ from groq import AsyncGroq from groq.types.chat.chat_completion import ChatCompletion - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics _GROQ_API_BASE_URL_ENV_VAR_NAME = "GROQ_BASE_URL" diff --git a/src/distilabel/models/llms/mistral.py b/src/distilabel/models/llms/mistral.py index 873565091b..9fe9f357da 100644 --- a/src/distilabel/models/llms/mistral.py +++ b/src/distilabel/models/llms/mistral.py @@ -30,7 +30,7 @@ from mistralai import Mistral from mistralai.models.chatcompletionresponse import ChatCompletionResponse - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics _MISTRALAI_API_KEY_ENV_VAR_NAME = "MISTRAL_API_KEY" diff --git a/src/distilabel/models/llms/openai.py b/src/distilabel/models/llms/openai.py index c53122fa63..37bb5bb6be 100644 --- a/src/distilabel/models/llms/openai.py +++ b/src/distilabel/models/llms/openai.py @@ -35,8 +35,7 @@ from openai.types.chat.chat_completion import Choice as OpenAIChoice from openai.types.completion import Completion as OpenAICompletion - from distilabel.llms.typing import LLMStatistics - from distilabel.models.llms.typing import Logprob + from distilabel.models.llms.typing import LLMStatistics, Logprob _OPENAI_API_KEY_ENV_VAR_NAME = "OPENAI_API_KEY" diff --git a/src/distilabel/models/llms/vertexai.py b/src/distilabel/models/llms/vertexai.py index 8f5dc28bbd..62235dd321 100644 --- a/src/distilabel/models/llms/vertexai.py +++ b/src/distilabel/models/llms/vertexai.py @@ -25,7 +25,7 @@ if TYPE_CHECKING: from vertexai.generative_models import Content, GenerationResponse, GenerativeModel - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics class VertexChatItem(TypedDict): diff --git a/src/distilabel/steps/tasks/evol_instruct/base.py b/src/distilabel/steps/tasks/evol_instruct/base.py index 3f2ba5da4f..f1a44d6a84 100644 --- a/src/distilabel/steps/tasks/evol_instruct/base.py +++ b/src/distilabel/steps/tasks/evol_instruct/base.py @@ -27,7 +27,7 @@ from distilabel.utils.lists import flatten_responses if TYPE_CHECKING: - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics from distilabel.steps.typing import StepOutput diff --git a/src/distilabel/steps/tasks/evol_instruct/generator.py b/src/distilabel/steps/tasks/evol_instruct/generator.py index fa568d392e..6f985464eb 100644 --- a/src/distilabel/steps/tasks/evol_instruct/generator.py +++ b/src/distilabel/steps/tasks/evol_instruct/generator.py @@ -33,7 +33,7 @@ from distilabel.utils.lists import flatten_responses if TYPE_CHECKING: - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics from distilabel.steps.tasks.typing import ChatType from distilabel.steps.typing import GeneratorStepOutput From ea6af5900d93241fb1b0674e32bca28b9d7725a4 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 14:28:04 +0100 Subject: [PATCH 04/11] refactor code formatting --- src/distilabel/models/llms/llamacpp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/distilabel/models/llms/llamacpp.py 
b/src/distilabel/models/llms/llamacpp.py index be232beb85..53af5b5cbb 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -213,7 +213,6 @@ def load(self) -> None: raise ImportError( "Transformers is not installed. Please install it using `pip install transformers`." ) from ie - self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_id) # NOTE: Here because of the custom `logging` interface used, since it will create the logging name From c6708c9cf64e169f78b7255fc4a936d7101b7bdb Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 14:29:19 +0100 Subject: [PATCH 05/11] add ollama support magpie --- src/distilabel/models/llms/ollama.py | 135 ++++++++++++++++++++++++--- 1 file changed, 121 insertions(+), 14 deletions(-) diff --git a/src/distilabel/models/llms/ollama.py b/src/distilabel/models/llms/ollama.py index f704627487..782ce93103 100644 --- a/src/distilabel/models/llms/ollama.py +++ b/src/distilabel/models/llms/ollama.py @@ -14,6 +14,7 @@ from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Sequence, Union +from llama_cpp.llama_types import CreateChatCompletionResponse from pydantic import Field, PrivateAttr, validate_call from typing_extensions import TypedDict @@ -21,12 +22,17 @@ from distilabel.models.llms.base import AsyncLLM from distilabel.models.llms.typing import GenerateOutput from distilabel.models.llms.utils import prepare_output +from distilabel.models.mixins.magpie import MagpieChatTemplateMixin from distilabel.steps.tasks.typing import InstructorStructuredOutputType, StandardInput if TYPE_CHECKING: + from llama_cpp import CreateChatCompletionResponse from ollama import AsyncClient - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics + from distilabel.steps.tasks.typing import ( + StandardInput, + ) # Copied from `ollama._types.Options` @@ -69,13 +75,25 @@ class Options(TypedDict, total=False): stop: Sequence[str] -class OllamaLLM(AsyncLLM): +class OllamaLLM(AsyncLLM, MagpieChatTemplateMixin): """Ollama LLM implementation running the Async API client. Attributes: model: the model name to use for the LLM e.g. "notus". host: the Ollama server host. timeout: the timeout for the LLM. Defaults to `120`. + follow_redirects: whether to follow redirects. Defaults to `True`. + structured_output: a dictionary containing the structured output configuration or if more + fine-grained control is needed, an instance of `OutlinesStructuredOutput`. Defaults to None. + tokenizer_id: the tokenizer Hugging Face Hub repo id or a path to a directory containing + the tokenizer config files. If not provided, the one associated to the `model` + will be used. Defaults to `None`. + use_magpie_template: a flag used to enable/disable applying the Magpie pre-query + template. Defaults to `False`. + magpie_pre_query_template: the pre-query template to be applied to the prompt or + sent to the LLM to generate an instruction or a follow up user message. Valid + values are "llama3", "qwen2" or another pre-query template provided. Defaults + to `None`. _aclient: the `AsyncClient` to use for the Ollama API. It is meant to be used internally. Set in the `load` method. 
@@ -112,9 +130,22 @@ class OllamaLLM(AsyncLLM): description="The structured output format to use across all the generations.", ) ) - + tokenizer_id: Optional[RuntimeParameter[str]] = Field( + default=None, + description="The tokenizer Hugging Face Hub repo id or a path to a directory containing" + " the tokenizer config files. If not provided, the one associated to the `model`" + " will be used.", + ) + use_magpie_template: RuntimeParameter[bool] = Field( + default=False, + description="Whether to use the Magpie pre-query template or not.", + ) + magpie_pre_query_template: Optional[RuntimeParameter[str]] = Field( + default=None, + description="The pre-query template to use for the model. Valid values are " + "`llama3`, `qwen2` or another pre-query template provided.", + ) _num_generations_param_supported = False - _aclient: Optional["AsyncClient"] = PrivateAttr(...) def load(self) -> None: @@ -135,13 +166,83 @@ def load(self) -> None: " `pip install ollama`." ) from e + if self.use_magpie_template or self.magpie_pre_query_template: + if not self.tokenizer_id: + raise ValueError( + "The Hugging Face Hub repo id or a path to a directory containing" + " the tokenizer config files is required when using the `use_magpie_template`" + " or `magpie_pre_query_template` runtime parameters." + ) + + if self.tokenizer_id: + try: + from transformers import AutoTokenizer + except ImportError as ie: + raise ImportError( + "Transformers is not installed. Please install it using `pip install transformers`." + ) from ie + self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_id) + @property def model_name(self) -> str: """Returns the model name used for the LLM.""" return self.model + async def _generate_chat_completion( + self, + input: "StandardInput", + format: Literal["", "json"] = "", + options: Union[Options, None] = None, + keep_alive: Union[bool, None] = None, + ) -> "CreateChatCompletionResponse": + return await self._aclient.chat( + model=self.model, + messages=input, + stream=False, + format=format, + options=options, + keep_alive=keep_alive, + ) + + def prepare_input(self, input: "StandardInput") -> str: + """Prepares the input (applying the chat template and tokenization) for the provided + input. + + Args: + input: the input list containing chat items. + + Returns: + The prompt to send to the LLM. 
+ """ + prompt: str = ( + self._tokenizer.apply_chat_template( + conversation=input, + tokenize=False, + add_generation_prompt=True, + ) + if input + else "" + ) + return super().apply_magpie_pre_query_template(prompt, input) + + async def _generate_with_text_generation( + self, + input: "StandardInput", + format: Literal["", "json"] = None, + options: Union[Options, None] = None, + keep_alive: Union[bool, None] = None, + ) -> "CreateChatCompletionResponse": + input = self.prepare_input(input) + return await self._aclient.generate( + model=self.model, + prompt=input, + format=format, + options=options, + keep_alive=keep_alive, + ) + @validate_call - async def agenerate( # type: ignore + async def agenerate( self, input: StandardInput, format: Literal["", "json"] = "", @@ -163,15 +264,21 @@ async def agenerate( # type: ignore """ text = None try: - completion: Dict[str, Any] = await self._aclient.chat( # type: ignore - model=self.model, - messages=input, # type: ignore - stream=False, - format=format, - options=options, - keep_alive=keep_alive, - ) - text = completion["message"]["content"] + if not format: + format = None + if self.tokenizer_id is None: + completion = await self._generate_chat_completion( + input, format, options, keep_alive + ) + text = completion["message"]["content"] + else: + completion: CreateChatCompletionResponse = ( + await self._generate_with_text_generation( + input, format, options, keep_alive + ) + ) + + text = completion.response except Exception as e: self._logger.warning( # type: ignore f"⚠️ Received no response using Ollama client (model: '{self.model_name}')." From 2ddfa3faf36849c213d659de765640cf386f5c3d Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 15:22:23 +0100 Subject: [PATCH 06/11] fix set flag to raw --- src/distilabel/models/llms/ollama.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/distilabel/models/llms/ollama.py b/src/distilabel/models/llms/ollama.py index 782ce93103..381f041693 100644 --- a/src/distilabel/models/llms/ollama.py +++ b/src/distilabel/models/llms/ollama.py @@ -239,6 +239,7 @@ async def _generate_with_text_generation( format=format, options=options, keep_alive=keep_alive, + raw=True, ) @validate_call @@ -277,7 +278,6 @@ async def agenerate( input, format, options, keep_alive ) ) - text = completion.response except Exception as e: self._logger.warning( # type: ignore From 6b9316c37aba4ccdaf52b259380635d422657304 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 23 Dec 2024 09:15:52 +0100 Subject: [PATCH 07/11] fix tests ollama llamacpp serialisation --- tests/unit/models/llms/test_llamacpp.py | 6 ++++++ tests/unit/models/llms/test_ollama.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/tests/unit/models/llms/test_llamacpp.py b/tests/unit/models/llms/test_llamacpp.py index 94bf008f19..823793b9b9 100644 --- a/tests/unit/models/llms/test_llamacpp.py +++ b/tests/unit/models/llms/test_llamacpp.py @@ -83,6 +83,9 @@ def test_generate(self, llm: LlamaCppLLM) -> None: "name": "LlamaCppLLM", }, "verbose": False, + "magpie_pre_query_template": None, + "tokenizer_id": None, + "use_magpie_template": False, }, ), ( @@ -110,6 +113,9 @@ def test_generate(self, llm: LlamaCppLLM) -> None: "name": "LlamaCppLLM", }, "verbose": False, + "magpie_pre_query_template": None, + "tokenizer_id": None, + "use_magpie_template": False, }, ), ], diff --git a/tests/unit/models/llms/test_ollama.py b/tests/unit/models/llms/test_ollama.py index 167ec6a1dc..f3d4a3ef47 100644 --- 
a/tests/unit/models/llms/test_ollama.py +++ b/tests/unit/models/llms/test_ollama.py @@ -97,6 +97,9 @@ def test_serialization(self, _: MagicMock) -> None: "generation_kwargs": {}, "structured_output": None, "jobs_ids": None, + "magpie_pre_query_template": None, + "tokenizer_id": None, + "use_magpie_template": False, "offline_batch_generation_block_until_done": None, "use_offline_batch_generation": False, "type_info": { From 5fc6d478c37f2cc464ec6d21465aff5c09bac432 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 23 Dec 2024 09:32:13 +0100 Subject: [PATCH 08/11] refactor magpie template validation --- src/distilabel/models/llms/llamacpp.py | 14 +++++++++++++- src/distilabel/models/llms/ollama.py | 22 +++++++++++++--------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 53af5b5cbb..26c634a2f8 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union -from pydantic import Field, FilePath, PrivateAttr, validate_call +from pydantic import Field, FilePath, PrivateAttr, model_validator, validate_call from distilabel.mixins.runtime_parameters import RuntimeParameter from distilabel.models.llms.base import LLM @@ -173,6 +173,18 @@ class User(BaseModel): _logits_processor: Optional["LogitsProcessorList"] = PrivateAttr(default=None) _model: Optional["Llama"] = PrivateAttr(...) + @model_validator(mode="after") # type: ignore + def validate_magpie_usage( + self, + ) -> "LlamaCppLLM": + """Validates that magpie usage is valid.""" + + if self.use_magpie_template and self.tokenizer_id is None: + raise ValueError( + "`use_magpie_template` cannot be `True` if `tokenizer_id` is `None`. Please," + " set a `tokenizer_id` and try again." + ) + def load(self) -> None: """Loads the `Llama` model from the `model_path`.""" try: diff --git a/src/distilabel/models/llms/ollama.py b/src/distilabel/models/llms/ollama.py index 381f041693..fbacb69e3d 100644 --- a/src/distilabel/models/llms/ollama.py +++ b/src/distilabel/models/llms/ollama.py @@ -15,7 +15,7 @@ from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Sequence, Union from llama_cpp.llama_types import CreateChatCompletionResponse -from pydantic import Field, PrivateAttr, validate_call +from pydantic import Field, PrivateAttr, model_validator, validate_call from typing_extensions import TypedDict from distilabel.mixins.runtime_parameters import RuntimeParameter @@ -148,6 +148,18 @@ class OllamaLLM(AsyncLLM, MagpieChatTemplateMixin): _num_generations_param_supported = False _aclient: Optional["AsyncClient"] = PrivateAttr(...) + @model_validator(mode="after") # type: ignore + def validate_magpie_usage( + self, + ) -> "OllamaLLM": + """Validates that magpie usage is valid.""" + + if self.use_magpie_template and self.tokenizer_id is None: + raise ValueError( + "`use_magpie_template` cannot be `True` if `tokenizer_id` is `None`. Please," + " set a `tokenizer_id` and try again." + ) + def load(self) -> None: """Loads the `AsyncClient` to use Ollama async API.""" super().load() @@ -166,14 +178,6 @@ def load(self) -> None: " `pip install ollama`." 
) from e - if self.use_magpie_template or self.magpie_pre_query_template: - if not self.tokenizer_id: - raise ValueError( - "The Hugging Face Hub repo id or a path to a directory containing" - " the tokenizer config files is required when using the `use_magpie_template`" - " or `magpie_pre_query_template` runtime parameters." - ) - if self.tokenizer_id: try: from transformers import AutoTokenizer From 0da1adcdf42378432cc6f9a22a68e4425b45eac3 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 23 Dec 2024 09:41:58 +0100 Subject: [PATCH 09/11] fix failing tests --- tests/unit/models/llms/test_llamacpp.py | 11 +++++++++++ tests/unit/models/llms/test_ollama.py | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/tests/unit/models/llms/test_llamacpp.py b/tests/unit/models/llms/test_llamacpp.py index 823793b9b9..f897cf1954 100644 --- a/tests/unit/models/llms/test_llamacpp.py +++ b/tests/unit/models/llms/test_llamacpp.py @@ -38,6 +38,17 @@ def llm() -> Generator[LlamaCppLLM, None, None]: class TestLlamaCppLLM: + def test_no_tokenizer_magpie_raise_value_error(self) -> None: + with pytest.raises( + ValueError, + match="`use_magpie_template` cannot be `True` if `tokenizer_id` is `None`", + ): + LlamaCppLLM( + model_path="tinyllama.gguf", + use_magpie_template=True, + magpie_pre_query_template="llama3", + ) + def test_model_name(self, llm: LlamaCppLLM) -> None: assert llm.model_name == "tinyllama.gguf" diff --git a/tests/unit/models/llms/test_ollama.py b/tests/unit/models/llms/test_ollama.py index f3d4a3ef47..3d80846370 100644 --- a/tests/unit/models/llms/test_ollama.py +++ b/tests/unit/models/llms/test_ollama.py @@ -22,6 +22,17 @@ @patch("ollama.AsyncClient") class TestOllamaLLM: + def test_no_tokenizer_magpie_raise_value_error(self, _: MagicMock) -> None: + with pytest.raises( + ValueError, + match="`use_magpie_template` cannot be `True` if `tokenizer_id` is `None`", + ): + OllamaLLM( + model="llama3.1", + use_magpie_template=True, + magpie_pre_query_template="llama3", + ) + def test_ollama_llm(self, _: MagicMock) -> None: llm = OllamaLLM(model="notus") # type: ignore assert isinstance(llm, OllamaLLM) From 52f6eb4db10efb842af0a964663b6f213ca4119e Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 23 Dec 2024 10:14:37 +0100 Subject: [PATCH 10/11] add validation for chat_template --- src/distilabel/models/llms/llamacpp.py | 4 ++++ src/distilabel/models/llms/ollama.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 26c634a2f8..ba30735f61 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -226,6 +226,10 @@ def load(self) -> None: "Transformers is not installed. Please install it using `pip install transformers`." ) from ie self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_id) + if self._tokenizer.chat_template is None: + raise ValueError( + "The tokenizer does not have a chat template. Please use a tokenizer with a chat template." + ) # NOTE: Here because of the custom `logging` interface used, since it will create the logging name # out of the model name, which won't be available until the `Llama` instance is created. diff --git a/src/distilabel/models/llms/ollama.py b/src/distilabel/models/llms/ollama.py index fbacb69e3d..ae3f2715ce 100644 --- a/src/distilabel/models/llms/ollama.py +++ b/src/distilabel/models/llms/ollama.py @@ -186,6 +186,10 @@ def load(self) -> None: "Transformers is not installed. 
Please install it using `pip install transformers`." ) from ie self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_id) + if self._tokenizer.chat_template is None: + raise ValueError( + "The tokenizer does not have a chat template. Please use a tokenizer with a chat template." + ) @property def model_name(self) -> str: From 4e291e7bf1c27b734a683a3af1fefe58965d77d6 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 23 Dec 2024 10:20:57 +0100 Subject: [PATCH 11/11] fix llamacpp download tinyllama --- tests/unit/models/llms/test_llamacpp.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/unit/models/llms/test_llamacpp.py b/tests/unit/models/llms/test_llamacpp.py index f897cf1954..f5b9f51cec 100644 --- a/tests/unit/models/llms/test_llamacpp.py +++ b/tests/unit/models/llms/test_llamacpp.py @@ -23,14 +23,18 @@ from .utils import DummyUserDetail -@pytest.fixture(scope="module") -def llm() -> Generator[LlamaCppLLM, None, None]: +def download_tinyllama() -> None: if not os.path.exists("tinyllama.gguf"): urllib.request.urlretrieve( "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q2_K.gguf", "tinyllama.gguf", ) + +@pytest.fixture(scope="module") +def llm() -> Generator[LlamaCppLLM, None, None]: + download_tinyllama() + llm = LlamaCppLLM(model_path="tinyllama.gguf", n_gpu_layers=0) # type: ignore llm.load() @@ -39,6 +43,8 @@ def llm() -> Generator[LlamaCppLLM, None, None]: class TestLlamaCppLLM: def test_no_tokenizer_magpie_raise_value_error(self) -> None: + download_tinyllama() + with pytest.raises( ValueError, match="`use_magpie_template` cannot be `True` if `tokenizer_id` is `None`",
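
A minimal usage sketch of the Magpie support added by this patch series, exercising the new `tokenizer_id`, `use_magpie_template` and `magpie_pre_query_template` parameters of `LlamaCppLLM`. The GGUF path and the Hugging Face tokenizer repo id below are illustrative assumptions, not values taken from the patches; any tokenizer passed this way must ship a chat template (enforced in PATCH 10/11), and both `llama-cpp-python` and `transformers` need to be installed.

# Usage sketch only: model path and tokenizer repo id are placeholders.
from distilabel.models.llms.llamacpp import LlamaCppLLM

llm = LlamaCppLLM(
    model_path="tinyllama.gguf",  # local GGUF weights, assumed to exist
    n_gpu_layers=0,
    tokenizer_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # tokenizer providing the chat template
    use_magpie_template=True,
    magpie_pre_query_template="llama3",
)
llm.load()

# With `tokenizer_id` set, generation goes through `_generate_with_text_generation`:
# `prepare_input` applies the tokenizer's chat template and the Magpie mixin may append
# the pre-query template so the model can write the user instruction itself.
result = llm.generate(
    inputs=[[{"role": "user", "content": "Write a short haiku about rivers."}]],
    num_generations=1,
)
print(result)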