From bfbe2ecc23ca6267e2caa078d6e8e31da8b5150c Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 12:48:05 +0100 Subject: [PATCH 01/11] add magpie support llama-cpp --- src/distilabel/models/llms/llamacpp.py | 161 ++++++++++++++++++++++--- 1 file changed, 145 insertions(+), 16 deletions(-) diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 77e2707c1c..8ecfc9d286 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -20,13 +20,19 @@ from distilabel.models.llms.base import LLM from distilabel.models.llms.typing import GenerateOutput from distilabel.models.llms.utils import prepare_output +from distilabel.models.mixins.magpie import MagpieChatTemplateMixin from distilabel.steps.tasks.typing import FormattedInput, OutlinesStructuredOutputType if TYPE_CHECKING: from llama_cpp import CreateChatCompletionResponse, Llama, LogitsProcessorList + from distilabel.steps.tasks.typing import ( + FormattedInput, + StandardInput, + ) + -class LlamaCppLLM(LLM): +class LlamaCppLLM(LLM, MagpieChatTemplateMixin): """llama.cpp LLM implementation running the Python bindings for the C++ code. Attributes: @@ -44,6 +50,16 @@ class LlamaCppLLM(LLM): fine-grained control is needed, an instance of `OutlinesStructuredOutput`. Defaults to None. extra_kwargs: additional dictionary of keyword arguments that will be passed to the `Llama` class of `llama_cpp` library. Defaults to `{}`. + tokenizer_id: the tokenizer Hugging Face Hub repo id or a path to a directory containing + the tokenizer config files. If not provided, the one associated to the `model` + will be used. Defaults to `None`. + + use_magpie_template: a flag used to enable/disable applying the Magpie pre-query + template. Defaults to `False`. + magpie_pre_query_template: the pre-query template to be applied to the prompt or + sent to the LLM to generate an instruction or a follow up user message. Valid + values are "llama3", "qwen2" or another pre-query template provided. Defaults + to `None`. _model: the Llama model instance. This attribute is meant to be used internally and should not be accessed directly. It will be set in the `load` method. @@ -140,7 +156,21 @@ class User(BaseModel): default=None, description="The structured output format to use across all the generations.", ) - + tokenizer_id: Optional[RuntimeParameter[str]] = Field( + default=None, + description="The tokenizer Hugging Face Hub repo id or a path to a directory containing" + " the tokenizer config files. If not provided, the one associated to the `model`" + " will be used.", + ) + use_magpie_template: RuntimeParameter[bool] = Field( + default=False, + description="Whether to use the Magpie pre-query template or not.", + ) + magpie_pre_query_template: Optional[RuntimeParameter[str]] = Field( + default=None, + description="The pre-query template to use for the model. Valid values are " + "`llama3`, `qwen2` or another pre-query template provided.", + ) _logits_processor: Optional["LogitsProcessorList"] = PrivateAttr(default=None) _model: Optional["Llama"] = PrivateAttr(...) @@ -169,6 +199,24 @@ def load(self) -> None: self.structured_output ) + if self.use_magpie_template or self.magpie_pre_query_template: + if not self.tokenizer_id: + raise ValueError( + "The Hugging Face Hub repo id or a path to a directory containing" + " the tokenizer config files is required when using the `use_magpie_template`" + " or `magpie_pre_query_template` runtime parameters." 
+ ) + + if self.tokenizer_id: + try: + from transformers import AutoTokenizer + except ImportError as ie: + raise ImportError( + "Transformers is not installed. Please install it using `pip install transformers`." + ) from ie + + self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_id) + # NOTE: Here because of the custom `logging` interface used, since it will create the logging name # out of the model name, which won't be available until the `Llama` instance is created. super().load() @@ -178,6 +226,75 @@ def model_name(self) -> str: """Returns the model name used for the LLM.""" return self._model.model_path # type: ignore + def _generate_chat_completion( + self, + input: FormattedInput, + max_new_tokens: int = 128, + frequency_penalty: float = 0.0, + presence_penalty: float = 0.0, + temperature: float = 1.0, + top_p: float = 1.0, + extra_generation_kwargs: Optional[Dict[str, Any]] = None, + ) -> "CreateChatCompletionResponse": + return self._model.create_chat_completion( # type: ignore + messages=input, # type: ignore + max_tokens=max_new_tokens, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + temperature=temperature, + top_p=top_p, + logits_processor=self._logits_processor, + **(extra_generation_kwargs or {}), + ) + + def prepare_input(self, input: "StandardInput") -> str: + """Prepares the input (applying the chat template and tokenization) for the provided + input. + + Args: + input: the input list containing chat items. + + Returns: + The prompt to send to the LLM. + """ + prompt: str = ( + self._tokenizer.apply_chat_template( # type: ignore + conversation=input, # type: ignore + tokenize=False, + add_generation_prompt=True, + ) + if input + else "" + ) + return super().apply_magpie_pre_query_template(prompt, input) + + def _generate_with_text_generation( + self, + input: FormattedInput, + max_new_tokens: int = 128, + frequency_penalty: float = 0.0, + presence_penalty: float = 0.0, + temperature: float = 1.0, + top_p: float = 1.0, + extra_generation_kwargs: Optional[Dict[str, Any]] = None, + ) -> "CreateChatCompletionResponse": + import pdb + + prompt = self.prepare_input(input) + pdb.set_trace() + output = self._model.create_completion( + prompt=prompt, + max_tokens=max_new_tokens, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + temperature=temperature, + top_p=top_p, + logits_processor=self._logits_processor, + **(extra_generation_kwargs or {}), + ) + pdb.set_trace() + return output + @validate_call def generate( # type: ignore self, @@ -230,24 +347,36 @@ def generate( # type: ignore self._logits_processor = self._prepare_structured_output( structured_output ) - chat_completions: "CreateChatCompletionResponse" = ( - self._model.create_chat_completion( # type: ignore - messages=input, # type: ignore - max_tokens=max_new_tokens, - frequency_penalty=frequency_penalty, - presence_penalty=presence_penalty, - temperature=temperature, - top_p=top_p, - logits_processor=self._logits_processor, - **(extra_generation_kwargs or {}), + if self.tokenizer_id is None: + completion = self._generate_chat_completion( + input, + max_new_tokens, + frequency_penalty, + presence_penalty, + temperature, + top_p, + extra_generation_kwargs, ) - ) - outputs.append(chat_completions["choices"][0]["message"]["content"]) - output_tokens.append(chat_completions["usage"]["completion_tokens"]) + outputs.append(completion["choices"][0]["message"]["content"]) + output_tokens.append(completion["usage"]["completion_tokens"]) + else: + completion: 
"CreateChatCompletionResponse" = ( + self._generate_with_text_generation( # type: ignore + input, + max_new_tokens, + frequency_penalty, + presence_penalty, + temperature, + top_p, + extra_generation_kwargs, + ) + ) + outputs.append(completion["choices"][0]["text"]) + output_tokens.append(completion["usage"]["completion_tokens"]) batch_outputs.append( prepare_output( outputs, - input_tokens=[chat_completions["usage"]["prompt_tokens"]] + input_tokens=[completion["usage"]["prompt_tokens"]] * num_generations, output_tokens=output_tokens, ) From 0efe7a5f6e19e2d8a5a8d29edc8adc85bb6ef4b3 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 13:00:55 +0100 Subject: [PATCH 02/11] remove pdb trace --- src/distilabel/models/llms/llamacpp.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 8ecfc9d286..be232beb85 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -53,7 +53,6 @@ class LlamaCppLLM(LLM, MagpieChatTemplateMixin): tokenizer_id: the tokenizer Hugging Face Hub repo id or a path to a directory containing the tokenizer config files. If not provided, the one associated to the `model` will be used. Defaults to `None`. - use_magpie_template: a flag used to enable/disable applying the Magpie pre-query template. Defaults to `False`. magpie_pre_query_template: the pre-query template to be applied to the prompt or @@ -278,11 +277,8 @@ def _generate_with_text_generation( top_p: float = 1.0, extra_generation_kwargs: Optional[Dict[str, Any]] = None, ) -> "CreateChatCompletionResponse": - import pdb - prompt = self.prepare_input(input) - pdb.set_trace() - output = self._model.create_completion( + return self._model.create_completion( prompt=prompt, max_tokens=max_new_tokens, frequency_penalty=frequency_penalty, @@ -292,8 +288,6 @@ def _generate_with_text_generation( logits_processor=self._logits_processor, **(extra_generation_kwargs or {}), ) - pdb.set_trace() - return output @validate_call def generate( # type: ignore From d275152cbf5004bd38f8d0e888aced0d0ac23dc7 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 14:27:46 +0100 Subject: [PATCH 03/11] fix imports --- src/distilabel/models/llms/anthropic.py | 2 +- src/distilabel/models/llms/cohere.py | 2 +- src/distilabel/models/llms/groq.py | 2 +- src/distilabel/models/llms/mistral.py | 2 +- src/distilabel/models/llms/openai.py | 3 +-- src/distilabel/models/llms/vertexai.py | 2 +- src/distilabel/steps/tasks/evol_instruct/base.py | 2 +- src/distilabel/steps/tasks/evol_instruct/generator.py | 2 +- 8 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/distilabel/models/llms/anthropic.py b/src/distilabel/models/llms/anthropic.py index c6c79a9141..0eefc092dc 100644 --- a/src/distilabel/models/llms/anthropic.py +++ b/src/distilabel/models/llms/anthropic.py @@ -42,7 +42,7 @@ from anthropic import AsyncAnthropic from anthropic.types import Message - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics _ANTHROPIC_API_KEY_ENV_VAR_NAME = "ANTHROPIC_API_KEY" diff --git a/src/distilabel/models/llms/cohere.py b/src/distilabel/models/llms/cohere.py index 043ac4214c..8b081a762e 100644 --- a/src/distilabel/models/llms/cohere.py +++ b/src/distilabel/models/llms/cohere.py @@ -40,7 +40,7 @@ from pydantic import BaseModel from tokenizers import Tokenizer - from distilabel.llms.typing import LLMStatistics + from 
distilabel.models.llms.typing import LLMStatistics _COHERE_API_KEY_ENV_VAR_NAME = "COHERE_API_KEY" diff --git a/src/distilabel/models/llms/groq.py b/src/distilabel/models/llms/groq.py index 2977c513f3..8000211936 100644 --- a/src/distilabel/models/llms/groq.py +++ b/src/distilabel/models/llms/groq.py @@ -30,7 +30,7 @@ from groq import AsyncGroq from groq.types.chat.chat_completion import ChatCompletion - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics _GROQ_API_BASE_URL_ENV_VAR_NAME = "GROQ_BASE_URL" diff --git a/src/distilabel/models/llms/mistral.py b/src/distilabel/models/llms/mistral.py index 873565091b..9fe9f357da 100644 --- a/src/distilabel/models/llms/mistral.py +++ b/src/distilabel/models/llms/mistral.py @@ -30,7 +30,7 @@ from mistralai import Mistral from mistralai.models.chatcompletionresponse import ChatCompletionResponse - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics _MISTRALAI_API_KEY_ENV_VAR_NAME = "MISTRAL_API_KEY" diff --git a/src/distilabel/models/llms/openai.py b/src/distilabel/models/llms/openai.py index c53122fa63..37bb5bb6be 100644 --- a/src/distilabel/models/llms/openai.py +++ b/src/distilabel/models/llms/openai.py @@ -35,8 +35,7 @@ from openai.types.chat.chat_completion import Choice as OpenAIChoice from openai.types.completion import Completion as OpenAICompletion - from distilabel.llms.typing import LLMStatistics - from distilabel.models.llms.typing import Logprob + from distilabel.models.llms.typing import LLMStatistics, Logprob _OPENAI_API_KEY_ENV_VAR_NAME = "OPENAI_API_KEY" diff --git a/src/distilabel/models/llms/vertexai.py b/src/distilabel/models/llms/vertexai.py index 8f5dc28bbd..62235dd321 100644 --- a/src/distilabel/models/llms/vertexai.py +++ b/src/distilabel/models/llms/vertexai.py @@ -25,7 +25,7 @@ if TYPE_CHECKING: from vertexai.generative_models import Content, GenerationResponse, GenerativeModel - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics class VertexChatItem(TypedDict): diff --git a/src/distilabel/steps/tasks/evol_instruct/base.py b/src/distilabel/steps/tasks/evol_instruct/base.py index 3f2ba5da4f..f1a44d6a84 100644 --- a/src/distilabel/steps/tasks/evol_instruct/base.py +++ b/src/distilabel/steps/tasks/evol_instruct/base.py @@ -27,7 +27,7 @@ from distilabel.utils.lists import flatten_responses if TYPE_CHECKING: - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics from distilabel.steps.typing import StepOutput diff --git a/src/distilabel/steps/tasks/evol_instruct/generator.py b/src/distilabel/steps/tasks/evol_instruct/generator.py index fa568d392e..6f985464eb 100644 --- a/src/distilabel/steps/tasks/evol_instruct/generator.py +++ b/src/distilabel/steps/tasks/evol_instruct/generator.py @@ -33,7 +33,7 @@ from distilabel.utils.lists import flatten_responses if TYPE_CHECKING: - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics from distilabel.steps.tasks.typing import ChatType from distilabel.steps.typing import GeneratorStepOutput From ea6af5900d93241fb1b0674e32bca28b9d7725a4 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 14:28:04 +0100 Subject: [PATCH 04/11] refactor code formatting --- src/distilabel/models/llms/llamacpp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/distilabel/models/llms/llamacpp.py 
b/src/distilabel/models/llms/llamacpp.py index be232beb85..53af5b5cbb 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -213,7 +213,6 @@ def load(self) -> None: raise ImportError( "Transformers is not installed. Please install it using `pip install transformers`." ) from ie - self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_id) # NOTE: Here because of the custom `logging` interface used, since it will create the logging name From c6708c9cf64e169f78b7255fc4a936d7101b7bdb Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 14:29:19 +0100 Subject: [PATCH 05/11] add ollama support magpie --- src/distilabel/models/llms/ollama.py | 135 ++++++++++++++++++++++++--- 1 file changed, 121 insertions(+), 14 deletions(-) diff --git a/src/distilabel/models/llms/ollama.py b/src/distilabel/models/llms/ollama.py index f704627487..782ce93103 100644 --- a/src/distilabel/models/llms/ollama.py +++ b/src/distilabel/models/llms/ollama.py @@ -14,6 +14,7 @@ from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Sequence, Union +from llama_cpp.llama_types import CreateChatCompletionResponse from pydantic import Field, PrivateAttr, validate_call from typing_extensions import TypedDict @@ -21,12 +22,17 @@ from distilabel.models.llms.base import AsyncLLM from distilabel.models.llms.typing import GenerateOutput from distilabel.models.llms.utils import prepare_output +from distilabel.models.mixins.magpie import MagpieChatTemplateMixin from distilabel.steps.tasks.typing import InstructorStructuredOutputType, StandardInput if TYPE_CHECKING: + from llama_cpp import CreateChatCompletionResponse from ollama import AsyncClient - from distilabel.llms.typing import LLMStatistics + from distilabel.models.llms.typing import LLMStatistics + from distilabel.steps.tasks.typing import ( + StandardInput, + ) # Copied from `ollama._types.Options` @@ -69,13 +75,25 @@ class Options(TypedDict, total=False): stop: Sequence[str] -class OllamaLLM(AsyncLLM): +class OllamaLLM(AsyncLLM, MagpieChatTemplateMixin): """Ollama LLM implementation running the Async API client. Attributes: model: the model name to use for the LLM e.g. "notus". host: the Ollama server host. timeout: the timeout for the LLM. Defaults to `120`. + follow_redirects: whether to follow redirects. Defaults to `True`. + structured_output: a dictionary containing the structured output configuration or if more + fine-grained control is needed, an instance of `OutlinesStructuredOutput`. Defaults to None. + tokenizer_id: the tokenizer Hugging Face Hub repo id or a path to a directory containing + the tokenizer config files. If not provided, the one associated to the `model` + will be used. Defaults to `None`. + use_magpie_template: a flag used to enable/disable applying the Magpie pre-query + template. Defaults to `False`. + magpie_pre_query_template: the pre-query template to be applied to the prompt or + sent to the LLM to generate an instruction or a follow up user message. Valid + values are "llama3", "qwen2" or another pre-query template provided. Defaults + to `None`. _aclient: the `AsyncClient` to use for the Ollama API. It is meant to be used internally. Set in the `load` method. 
@@ -112,9 +130,22 @@ class OllamaLLM(AsyncLLM): description="The structured output format to use across all the generations.", ) ) - + tokenizer_id: Optional[RuntimeParameter[str]] = Field( + default=None, + description="The tokenizer Hugging Face Hub repo id or a path to a directory containing" + " the tokenizer config files. If not provided, the one associated to the `model`" + " will be used.", + ) + use_magpie_template: RuntimeParameter[bool] = Field( + default=False, + description="Whether to use the Magpie pre-query template or not.", + ) + magpie_pre_query_template: Optional[RuntimeParameter[str]] = Field( + default=None, + description="The pre-query template to use for the model. Valid values are " + "`llama3`, `qwen2` or another pre-query template provided.", + ) _num_generations_param_supported = False - _aclient: Optional["AsyncClient"] = PrivateAttr(...) def load(self) -> None: @@ -135,13 +166,83 @@ def load(self) -> None: " `pip install ollama`." ) from e + if self.use_magpie_template or self.magpie_pre_query_template: + if not self.tokenizer_id: + raise ValueError( + "The Hugging Face Hub repo id or a path to a directory containing" + " the tokenizer config files is required when using the `use_magpie_template`" + " or `magpie_pre_query_template` runtime parameters." + ) + + if self.tokenizer_id: + try: + from transformers import AutoTokenizer + except ImportError as ie: + raise ImportError( + "Transformers is not installed. Please install it using `pip install transformers`." + ) from ie + self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_id) + @property def model_name(self) -> str: """Returns the model name used for the LLM.""" return self.model + async def _generate_chat_completion( + self, + input: "StandardInput", + format: Literal["", "json"] = "", + options: Union[Options, None] = None, + keep_alive: Union[bool, None] = None, + ) -> "CreateChatCompletionResponse": + return await self._aclient.chat( + model=self.model, + messages=input, + stream=False, + format=format, + options=options, + keep_alive=keep_alive, + ) + + def prepare_input(self, input: "StandardInput") -> str: + """Prepares the input (applying the chat template and tokenization) for the provided + input. + + Args: + input: the input list containing chat items. + + Returns: + The prompt to send to the LLM. 
+ """ + prompt: str = ( + self._tokenizer.apply_chat_template( + conversation=input, + tokenize=False, + add_generation_prompt=True, + ) + if input + else "" + ) + return super().apply_magpie_pre_query_template(prompt, input) + + async def _generate_with_text_generation( + self, + input: "StandardInput", + format: Literal["", "json"] = None, + options: Union[Options, None] = None, + keep_alive: Union[bool, None] = None, + ) -> "CreateChatCompletionResponse": + input = self.prepare_input(input) + return await self._aclient.generate( + model=self.model, + prompt=input, + format=format, + options=options, + keep_alive=keep_alive, + ) + @validate_call - async def agenerate( # type: ignore + async def agenerate( self, input: StandardInput, format: Literal["", "json"] = "", @@ -163,15 +264,21 @@ async def agenerate( # type: ignore """ text = None try: - completion: Dict[str, Any] = await self._aclient.chat( # type: ignore - model=self.model, - messages=input, # type: ignore - stream=False, - format=format, - options=options, - keep_alive=keep_alive, - ) - text = completion["message"]["content"] + if not format: + format = None + if self.tokenizer_id is None: + completion = await self._generate_chat_completion( + input, format, options, keep_alive + ) + text = completion["message"]["content"] + else: + completion: CreateChatCompletionResponse = ( + await self._generate_with_text_generation( + input, format, options, keep_alive + ) + ) + + text = completion.response except Exception as e: self._logger.warning( # type: ignore f"⚠️ Received no response using Ollama client (model: '{self.model_name}')." From 2ddfa3faf36849c213d659de765640cf386f5c3d Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 19 Dec 2024 15:22:23 +0100 Subject: [PATCH 06/11] fix set flag to raw --- src/distilabel/models/llms/ollama.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/distilabel/models/llms/ollama.py b/src/distilabel/models/llms/ollama.py index 782ce93103..381f041693 100644 --- a/src/distilabel/models/llms/ollama.py +++ b/src/distilabel/models/llms/ollama.py @@ -239,6 +239,7 @@ async def _generate_with_text_generation( format=format, options=options, keep_alive=keep_alive, + raw=True, ) @validate_call @@ -277,7 +278,6 @@ async def agenerate( input, format, options, keep_alive ) ) - text = completion.response except Exception as e: self._logger.warning( # type: ignore From 6b9316c37aba4ccdaf52b259380635d422657304 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 23 Dec 2024 09:15:52 +0100 Subject: [PATCH 07/11] fix tests ollama llamacpp serialisation --- tests/unit/models/llms/test_llamacpp.py | 6 ++++++ tests/unit/models/llms/test_ollama.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/tests/unit/models/llms/test_llamacpp.py b/tests/unit/models/llms/test_llamacpp.py index 94bf008f19..823793b9b9 100644 --- a/tests/unit/models/llms/test_llamacpp.py +++ b/tests/unit/models/llms/test_llamacpp.py @@ -83,6 +83,9 @@ def test_generate(self, llm: LlamaCppLLM) -> None: "name": "LlamaCppLLM", }, "verbose": False, + "magpie_pre_query_template": None, + "tokenizer_id": None, + "use_magpie_template": False, }, ), ( @@ -110,6 +113,9 @@ def test_generate(self, llm: LlamaCppLLM) -> None: "name": "LlamaCppLLM", }, "verbose": False, + "magpie_pre_query_template": None, + "tokenizer_id": None, + "use_magpie_template": False, }, ), ], diff --git a/tests/unit/models/llms/test_ollama.py b/tests/unit/models/llms/test_ollama.py index 167ec6a1dc..f3d4a3ef47 100644 --- 
a/tests/unit/models/llms/test_ollama.py +++ b/tests/unit/models/llms/test_ollama.py @@ -97,6 +97,9 @@ def test_serialization(self, _: MagicMock) -> None: "generation_kwargs": {}, "structured_output": None, "jobs_ids": None, + "magpie_pre_query_template": None, + "tokenizer_id": None, + "use_magpie_template": False, "offline_batch_generation_block_until_done": None, "use_offline_batch_generation": False, "type_info": { From 5fc6d478c37f2cc464ec6d21465aff5c09bac432 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 23 Dec 2024 09:32:13 +0100 Subject: [PATCH 08/11] refactor magpie template validation --- src/distilabel/models/llms/llamacpp.py | 14 +++++++++++++- src/distilabel/models/llms/ollama.py | 22 +++++++++++++--------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 53af5b5cbb..26c634a2f8 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union -from pydantic import Field, FilePath, PrivateAttr, validate_call +from pydantic import Field, FilePath, PrivateAttr, model_validator, validate_call from distilabel.mixins.runtime_parameters import RuntimeParameter from distilabel.models.llms.base import LLM @@ -173,6 +173,18 @@ class User(BaseModel): _logits_processor: Optional["LogitsProcessorList"] = PrivateAttr(default=None) _model: Optional["Llama"] = PrivateAttr(...) + @model_validator(mode="after") # type: ignore + def validate_magpie_usage( + self, + ) -> "LlamaCppLLM": + """Validates that magpie usage is valid.""" + + if self.use_magpie_template and self.tokenizer_id is None: + raise ValueError( + "`use_magpie_template` cannot be `True` if `tokenizer_id` is `None`. Please," + " set a `tokenizer_id` and try again." + ) + def load(self) -> None: """Loads the `Llama` model from the `model_path`.""" try: diff --git a/src/distilabel/models/llms/ollama.py b/src/distilabel/models/llms/ollama.py index 381f041693..fbacb69e3d 100644 --- a/src/distilabel/models/llms/ollama.py +++ b/src/distilabel/models/llms/ollama.py @@ -15,7 +15,7 @@ from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Sequence, Union from llama_cpp.llama_types import CreateChatCompletionResponse -from pydantic import Field, PrivateAttr, validate_call +from pydantic import Field, PrivateAttr, model_validator, validate_call from typing_extensions import TypedDict from distilabel.mixins.runtime_parameters import RuntimeParameter @@ -148,6 +148,18 @@ class OllamaLLM(AsyncLLM, MagpieChatTemplateMixin): _num_generations_param_supported = False _aclient: Optional["AsyncClient"] = PrivateAttr(...) + @model_validator(mode="after") # type: ignore + def validate_magpie_usage( + self, + ) -> "OllamaLLM": + """Validates that magpie usage is valid.""" + + if self.use_magpie_template and self.tokenizer_id is None: + raise ValueError( + "`use_magpie_template` cannot be `True` if `tokenizer_id` is `None`. Please," + " set a `tokenizer_id` and try again." + ) + def load(self) -> None: """Loads the `AsyncClient` to use Ollama async API.""" super().load() @@ -166,14 +178,6 @@ def load(self) -> None: " `pip install ollama`." 
) from e - if self.use_magpie_template or self.magpie_pre_query_template: - if not self.tokenizer_id: - raise ValueError( - "The Hugging Face Hub repo id or a path to a directory containing" - " the tokenizer config files is required when using the `use_magpie_template`" - " or `magpie_pre_query_template` runtime parameters." - ) - if self.tokenizer_id: try: from transformers import AutoTokenizer From 0da1adcdf42378432cc6f9a22a68e4425b45eac3 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 23 Dec 2024 09:41:58 +0100 Subject: [PATCH 09/11] fix failing tests --- tests/unit/models/llms/test_llamacpp.py | 11 +++++++++++ tests/unit/models/llms/test_ollama.py | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/tests/unit/models/llms/test_llamacpp.py b/tests/unit/models/llms/test_llamacpp.py index 823793b9b9..f897cf1954 100644 --- a/tests/unit/models/llms/test_llamacpp.py +++ b/tests/unit/models/llms/test_llamacpp.py @@ -38,6 +38,17 @@ def llm() -> Generator[LlamaCppLLM, None, None]: class TestLlamaCppLLM: + def test_no_tokenizer_magpie_raise_value_error(self) -> None: + with pytest.raises( + ValueError, + match="`use_magpie_template` cannot be `True` if `tokenizer_id` is `None`", + ): + LlamaCppLLM( + model_path="tinyllama.gguf", + use_magpie_template=True, + magpie_pre_query_template="llama3", + ) + def test_model_name(self, llm: LlamaCppLLM) -> None: assert llm.model_name == "tinyllama.gguf" diff --git a/tests/unit/models/llms/test_ollama.py b/tests/unit/models/llms/test_ollama.py index f3d4a3ef47..3d80846370 100644 --- a/tests/unit/models/llms/test_ollama.py +++ b/tests/unit/models/llms/test_ollama.py @@ -22,6 +22,17 @@ @patch("ollama.AsyncClient") class TestOllamaLLM: + def test_no_tokenizer_magpie_raise_value_error(self, _: MagicMock) -> None: + with pytest.raises( + ValueError, + match="`use_magpie_template` cannot be `True` if `tokenizer_id` is `None`", + ): + OllamaLLM( + model="llama3.1", + use_magpie_template=True, + magpie_pre_query_template="llama3", + ) + def test_ollama_llm(self, _: MagicMock) -> None: llm = OllamaLLM(model="notus") # type: ignore assert isinstance(llm, OllamaLLM) From 52f6eb4db10efb842af0a964663b6f213ca4119e Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 23 Dec 2024 10:14:37 +0100 Subject: [PATCH 10/11] add validation for chat_template --- src/distilabel/models/llms/llamacpp.py | 4 ++++ src/distilabel/models/llms/ollama.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 26c634a2f8..ba30735f61 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -226,6 +226,10 @@ def load(self) -> None: "Transformers is not installed. Please install it using `pip install transformers`." ) from ie self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_id) + if self._tokenizer.chat_template is None: + raise ValueError( + "The tokenizer does not have a chat template. Please use a tokenizer with a chat template." + ) # NOTE: Here because of the custom `logging` interface used, since it will create the logging name # out of the model name, which won't be available until the `Llama` instance is created. diff --git a/src/distilabel/models/llms/ollama.py b/src/distilabel/models/llms/ollama.py index fbacb69e3d..ae3f2715ce 100644 --- a/src/distilabel/models/llms/ollama.py +++ b/src/distilabel/models/llms/ollama.py @@ -186,6 +186,10 @@ def load(self) -> None: "Transformers is not installed. 
Please install it using `pip install transformers`." ) from ie self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_id) + if self._tokenizer.chat_template is None: + raise ValueError( + "The tokenizer does not have a chat template. Please use a tokenizer with a chat template." + ) @property def model_name(self) -> str: From 4e291e7bf1c27b734a683a3af1fefe58965d77d6 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Mon, 23 Dec 2024 10:20:57 +0100 Subject: [PATCH 11/11] fix llamacpp download tinyllama --- tests/unit/models/llms/test_llamacpp.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/unit/models/llms/test_llamacpp.py b/tests/unit/models/llms/test_llamacpp.py index f897cf1954..f5b9f51cec 100644 --- a/tests/unit/models/llms/test_llamacpp.py +++ b/tests/unit/models/llms/test_llamacpp.py @@ -23,14 +23,18 @@ from .utils import DummyUserDetail -@pytest.fixture(scope="module") -def llm() -> Generator[LlamaCppLLM, None, None]: +def download_tinyllama() -> None: if not os.path.exists("tinyllama.gguf"): urllib.request.urlretrieve( "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q2_K.gguf", "tinyllama.gguf", ) + +@pytest.fixture(scope="module") +def llm() -> Generator[LlamaCppLLM, None, None]: + download_tinyllama() + llm = LlamaCppLLM(model_path="tinyllama.gguf", n_gpu_layers=0) # type: ignore llm.load() @@ -39,6 +43,8 @@ def llm() -> Generator[LlamaCppLLM, None, None]: class TestLlamaCppLLM: def test_no_tokenizer_magpie_raise_value_error(self) -> None: + download_tinyllama() + with pytest.raises( ValueError, match="`use_magpie_template` cannot be `True` if `tokenizer_id` is `None`",
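
A minimal usage sketch of the Magpie support added by this patch series, exercising the new `tokenizer_id`, `use_magpie_template` and `magpie_pre_query_template` parameters of `LlamaCppLLM`. The GGUF path and the Hugging Face tokenizer repo id below are illustrative assumptions, not values taken from the patches; any tokenizer passed this way must ship a chat template (enforced in PATCH 10/11), and both `llama-cpp-python` and `transformers` need to be installed.

# Usage sketch only: model path and tokenizer repo id are placeholders.
from distilabel.models.llms.llamacpp import LlamaCppLLM

llm = LlamaCppLLM(
    model_path="tinyllama.gguf",  # local GGUF weights, assumed to exist
    n_gpu_layers=0,
    tokenizer_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # tokenizer providing the chat template
    use_magpie_template=True,
    magpie_pre_query_template="llama3",
)
llm.load()

# With `tokenizer_id` set, generation goes through `_generate_with_text_generation`:
# `prepare_input` applies the tokenizer's chat template and the Magpie mixin may append
# the pre-query template so the model can write the user instruction itself.
result = llm.generate(
    inputs=[[{"role": "user", "content": "Write a short haiku about rivers."}]],
    num_generations=1,
)
print(result)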