diff --git a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py index 6e3273e1c..b5783c611 100644 --- a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py +++ b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py @@ -7,6 +7,23 @@ @component class OllamaDocumentEmbedder: + """ + Computes the embeddings of a list of Documents and stores the obtained vectors in the embedding field of each + Document. It uses embedding models compatible with the Ollama Library. + + Usage example: + ```python + from haystack import Document + from haystack_integrations.components.embedders.ollama import OllamaDocumentEmbedder + + doc = Document(content="What do llamas say once you have thanked them? No probllama!") + document_embedder = OllamaDocumentEmbedder() + + result = document_embedder.run([doc]) + print(result['documents'][0].embedding) + ``` + """ + def __init__( self, model: str = "nomic-embed-text", @@ -20,15 +37,16 @@ def __init__( embedding_separator: str = "\n", ): """ - :param model: The name of the model to use. The model should be available in the running Ollama instance. - Default is "nomic-embed-text". "https://ollama.com/library/nomic-embed-text" - :param url: The URL of the chat endpoint of a running Ollama instance. - Default is "http://localhost:11434/api/embeddings". - :param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, - top_p, and others. See the available arguments in + :param model: + The name of the model to use. The model should be available in the running Ollama instance. + :param url: + The URL of the chat endpoint of a running Ollama instance. + :param generation_kwargs: + Optional arguments to pass to the Ollama generation endpoint, such as temperature, top_p, and others. + See the available arguments in [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). - :param timeout: The number of seconds before throwing a timeout error from the Ollama API. - Default is 120 seconds. + :param timeout: + The number of seconds before throwing a timeout error from the Ollama API. """ self.timeout = timeout self.generation_kwargs = generation_kwargs or {} @@ -44,15 +62,12 @@ def __init__( def _create_json_payload(self, text: str, generation_kwargs: Optional[Dict[str, Any]]) -> Dict[str, Any]: """ Returns A dictionary of JSON arguments for a POST request to an Ollama service - :param text: Text that is to be converted to an embedding - :param generation_kwargs: - :return: A dictionary of arguments for a POST request to an Ollama service """ return {"model": self.model, "prompt": text, "options": {**self.generation_kwargs, **(generation_kwargs or {})}} def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: """ - Prepare the texts to embed by concatenating the Document text with the metadata fields to embed. + Prepares the texts to embed by concatenating the Document text with the metadata fields to embed. """ texts_to_embed = [] for doc in documents: @@ -101,12 +116,17 @@ def _embed_batch( @component.output_types(documents=List[Document], meta=Dict[str, Any]) def run(self, documents: List[Document], generation_kwargs: Optional[Dict[str, Any]] = None): """ - Run an Ollama Model on a provided documents. 
- :param documents: Documents to be converted to an embedding. - :param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, + Runs an Ollama Model to compute embeddings of the provided documents. + + :param documents: + Documents to be converted to an embedding. + :param generation_kwargs: + Optional arguments to pass to the Ollama generation endpoint, such as temperature, top_p, etc. See the [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). - :return: Documents with embedding information attached and metadata in a dictionary + :returns: A dictionary with the following keys: + - `documents`: Documents with embedding information attached + - `meta`: The metadata collected during the embedding process """ if not isinstance(documents, list) or documents and not isinstance(documents[0], Document): msg = ( diff --git a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py index e2ef136b4..5a28ba393 100644 --- a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py +++ b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py @@ -6,6 +6,20 @@ @component class OllamaTextEmbedder: + """ + Computes the embedding of a string of text. + It uses embedding models compatible with the Ollama Library. + + Usage example: + ```python + from haystack_integrations.components.embedders.ollama import OllamaTextEmbedder + + embedder = OllamaTextEmbedder() + result = embedder.run(text="What do llamas say once you have thanked them? No probllama!") + print(result['embedding']) + ``` + """ + def __init__( self, model: str = "nomic-embed-text", @@ -14,15 +28,16 @@ def __init__( timeout: int = 120, ): """ - :param model: The name of the model to use. The model should be available in the running Ollama instance. - Default is "nomic-embed-text". "https://ollama.com/library/nomic-embed-text" - :param url: The URL of the chat endpoint of a running Ollama instance. - Default is "http://localhost:11434/api/embeddings". - :param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, + :param model: + The name of the model to use. The model should be available in the running Ollama instance. + :param url: + The URL of the chat endpoint of a running Ollama instance. + :param generation_kwargs: + Optional arguments to pass to the Ollama generation endpoint, such as temperature, top_p, and others. See the available arguments in [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). - :param timeout: The number of seconds before throwing a timeout error from the Ollama API. - Default is 120 seconds. + :param timeout: + The number of seconds before throwing a timeout error from the Ollama API.
""" self.timeout = timeout self.generation_kwargs = generation_kwargs or {} @@ -32,21 +47,23 @@ def __init__( def _create_json_payload(self, text: str, generation_kwargs: Optional[Dict[str, Any]]) -> Dict[str, Any]: """ Returns A dictionary of JSON arguments for a POST request to an Ollama service - :param text: Text that is to be converted to an embedding - :param generation_kwargs: - :return: A dictionary of arguments for a POST request to an Ollama service """ return {"model": self.model, "prompt": text, "options": {**self.generation_kwargs, **(generation_kwargs or {})}} @component.output_types(embedding=List[float], meta=Dict[str, Any]) def run(self, text: str, generation_kwargs: Optional[Dict[str, Any]] = None): """ - Run an Ollama Model on a given chat history. - :param text: Text to be converted to an embedding. - :param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, + Runs an Ollama Model to compute embeddings of the provided text. + + :param text: + Text to be converted to an embedding. + :param generation_kwargs: + Optional arguments to pass to the Ollama generation endpoint, such as temperature, top_p, etc. See the [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). - :return: A dictionary with the key "embedding" and a list of floats as the value + :returns: A dictionary with the following keys: + - `embedding`: The computed embeddings + - `meta`: The metadata collected during the embedding process """ payload = self._create_json_payload(text, generation_kwargs) diff --git a/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py b/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py index 6a8c5493b..2abf3066b 100644 --- a/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py +++ b/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py @@ -9,8 +9,26 @@ @component class OllamaChatGenerator: """ - Chat Generator based on Ollama. Ollama is a library for easily running LLMs locally. - This component provides an interface to generate text using a LLM running in Ollama. + Supports models running on Ollama, such as llama2 and mixtral. Find the full list of supported models + [here](https://ollama.ai/library). + + Usage example: + ```python + from haystack_integrations.components.generators.ollama import OllamaChatGenerator + from haystack.dataclasses import ChatMessage + + generator = OllamaChatGenerator(model="zephyr", + url = "http://localhost:11434/api/chat", + generation_kwargs={ + "num_predict": 100, + "temperature": 0.9, + }) + + messages = [ChatMessage.from_system("\nYou are a helpful, respectful and honest assistant"), + ChatMessage.from_user("What's Natural Language Processing?")] + + print(generator.run(messages=messages)) + ``` """ def __init__( @@ -22,16 +40,18 @@ def __init__( timeout: int = 120, ): """ - :param model: The name of the model to use. The model should be available in the running Ollama instance. - Default is "orca-mini". - :param url: The URL of the chat endpoint of a running Ollama instance. - Default is "http://localhost:11434/api/chat". - :param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, + :param model: + The name of the model to use. The model should be available in the running Ollama instance. 
+ :param url: + The URL of the chat endpoint of a running Ollama instance. + :param generation_kwargs: + Optional arguments to pass to the Ollama generation endpoint, such as temperature, top_p, and others. See the available arguments in [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). - :param template: The full prompt template (overrides what is defined in the Ollama Modelfile). - :param timeout: The number of seconds before throwing a timeout error from the Ollama API. - Default is 120 seconds. + :param template: + The full prompt template (overrides what is defined in the Ollama Modelfile). + :param timeout: + The number of seconds before throwing a timeout error from the Ollama API. """ self.timeout = timeout @@ -46,9 +66,6 @@ def _message_to_dict(self, message: ChatMessage) -> Dict[str, str]: def _create_json_payload(self, messages: List[ChatMessage], generation_kwargs=None) -> Dict[str, Any]: """ Returns A dictionary of JSON arguments for a POST request to an Ollama service - :param messages: A history/list of chat messages - :param generation_kwargs: - :return: A dictionary of arguments for a POST request to an Ollama service """ generation_kwargs = generation_kwargs or {} return { @@ -62,8 +79,6 @@ def _create_json_payload(self, messages: List[ChatMessage], generation_kwargs=No def _build_message_from_ollama_response(self, ollama_response: Response) -> ChatMessage: """ Converts the non-streaming response from the Ollama API to a ChatMessage. - :param ollama_response: The completion returned by the Ollama API. - :return: The ChatMessage. """ json_content = ollama_response.json() message = ChatMessage.from_assistant(content=json_content["message"]["content"]) @@ -77,12 +92,16 @@ def run( generation_kwargs: Optional[Dict[str, Any]] = None, ): """ - Run an Ollama Model on a given chat history. - :param messages: A list of ChatMessage instances representing the input messages. - :param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, + Runs an Ollama Model on a given chat history. + + :param messages: + A list of ChatMessage instances representing the input messages. + :param generation_kwargs: + Optional arguments to pass to the Ollama generation endpoint, such as temperature, top_p, etc. See the [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). - :return: A dictionary of the replies containing their metadata + :returns: A dictionary with the following keys: + - `replies`: The responses from the model """ generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})} diff --git a/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py b/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py index f3ab86282..bbd7b05ca 100644 --- a/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py +++ b/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py @@ -11,8 +11,21 @@ @component class OllamaGenerator: """ - Generator based on Ollama. Ollama is a library for easily running LLMs locally. - This component provides an interface to generate text using a LLM running in Ollama. + Provides an interface to generate text using an LLM running on Ollama. 
+ + Usage example: + ```python + from haystack_integrations.components.generators.ollama import OllamaGenerator + + generator = OllamaGenerator(model="zephyr", + url = "http://localhost:11434/api/generate", + generation_kwargs={ + "num_predict": 100, + "temperature": 0.9, + }) + + print(generator.run("Who is the best American actor?")) + ``` """ def __init__( @@ -27,20 +40,25 @@ def __init__( streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, ): """ - :param model: The name of the model to use. The model should be available in the running Ollama instance. - Default is "orca-mini". - :param url: The URL of the generation endpoint of a running Ollama instance. - Default is "http://localhost:11434/api/generate". - :param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, + :param model: + The name of the model to use. The model should be available in the running Ollama instance. + :param url: + The URL of the generation endpoint of a running Ollama instance. + :param generation_kwargs: + Optional arguments to pass to the Ollama generation endpoint, such as temperature, top_p, and others. See the available arguments in [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). - :param system_prompt: Optional system message (overrides what is defined in the Ollama Modelfile). - :param template: The full prompt template (overrides what is defined in the Ollama Modelfile). - :param raw: If True, no formatting will be applied to the prompt. You may choose to use the raw parameter + :param system_prompt: + Optional system message (overrides what is defined in the Ollama Modelfile). + :param template: + The full prompt template (overrides what is defined in the Ollama Modelfile). + :param raw: + If True, no formatting will be applied to the prompt. You may choose to use the raw parameter if you are specifying a full templated prompt in your API request. - :param timeout: The number of seconds before throwing a timeout error from the Ollama API. - Default is 120 seconds. - :param streaming_callback: A callback function that is called when a new token is received from the stream. + :param timeout: + The number of seconds before throwing a timeout error from the Ollama API. + :param streaming_callback: + A callback function that is called when a new token is received from the stream. The callback function accepts StreamingChunk as an argument. """ self.timeout = timeout @@ -54,8 +72,10 @@ def __init__( def to_dict(self) -> Dict[str, Any]: """ - Serialize this component to a dictionary. - :return: The serialized component as a dictionary. + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. """ callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None return default_to_dict( @@ -73,9 +93,12 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, data: Dict[str, Any]) -> "OllamaGenerator": """ - Deserialize this component from a dictionary. - :param data: The dictionary representation of this component. - :return: The deserialized component instance. + Deserializes the component from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. 
""" init_params = data.get("init_parameters", {}) serialized_callback_handler = init_params.get("streaming_callback") @@ -86,11 +109,6 @@ def from_dict(cls, data: Dict[str, Any]) -> "OllamaGenerator": def _create_json_payload(self, prompt: str, stream: bool, generation_kwargs=None) -> Dict[str, Any]: """ Returns a dictionary of JSON arguments for a POST request to an Ollama service. - :param prompt: The prompt to generate a response for. - :param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, - top_p, and others. See the available arguments in - [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). - :return: A dictionary of arguments for a POST request to an Ollama service. """ generation_kwargs = generation_kwargs or {} return { @@ -105,9 +123,7 @@ def _create_json_payload(self, prompt: str, stream: bool, generation_kwargs=None def _convert_to_response(self, ollama_response: Response) -> Dict[str, List[Any]]: """ - Convert a response from the Ollama API to the required Haystack format. - :param ollama_response: A response (requests library) from the Ollama API. - :return: A dictionary of the returned responses and metadata. + Converts a response from the Ollama API to the required Haystack format. """ resp_dict = ollama_response.json() @@ -119,9 +135,7 @@ def _convert_to_response(self, ollama_response: Response) -> Dict[str, List[Any] def _convert_to_streaming_response(self, chunks: List[StreamingChunk]) -> Dict[str, List[Any]]: """ - Convert a list of chunks response required Haystack format. - :param chunks: List of StreamingChunks - :return: A dictionary of the returned responses and metadata. + Converts a list of chunks response required Haystack format. """ replies = ["".join([c.content for c in chunks])] @@ -130,10 +144,8 @@ def _convert_to_streaming_response(self, chunks: List[StreamingChunk]) -> Dict[s return {"replies": replies, "meta": [meta]} def _handle_streaming_response(self, response) -> List[StreamingChunk]: - """Handles Streaming response case - - :param response: streaming response from ollama api. - :return: The List[StreamingChunk]. + """ + Handles Streaming response cases """ chunks: List[StreamingChunk] = [] for chunk in response.iter_lines(): @@ -146,8 +158,6 @@ def _handle_streaming_response(self, response) -> List[StreamingChunk]: def _build_chunk(self, chunk_response: Any) -> StreamingChunk: """ Converts the response from the Ollama API to a StreamingChunk. - :param chunk: The chunk returned by the Ollama API. - :return: The StreamingChunk. """ decoded_chunk = json.loads(chunk_response.decode("utf-8")) @@ -164,12 +174,17 @@ def run( generation_kwargs: Optional[Dict[str, Any]] = None, ): """ - Run an Ollama Model on the given prompt. - :param prompt: The prompt to generate a response for. - :param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, + Runs an Ollama Model on the given prompt. + + :param prompt: + The prompt to generate a response for. + :param generation_kwargs: + Optional arguments to pass to the Ollama generation endpoint, such as temperature, top_p, and others. See the available arguments in [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). 
- :return: A dictionary of the response and returned metadata + :returns: A dictionary with the following keys: + - `replies`: The responses from the model + - `meta`: The metadata collected during the run """ generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
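The `_prepare_texts_to_embed` docstring above describes concatenating Document text with metadata fields before embedding. A minimal sketch of how that behaviour surfaces to a caller, assuming the component exposes a `meta_fields_to_embed` init parameter (not visible in the hunks above) alongside the `embedding_separator` shown in the `__init__` signature, and that a local Ollama server with `nomic-embed-text` is available:

```python
from haystack import Document

from haystack_integrations.components.embedders.ollama import OllamaDocumentEmbedder

# meta_fields_to_embed is an assumed parameter name; embedding_separator appears in the signature above.
embedder = OllamaDocumentEmbedder(
    model="nomic-embed-text",
    meta_fields_to_embed=["title"],
    embedding_separator="\n",
)

docs = [
    Document(content="Llamas hum when they are content.", meta={"title": "Llama facts"}),
    Document(content="Alpacas are smaller than llamas.", meta={"title": "Alpaca facts"}),
]

# Each Document's "title" is joined with its content using the separator before embedding.
result = embedder.run(docs)
print(len(result["documents"][0].embedding), result["meta"])
```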
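The `run` methods merge per-call options over the init-time ones, as in `{**self.generation_kwargs, **(generation_kwargs or {})}` above. A short sketch of that override with `OllamaChatGenerator`, reusing only names that appear in the diff and assuming a local Ollama server with the `zephyr` model pulled:

```python
from haystack.dataclasses import ChatMessage

from haystack_integrations.components.generators.ollama import OllamaChatGenerator

generator = OllamaChatGenerator(model="zephyr", generation_kwargs={"temperature": 0.9})

messages = [
    ChatMessage.from_system("You are a helpful, respectful and honest assistant"),
    ChatMessage.from_user("What's Natural Language Processing?"),
]

# The per-call temperature wins over the init-time value for this request only.
result = generator.run(messages=messages, generation_kwargs={"temperature": 0.1})
print(result["replies"][0].content)
```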
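`OllamaGenerator` documents a `streaming_callback` that receives `StreamingChunk` objects as tokens arrive. A minimal sketch of wiring it up, assuming `StreamingChunk` is importable from `haystack.dataclasses` and that each chunk carries its text in `chunk.content`:

```python
from haystack.dataclasses import StreamingChunk

from haystack_integrations.components.generators.ollama import OllamaGenerator

def print_chunk(chunk: StreamingChunk) -> None:
    # Called once per streamed chunk from the Ollama /api/generate endpoint.
    print(chunk.content, end="", flush=True)

generator = OllamaGenerator(
    model="zephyr",
    url="http://localhost:11434/api/generate",
    streaming_callback=print_chunk,
)

result = generator.run("Why do llamas hum?")
print()                 # newline after the streamed output
print(result["meta"])   # metadata collected from the streamed chunks
```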
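`to_dict` and `from_dict` now advertise plain-dictionary (de)serialization. A quick round-trip sketch that relies only on the methods shown in the diff:

```python
from haystack_integrations.components.generators.ollama import OllamaGenerator

generator = OllamaGenerator(model="zephyr", generation_kwargs={"num_predict": 100})

# Serialize the component (for example, when saving a pipeline) ...
data = generator.to_dict()

# ... and rebuild an equivalent component from the same dictionary.
restored = OllamaGenerator.from_dict(data)
print(restored.to_dict() == data)  # expected to round-trip cleanly
```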