Quansight · pmeier · Jun 10, 2024 · Mar 22, 2024 · Mar 22, 2024 · Mar 22, 2024
diff --git a/docs/examples/gallery_streaming.py b/docs/examples/gallery_streaming.py
@@ -38,6 +38,14 @@
 #     - [OpenAI](https://openai.com/)
 #       - [ragna.assistants.Gpt35Turbo16k][]
 #       - [ragna.assistants.Gpt4][]
+#     - [Ollama](https://ollama.com/)
+#       - [ragna.assistants.OllamaGemma2B][]
+#       - [ragna.assistants.OllamaLlama2][]
+#       - [ragna.assistants.OllamaLlava][]
+#       - [ragna.assistants.OllamaMistral][]
+#       - [ragna.assistants.OllamaMixtral][]
+#       - [ragna.assistants.OllamaOrcaMini][]
+#       - [ragna.assistants.OllamaPhi2][]
 
 from ragna import assistants
 

diff --git a/docs/tutorials/gallery_python_api.py b/docs/tutorials/gallery_python_api.py
@@ -91,6 +91,14 @@
 #     - [MosaicML](https://www.mosaicml.com/)
 #       - [ragna.assistants.Mpt7bInstruct][]
 #       - [ragna.assistants.Mpt30bInstruct][]
+#     - [Ollama](https://ollama.com/)
+#       - [ragna.assistants.OllamaGemma2B][]
+#       - [ragna.assistants.OllamaLlama2][]
+#       - [ragna.assistants.OllamaLlava][]
+#       - [ragna.assistants.OllamaMistral][]
+#       - [ragna.assistants.OllamaMixtral][]
+#       - [ragna.assistants.OllamaOrcaMini][]
+#       - [ragna.assistants.OllamaPhi2][]
 #
 #     !!! note
 #

diff --git a/ragna/assistants/__init__.py b/ragna/assistants/__init__.py
@@ -5,6 +5,13 @@
     "CommandLight",
     "GeminiPro",
     "GeminiUltra",
+    "OllamaGemma2B",
+    "OllamaPhi2",
+    "OllamaLlama2",
+    "OllamaLlava",
+    "OllamaMistral",
+    "OllamaMixtral",
+    "OllamaOrcaMini",
     "Gpt35Turbo16k",
     "Gpt4",
     "Jurassic2Ultra",
@@ -19,6 +26,15 @@
 from ._demo import RagnaDemoAssistant
 from ._google import GeminiPro, GeminiUltra
 from ._mosaicml import Mpt7bInstruct, Mpt30bInstruct
+from ._ollama import (
+    OllamaGemma2B,
+    OllamaLlama2,
+    OllamaLlava,
+    OllamaMistral,
+    OllamaMixtral,
+    OllamaOrcaMini,
+    OllamaPhi2,
+)
 from ._openai import Gpt4, Gpt35Turbo16k
 
 # isort: split

diff --git a/ragna/assistants/_ollama.py b/ragna/assistants/_ollama.py
@@ -0,0 +1,138 @@
+import contextlib
+import json
+import os
+from typing import AsyncIterator, cast
+
+import httpx
+from httpx import Response
+
+import ragna
+from ragna.core import Assistant, RagnaException, Source
+
+
+class OllamaApiAssistant(Assistant):
+    """Base class for assistants backed by the chat endpoint of an Ollama server.
+
+    Subclasses only set `_MODEL`, the model tag sent with every chat request.
+    """
+
+    _MODEL: str
+    # Chat endpoint used unless `RAGNA_ASSISTANTS_OLLAMA_URL` or `url` says otherwise.
+    _DEFAULT_URL = "http://localhost:11434/api/chat"
+
+    @classmethod
+    def display_name(cls) -> str:
+        """Return the display name, i.e. `Ollama/` followed by the model tag."""
+        return f"Ollama/{cls._MODEL}"
+
+    def __init__(self, url: str = _DEFAULT_URL) -> None:
+        """Create the HTTP client and resolve the chat endpoint URL.
+
+        Args:
+            url: Chat endpoint to use. The `RAGNA_ASSISTANTS_OLLAMA_URL`
+                environment variable, when set, takes precedence over this value.
+        """
+        self._client = httpx.AsyncClient(
+            headers={"User-Agent": f"{ragna.__version__}/{self}"},
+            timeout=60,
+        )
+        self._url = os.environ.get("RAGNA_ASSISTANTS_OLLAMA_URL", url)
+
+    @classmethod
+    def is_available(cls) -> bool:
+        """Check whether the configured Ollama server is reachable.
+
+        The server root is derived from `RAGNA_ASSISTANTS_OLLAMA_URL` (falling back
+        to the default endpoint), so a remote server is probed instead of a
+        hard-coded localhost address.
+
+        Returns:
+            `True` if the base class reports availability and the server root
+            answers with a success status, `False` otherwise.
+        """
+        if not super().is_available():
+            return False
+
+        chat_url = os.environ.get("RAGNA_ASSISTANTS_OLLAMA_URL", cls._DEFAULT_URL)
+        try:
+            # Resolving "/" against the chat endpoint keeps scheme, host and port.
+            return httpx.get(httpx.URL(chat_url).join("/")).is_success
+        except (httpx.HTTPError, httpx.InvalidURL):
+            # Unreachable server or malformed URL: report unavailable, don't crash.
+            return False
+
+    def _make_system_content(self, sources: list[Source]) -> str:
+        """Build the system prompt from the instruction and the source contents.
+
+        Args:
+            sources: Sources whose `content` is appended to the instruction.
+
+        Returns:
+            The instruction and every source content, separated by blank lines.
+        """
+        instruction = (
+            "You are a helpful assistant that answers user questions given the context below. "
+            "If you don't know the answer, just say so. Don't try to make up an answer. "
+            "Only use the following sources to generate the answer."
+        )
+        # Join the instruction like any other part so that the first source does not
+        # run straight into the final sentence of the instruction.
+        return "\n\n".join([instruction, *(source.content for source in sources)])
+
+    async def _assert_api_call_is_success(self, response: Response) -> None:
+        """Raise if `response` does not have a success status.
+
+        Args:
+            response: Response of the API call; its body is read on failure.
+
+        Raises:
+            RagnaException: If the call failed. Carries the request method and URL
+                as well as the response status code and content.
+        """
+        if response.is_success:
+            return
+
+        content = await response.aread()
+        # Prefer the decoded JSON payload, but fall back to the raw bytes if the
+        # body is not valid JSON.
+        with contextlib.suppress(Exception):
+            content = json.loads(content)
+
+        raise RagnaException(
+            "API call failed",
+            request_method=response.request.method,
+            request_url=str(response.request.url),
+            response_status_code=response.status_code,
+            response_content=content,
+        )
+
+    async def answer(
+        self, prompt: str, sources: list[Source], *, max_new_tokens: int = 256
+    ) -> AsyncIterator[str]:
+        """Stream the model's answer to `prompt` given `sources`.
+
+        Args:
+            prompt: User message sent to the model.
+            sources: Sources embedded into the system message.
+            max_new_tokens: Upper bound on generated tokens, sent as `num_predict`.
+
+        Yields:
+            The message content of every streamed chunk that is not marked as done.
+
+        Raises:
+            RagnaException: If the HTTP call fails or a streamed chunk contains an
+                `error` entry.
+        """
+        async with self._client.stream(
+            "POST",
+            self._url,
+            headers={
+                "Content-Type": "application/json",
+            },
+            json={
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": self._make_system_content(sources),
+                    },
+                    {
+                        "role": "user",
+                        "content": prompt,
+                    },
+                ],
+                "model": self._MODEL,
+                "stream": True,
+                # Sampling parameters have to be nested under `options`; as
+                # top-level keys they are not picked up by the chat endpoint.
+                "options": {
+                    "temperature": 0.0,
+                    "num_predict": max_new_tokens,
+                },
+            },
+        ) as response:
+            await self._assert_api_call_is_success(response)
+
+            async for chunk in response.aiter_lines():
+                # This part is modeled after https://github.com/ollama/ollama/blob/06a1508bfe456e82ba053ea554264e140c5057b5/examples/python-loganalysis/readme.md?plain=1#L57-L62
+                if chunk:
+                    json_data = json.loads(chunk)
+
+                    if "error" in json_data:
+                        raise RagnaException(json_data["error"])
+                    if not json_data["done"]:
+                        yield cast(str, json_data["message"]["content"])
+
+
+class OllamaGemma2B(OllamaApiAssistant):
+    """[Gemma:2B](https://ollama.com/library/gemma)"""
+
+    # Ollama model tag, sent as the "model" field of each chat request.
+    _MODEL = "gemma:2b"
+
+
+class OllamaLlama2(OllamaApiAssistant):
+    """[Llama 2](https://ollama.com/library/llama2)"""
+
+    # Ollama model tag, sent as the "model" field of each chat request.
+    _MODEL = "llama2"
+
+
+class OllamaLlava(OllamaApiAssistant):
+    """[Llava](https://ollama.com/library/llava)"""
+
+    # Ollama model tag, sent as the "model" field of each chat request.
+    _MODEL = "llava"
+
+
+class OllamaMistral(OllamaApiAssistant):
+    """[Mistral](https://ollama.com/library/mistral)"""
+
+    # Ollama model tag, sent as the "model" field of each chat request.
+    _MODEL = "mistral"
+
+
+class OllamaMixtral(OllamaApiAssistant):
+    """[Mixtral](https://ollama.com/library/mixtral)"""
+
+    # Ollama model tag, sent as the "model" field of each chat request.
+    _MODEL = "mixtral"
+
+
+class OllamaOrcaMini(OllamaApiAssistant):
+    """[Orca Mini](https://ollama.com/library/orca-mini)"""
+
+    # Ollama model tag, sent as the "model" field of each chat request.
+    _MODEL = "orca-mini"
+
+
+class OllamaPhi2(OllamaApiAssistant):
+    """[Phi-2](https://ollama.com/library/phi)"""
+
+    # Ollama model tag, sent as the "model" field of each chat request.
+    _MODEL = "phi"