From 29cb0eed7ff5384dc5b914962a7537cc946fbcdd Mon Sep 17 00:00:00 2001
From: Andreas Motl
Date: Mon, 23 Dec 2024 22:17:57 +0100
Subject: [PATCH] Cache: Documentation about `CrateDBCache` and `CrateDBSemanticCache`

---
 docs/cache.ipynb  | 218 ++++++++++++++++++++++++++++++++++++++++++++++
 examples/cache.py | 140 +++++++++++++++++++++++++++++
 pyproject.toml    |   3 +
 3 files changed, 361 insertions(+)
 create mode 100644 docs/cache.ipynb
 create mode 100644 examples/cache.py

diff --git a/docs/cache.ipynb b/docs/cache.ipynb
new file mode 100644
index 0000000..0649ccc
--- /dev/null
+++ b/docs/cache.ipynb
@@ -0,0 +1,218 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "592ad789648d2d4",
+   "metadata": {},
+   "source": [
+    "# CrateDB Caches\n",
+    "\n",
+    "## About\n",
+    "Caching the outcomes of LLM conversations improves performance and decreases costs.\n",
+    "[LangChain's caching subsystem] covers two popular caching strategies.\n",
+    "You can use CrateDB for caching LLM responses, choosing either the exact-match\n",
+    "`CrateDBCache` or the vector-similarity based `CrateDBSemanticCache`.\n",
+    "\n",
+    "### Standard Cache\n",
+    "The standard cache looks for an exact match of the user prompt. It does not use\n",
+    "semantic caching, nor does it require a vector search index. It avoids invoking\n",
+    "the LLM when the supplied prompt is exactly the same as one it has already\n",
+    "encountered.\n",
+    "\n",
+    "### Semantic Cache\n",
+    "The semantic cache retrieves cached responses based on the semantic similarity\n",
+    "between the user input and previously cached inputs. Under the hood, it uses\n",
+    "CrateDB as both a cache and a vector store, so it requires an appropriate\n",
+    "vector search index to work.\n",
+    "\n",
+    "### CrateDB\n",
+    "[CrateDB] is a distributed and scalable SQL database for storing and analyzing\n",
+    "massive amounts of data in near real-time, even with complex queries. It is\n",
+    "PostgreSQL-compatible, based on Lucene, and inherits from Elasticsearch.\n",
+    "[CrateDB Cloud] is a fully-managed cloud database available on AWS, Azure,\n",
+    "and GCP.\n",
+    "\n",
+    "CrateDB has native support for vector search. Use [CrateDB Vector Search] to\n",
+    "semantically cache prompts and responses.\n",
+    "\n",
+    "[CrateDB]: https://cratedb.com/database\n",
+    "[CrateDB Cloud]: https://cratedb.com/database/cloud\n",
+    "[CrateDB Vector Search]: https://cratedb.com/docs/guide/feature/search/vector/\n",
+    "[LangChain's caching subsystem]: https://python.langchain.com/docs/integrations/llm_caching/"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b6e9f94547c9ca8f",
+   "metadata": {},
+   "source": [
+    "## Installation\n",
+    "\n",
+    "Install the most recent version of the LangChain CrateDB adapter,\n",
+    "plus a few other packages that are needed by this tutorial."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "4855c4efb1b3a8e3",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-12-23T22:26:52.736631Z",
+     "start_time": "2024-12-23T22:26:52.733319Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%pip install --upgrade langchain-cratedb langchain-openai"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a825899f282222f8",
+   "metadata": {},
+   "source": [
+    "## Prerequisites\n",
+    "Because this notebook uses OpenAI's APIs, you need to supply an authentication\n",
+    "token. Either set the environment variable `OPENAI_API_KEY`, or optionally\n",
+    "configure your token here."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "b2e9df61239db0ef",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-12-23T22:52:56.983349Z",
+     "start_time": "2024-12-23T22:52:56.979697Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "_ = os.environ.setdefault(\n",
+    "    \"OPENAI_API_KEY\", \"sk-XJZ7pfog5Gp8Kus8D--invalid--0CJ5lyAKSefZLaV1Y9S1\"\n",
+    ")"
+   ]
+  },
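+  {
+   "cell_type": "markdown",
+   "id": "prereq-cratedb-markdown",
+   "metadata": {},
+   "source": [
+    "You also need a CrateDB instance. The connection strings used throughout\n",
+    "this notebook assume CrateDB listening on `localhost:4200` with default\n",
+    "credentials, for example started locally with Docker\n",
+    "(`docker run --rm -it --publish=4200:4200 crate`), or provided by\n",
+    "[CrateDB Cloud](https://cratedb.com/database/cloud). The next cell is a\n",
+    "minimal connectivity check; the connection string is an assumption, so\n",
+    "adjust it to your environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "prereq-cratedb-check",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sqlalchemy as sa\n",
+    "\n",
+    "# Verify connectivity by running a trivial query against CrateDB.\n",
+    "engine = sa.create_engine(\"crate://crate@localhost:4200/?schema=testdrive\")\n",
+    "with engine.connect() as connection:\n",
+    "    print(connection.execute(sa.text(\"SELECT 42\")).scalar_one())"
+   ]
+  },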
+  {
+   "cell_type": "markdown",
+   "id": "cfc26bf419e77612",
+   "metadata": {},
+   "source": [
+    "## Usage\n",
+    "\n",
+    "### CrateDBCache\n",
+    "\n",
+    "The standard cache `CrateDBCache` uses LangChain's `SQLAlchemyCache` under the hood."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2f82e8072556d60f",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-12-23T22:52:58.988148Z",
+     "start_time": "2024-12-23T22:52:58.898239Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The answer to everything is subjective and can vary depending on individual beliefs and perspectives. Some may say the answer to everything is love, others may say it is knowledge or understanding. Ultimately, there may not be one definitive answer to everything.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sqlalchemy as sa\n",
+    "from langchain.globals import set_llm_cache\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "\n",
+    "from langchain_cratedb import CrateDBCache\n",
+    "\n",
+    "# Configure standard cache.\n",
+    "engine = sa.create_engine(\"crate://crate@localhost:4200/?schema=testdrive\")\n",
+    "set_llm_cache(CrateDBCache(engine))\n",
+    "\n",
+    "# Invoke LLM conversation.\n",
+    "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n",
+    "answer = llm.invoke(\"What is the answer to everything?\")\n",
+    "print(answer.content)\n",
+    "\n",
+    "# Turn off cache.\n",
+    "set_llm_cache(None)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "802eba11df3ebb9a",
+   "metadata": {},
+   "source": [
+    "### CrateDBSemanticCache\n",
+    "\n",
+    "The semantic cache `CrateDBSemanticCache` uses `CrateDBVectorStore` under the hood."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "2ce4ca26032af33c",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-12-23T22:47:13.647735Z",
+     "start_time": "2024-12-23T22:47:12.893095Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Ah, you're referencing the famous science fiction series *The Hitchhiker's Guide to the Galaxy* by Douglas Adams! In the story, the supercomputer Deep Thought determines that the \"Answer to the Ultimate Question of Life, the Universe, and Everything\" is **42**. However, the actual \"Ultimate Question\" itself is unknown, leading to much cosmic humor and philosophical pondering.\n",
+      "\n",
+      "So, the answer is **42** — but what the question is, well, that's a whole other mystery! 😊\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sqlalchemy as sa\n",
+    "from langchain.globals import set_llm_cache\n",
+    "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
+    "\n",
+    "from langchain_cratedb import CrateDBSemanticCache\n",
+    "\n",
+    "# Configure semantic cache.\n",
+    "engine = sa.create_engine(\"crate://crate@localhost:4200/?schema=testdrive\")\n",
+    "set_llm_cache(\n",
+    "    CrateDBSemanticCache(\n",
+    "        embedding=OpenAIEmbeddings(),\n",
+    "        connection=engine,\n",
+    "        search_threshold=1.0,\n",
+    "    )\n",
+    ")\n",
+    "\n",
+    "# Invoke LLM conversation.\n",
+    "llm = ChatOpenAI(model_name=\"chatgpt-4o-latest\")\n",
+    "answer = llm.invoke(\"What is the answer to everything?\")\n",
+    "print(answer.content)\n",
+    "\n",
+    "# Turn off cache.\n",
+    "set_llm_cache(None)"
+   ]
+  },
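+  {
+   "cell_type": "markdown",
+   "id": "semantic-cache-reuse-markdown",
+   "metadata": {},
+   "source": [
+    "With the semantic cache active, a rephrased prompt can be answered from the\n",
+    "cache without another round-trip to the OpenAI chat API, provided its\n",
+    "embedding is close enough to a previously cached input, as governed by\n",
+    "`search_threshold`. The next cell is a sketch of that behaviour: it\n",
+    "re-enables the cache configured above and asks a paraphrased variant of\n",
+    "the same question."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "semantic-cache-reuse-code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Re-enable the semantic cache configured in the previous cell.\n",
+    "set_llm_cache(\n",
+    "    CrateDBSemanticCache(\n",
+    "        embedding=OpenAIEmbeddings(),\n",
+    "        connection=engine,\n",
+    "        search_threshold=1.0,\n",
+    "    )\n",
+    ")\n",
+    "\n",
+    "# A paraphrased prompt: if its embedding is similar enough to the cached\n",
+    "# one, the response is served from CrateDB instead of the LLM.\n",
+    "answer = llm.invoke(\"Tell me the answer to everything, please.\")\n",
+    "print(answer.content)\n",
+    "\n",
+    "# Turn off cache.\n",
+    "set_llm_cache(None)"
+   ]
+  }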
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/cache.py b/examples/cache.py
new file mode 100644
index 0000000..315ce41
--- /dev/null
+++ b/examples/cache.py
@@ -0,0 +1,140 @@
+# ruff: noqa: T201
+"""
+Use CrateDB to cache LLM prompts and responses.
+
+The standard / full cache avoids invoking the LLM when the supplied
+prompt is exactly the same as one already encountered.
+
+The semantic cache retrieves cached responses based on semantic
+similarity between the user input and previously cached inputs.
+
+With a cache enabled, repeated LLM conversations do not need to talk
+to the LLM API, so they also work offline.
+"""
+
+import os
+
+import sqlalchemy as sa
+from langchain.globals import set_llm_cache
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+
+from langchain_cratedb import CrateDBCache, CrateDBSemanticCache
+
+"""
+Prerequisites: Because this program uses OpenAI's APIs, you need to supply an
+authentication token. Either set the environment variable `OPENAI_API_KEY`,
+or optionally configure your token here.
+"""
+_ = os.environ.setdefault(
+    "OPENAI_API_KEY", "sk-XJZ7pfog5Gp8Kus8D--invalid--0CJ5lyAKSefZLaV1Y9S1"
+)
+
+
+def standard_cache() -> None:
+    """
+    Demonstrate LangChain standard cache with CrateDB.
+    """
+
+    # Configure cache.
+    engine = sa.create_engine("crate://crate@localhost:4200/?schema=testdrive")
+    set_llm_cache(CrateDBCache(engine))
+
+    # Invoke LLM conversation.
+    llm = ChatOpenAI(
+        # model_name="gpt-3.5-turbo",
+        # model_name="gpt-4o-mini",
+        model_name="chatgpt-4o-latest",  # type: ignore[call-arg]
+        temperature=0.7,
+    )
+    print()
+    print("Asking with standard cache:")
+    answer = llm.invoke("What is the answer to everything?")
+    print(answer.content)
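+
+    # Not part of the original example: a sketch that repeats the identical
+    # prompt. With the standard cache active, this second invocation should
+    # be served from CrateDB instead of the OpenAI API, which the timing
+    # makes visible.
+    import time
+
+    start = time.perf_counter()
+    answer = llm.invoke("What is the answer to everything?")
+    print(answer.content)
+    print(f"Cached invocation took {time.perf_counter() - start:.3f} seconds")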
+
+    # Turn off cache.
+    set_llm_cache(None)
+
+
+def semantic_cache() -> None:
+    """
+    Demonstrate LangChain semantic cache with CrateDB.
+    """
+
+    # Configure LLM models.
+    # model_name_embedding = "text-embedding-ada-002"
+    model_name_embedding = "text-embedding-3-small"
+    # model_name_embedding = "text-embedding-3-large"
+
+    # model_name_chat = "gpt-3.5-turbo"
+    # model_name_chat = "gpt-4o-mini"
+    model_name_chat = "chatgpt-4o-latest"
+
+    # Configure embeddings.
+    embeddings = OpenAIEmbeddings(model=model_name_embedding)
+
+    # Configure cache.
+    engine = sa.create_engine("crate://crate@localhost:4200/?schema=testdrive")
+    set_llm_cache(
+        CrateDBSemanticCache(
+            embedding=embeddings,
+            connection=engine,
+            search_threshold=1.0,
+        )
+    )
+
+    # Invoke LLM conversation.
+    llm = ChatOpenAI(
+        model_name=model_name_chat,  # type: ignore[call-arg]
+    )
+    print()
+    print("Asking with semantic cache:")
+    answer = llm.invoke("What is the answer to everything?")
+    print(answer.content)
+
+    # Turn off cache.
+    set_llm_cache(None)
+
+
+if __name__ == "__main__":
+    standard_cache()
+    semantic_cache()
+
+
+"""
+What is the answer to everything?
+
+Date: 2024-12-23
+
+## gpt-3.5-turbo
+The answer to everything is subjective and may vary depending on individual
+beliefs or philosophies. Some may say that love is the answer to everything,
+while others may say that knowledge or self-awareness is the key. Ultimately,
+the answer to everything may be different for each person and can only be
+discovered through personal reflection and introspection.
+
+## gpt-4o-mini
+The answer to the ultimate question of life, the universe, and everything,
+according to Douglas Adams' "The Hitchhiker's Guide to the Galaxy", is
+famously given as the number 42. However, the context and meaning behind
+that answer remains a philosophical and humorous mystery. In a broader
+sense, different people and cultures may have various interpretations of
+what the "answer to everything" truly is, often reflecting their beliefs,
+values, and experiences.
+
+## chatgpt-4o-latest, pure
+Ah, you're referencing the famous answer from Douglas Adams'
+*The Hitchhiker's Guide to the Galaxy*! In the book, the supercomputer
+Deep Thought determines that the "Answer to the Ultimate Question of
+Life, the Universe, and Everything" is **42**.
+Of course, the real kicker is that no one actually knows what the Ultimate
+Question is. So, while 42 is the answer, its true meaning remains a cosmic
+mystery! 😊
+
+## chatgpt-4o-latest, with text-embedding-3-small embeddings
+Ah, you're referring to the famous answer from Douglas Adams'
+*The Hitchhiker's Guide to the Galaxy*! The answer to the ultimate question
+of life, the universe, and everything is **42**. However, as the story
+humorously points out, the actual *question* remains unknown. 😊
+If you're looking for a deeper or more philosophical answer, feel free to
+elaborate!
+"""
diff --git a/pyproject.toml b/pyproject.toml
index 513155c..886674a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -140,6 +140,9 @@ select = [
 "tests/*" = ["S101"]  # Use of `assert` detected
 ".github/scripts/*" = ["S101"]  # Use of `assert` detected
 
+[tool.ruff.lint.per-file-ignores]
+"docs/*.ipynb" = ["F401", "F821", "T201"]
+
 [tool.coverage.run]
 omit = [
   "langchain_cratedb/retrievers.py",