From a3491cfdac7a8969ad1755f56c7980baedbbc70a Mon Sep 17 00:00:00 2001 From: Ofer Mendelevitch Date: Wed, 6 Mar 2024 16:28:12 -0800 Subject: [PATCH] fixed bug when using Vectara query engine in non-summary mode (#11668) * fixed bug when using Vectara query engine in non-summary mode - it needs to have output synthesizers after refactor * updated vectara autoRetriever example notebook * bugfix * bugfix in handling error updated documentId has to be 64 characters (instead of longer) * fixes from review * fix per review --- .../retrievers/vectara_auto_retriever.ipynb | 13 +++++++++++-- .../indices/managed/vectara/base.py | 18 +++++++++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/docs/examples/retrievers/vectara_auto_retriever.ipynb b/docs/examples/retrievers/vectara_auto_retriever.ipynb index 5d76c14caccd7..1ec6e6187fdea 100644 --- a/docs/examples/retrievers/vectara_auto_retriever.ipynb +++ b/docs/examples/retrievers/vectara_auto_retriever.ipynb @@ -56,7 +56,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install llama-index" + "!pip install llama-index llama-index-indices-managed-vectara llama-index-llms-openai" ] }, { @@ -222,6 +222,7 @@ "source": [ "import getpass\n", "import openai\n", + "import os\n", "\n", "if not os.environ.get(\"OPENAI_API_KEY\", None):\n", " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n", @@ -290,11 +291,17 @@ "metadata": {}, "outputs": [], "source": [ - "llm = OpenAI(model=\"gpt-4-1106-preview\", temperature=0)\n", + "from llama_index.indices.managed.vectara import VectaraAutoRetriever\n", + "from llama_index.core.indices.service_context import ServiceContext\n", + "from llama_index.llms.openai import OpenAI\n", + "\n", + "llm = OpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n", "\n", "retriever = VectaraAutoRetriever(\n", " index,\n", " vector_store_info=vector_store_info,\n", + " llm=llm,\n", + " verbose=False,\n", ")" ] }, @@ -395,6 +402,7 @@ "retriever = 
VectaraAutoRetriever(\n", " index,\n", " vector_store_info=vector_store_info,\n", + " llm=llm,\n", " filter=\"doc.rating > 8\",\n", ")\n", "retriever.retrieve(\"movie about toys\")" @@ -438,6 +446,7 @@ "retriever = VectaraAutoRetriever(\n", " index,\n", " vector_store_info=vector_store_info,\n", + " llm=llm,\n", " filter=\"doc.rating > 8\",\n", " vectara_query_mode=\"mmr\",\n", " mmr_k=50,\n", diff --git a/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/base.py b/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/base.py index 18dc4f2f5ed13..c3eb19026c963 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/base.py +++ b/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/base.py @@ -30,6 +30,9 @@ from llama_index.core.settings import Settings from llama_index.core.storage.storage_context import StorageContext +from llama_index.core.response_synthesizers import ResponseMode +from llama_index.core import get_response_synthesizer + _logger = logging.getLogger(__name__) @@ -193,13 +196,12 @@ def _index_doc(self, doc: dict) -> str: ) status_code = response.status_code - result = response.json() status_str = result["status"]["code"] if "status" in result else None - if status_code == 409 or status_str and (status_str == "ALREADY_EXISTS"): + if status_code == 409 and status_str and (status_str == "ALREADY_EXISTS"): return "E_ALREADY_EXISTS" - elif status_code == 200 or status_str and (status_str == "INVALID_ARGUMENT"): + elif status_code == 200 and status_str and (status_str == "INVALID_ARGUMENT"): return "E_INVALID_ARGUMENT" elif status_str and (status_str == "FORBIDDEN"): return "E_NO_PERMISSIONS" @@ -215,7 +217,7 @@ def _insert( """Insert a set of documents (each a node).""" def gen_hash(s: str) -> str: - hash_object = blake2b() + hash_object 
= blake2b(digest_size=32) hash_object.update(s.encode("utf-8")) return hash_object.hexdigest() @@ -360,8 +362,14 @@ def as_query_engine( ) retriever = self.as_retriever(**kwargs) + response_synthesizer = get_response_synthesizer( + response_mode=ResponseMode.COMPACT, + llm=llm, + ) return RetrieverQueryEngine.from_args( - retriever=retriever, llm=llm, **kwargs + retriever=retriever, + response_synthesizer=response_synthesizer, + **kwargs, ) @classmethod