Also use ref_doc_id to dedup in _ahandle_recursive_retrieval (#14383)

* dedup on ref_doc_id
* version bump
run-llama · Jun 25, 2024 · 5142603
1 parent 6791756
commit 5142603
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 3 deletions.
diff --git a/llama-index-core/llama_index/core/base/base_retriever.py b/llama-index-core/llama_index/core/base/base_retriever.py
@@ -208,12 +208,12 @@ async def _ahandle_recursive_retrieval(
             else:
                 retrieved_nodes.append(n)
 
-        # remove any duplicates based on hash
+        # remove any duplicates based on hash and ref_doc_id
         seen = set()
         return [
             n
             for n in retrieved_nodes
-            if not (n.node.hash in seen or seen.add(n.node.hash))  # type: ignore[func-returns-value]
+            if not ((n.node.hash, n.node.ref_doc_id) in seen or seen.add((n.node.hash, n.node.ref_doc_id)))  # type: ignore[func-returns-value]
         ]
 
     @dispatcher.span

diff --git a/llama-index-core/pyproject.toml b/llama-index-core/pyproject.toml
@@ -43,7 +43,7 @@ name = "llama-index-core"
 packages = [{include = "llama_index"}]
 readme = "README.md"
 repository = "https://github.com/run-llama/llama_index"
-version = "0.10.50"
+version = "0.10.50.post1"
 
 [tool.poetry.dependencies]
 SQLAlchemy = {extras = ["asyncio"], version = ">=1.4.49"}