From 51426033cac5b21fcda71705fdfdbfa7d3bcb1cf Mon Sep 17 00:00:00 2001 From: Javier Torres Date: Tue, 25 Jun 2024 11:29:32 -0500 Subject: [PATCH] Also use ref_doc_id to dedup in _ahandle_recursive_retrieval (#14383) * dedup on ref_doc_id * version bump --- llama-index-core/llama_index/core/base/base_retriever.py | 4 ++-- llama-index-core/pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llama-index-core/llama_index/core/base/base_retriever.py b/llama-index-core/llama_index/core/base/base_retriever.py index 0910ef830382b..5b802fd11ef69 100644 --- a/llama-index-core/llama_index/core/base/base_retriever.py +++ b/llama-index-core/llama_index/core/base/base_retriever.py @@ -208,12 +208,12 @@ async def _ahandle_recursive_retrieval( else: retrieved_nodes.append(n) - # remove any duplicates based on hash + # remove any duplicates based on hash and ref_doc_id seen = set() return [ n for n in retrieved_nodes - if not (n.node.hash in seen or seen.add(n.node.hash)) # type: ignore[func-returns-value] + if not ((n.node.hash, n.node.ref_doc_id) in seen or seen.add((n.node.hash, n.node.ref_doc_id))) # type: ignore[func-returns-value] ] @dispatcher.span diff --git a/llama-index-core/pyproject.toml b/llama-index-core/pyproject.toml index 06d63a0ef1bb3..6c960b859f594 100644 --- a/llama-index-core/pyproject.toml +++ b/llama-index-core/pyproject.toml @@ -43,7 +43,7 @@ name = "llama-index-core" packages = [{include = "llama_index"}] readme = "README.md" repository = "https://github.com/run-llama/llama_index" -version = "0.10.50" +version = "0.10.50.post1" [tool.poetry.dependencies] SQLAlchemy = {extras = ["asyncio"], version = ">=1.4.49"}