From 2f8d5cae259ecbcd3e1aecadbe35060a864223d1 Mon Sep 17 00:00:00 2001 From: marcus-ny Date: Wed, 25 Sep 2024 03:57:02 +0800 Subject: [PATCH] fix: add unique id for vector embedding for consistent upserts with no duplicated events --- backend/src/embeddings/vector_store.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/backend/src/embeddings/vector_store.py b/backend/src/embeddings/vector_store.py index 461f0259..29a2776c 100644 --- a/backend/src/embeddings/vector_store.py +++ b/backend/src/embeddings/vector_store.py @@ -68,8 +68,15 @@ def store_documents(): ) documents.append(document) - uuids = [str(uuid4()) for _ in range(len(documents))] - vector_store.add_documents(documents=documents, ids=uuids) + ids = [ + str(document.metadata["id"]) + + "-" + + str(document.metadata["event_id"]) + + "-" + + str(document.metadata["category_id"]) + for document in documents + ] + vector_store.add_documents(documents=documents, ids=ids) print(f"Stored {len(documents)} documents")