From ff47bd284de8cb9d47ac6b0712275578fbb2fdf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fahreddin=20=C3=96zcan?= <88107904+fahreddinozcan@users.noreply.github.com> Date: Wed, 29 May 2024 02:26:12 +0200 Subject: [PATCH] community[patch]: Upstash Vector Store Namespace Feature (#5557) * feat: Upstash Vector Namespace feature * add: Upstash Vector Namespace Tests * docs: Upstash Vector namespace * fmt --- .../integrations/vectorstores/upstash.mdx | 7 ++ examples/package.json | 2 +- .../vector_stores/upstash/namespaces.ts | 50 ++++++++ libs/langchain-community/package.json | 4 +- .../vectorstores/tests/upstash.int.test.ts | 114 ++++++++++++++++++ .../src/vectorstores/upstash.ts | 24 ++-- yarn.lock | 14 +-- 7 files changed, 198 insertions(+), 17 deletions(-) create mode 100644 examples/src/indexes/vector_stores/upstash/namespaces.ts diff --git a/docs/core_docs/docs/integrations/vectorstores/upstash.mdx b/docs/core_docs/docs/integrations/vectorstores/upstash.mdx index 9f42a68713df..878209f30e54 100644 --- a/docs/core_docs/docs/integrations/vectorstores/upstash.mdx +++ b/docs/core_docs/docs/integrations/vectorstores/upstash.mdx @@ -3,6 +3,7 @@ import CreateClientExample from "@examples/indexes/vector_stores/upstash/create_ import IndexQueryExample from "@examples/indexes/vector_stores/upstash/index_and_query_docs.ts"; import DeleteExample from "@examples/indexes/vector_stores/upstash/delete_docs.ts"; import UpstashEmbeddingsExample from "@examples/indexes/vector_stores/upstash/upstash_embeddings.ts"; +import NamespaceExample from "@examples/indexes/vector_stores/upstash/namespaces.ts"; import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; # Upstash Vector @@ -42,6 +43,12 @@ You can index the LangChain documents with any model of your choice, and perform {IndexQueryExample} +## Namespaces + +You can use namespaces to partition your data in the index. 
Namespaces are useful when you want to query over a huge amount of data, and you want to partition the data to make the queries faster. When you use namespaces, there won't be post-filtering on the results, which will make the query results more precise. + +{NamespaceExample} + ## Upstash embeddings It's possible to use the embeddings service of Upstash, which is based on the embedding model of choice when creating the vector database. You don't need to create the embeddings manually, as the Upstash Vector service will handle this for you. diff --git a/examples/package.json b/examples/package.json index 8bcdc09fa375..c95c98fa8c93 100644 --- a/examples/package.json +++ b/examples/package.json @@ -67,7 +67,7 @@ "@supabase/supabase-js": "^2.10.0", "@tensorflow/tfjs-backend-cpu": "^4.4.0", "@upstash/redis": "^1.20.6", - "@upstash/vector": "^1.0.7", + "@upstash/vector": "^1.1.1", "@vercel/kv": "^0.2.3", "@xata.io/client": "^0.28.0", "@zilliz/milvus2-sdk-node": "^2.2.7", diff --git a/examples/src/indexes/vector_stores/upstash/namespaces.ts b/examples/src/indexes/vector_stores/upstash/namespaces.ts new file mode 100644 index 000000000000..ef20fb5bd893 --- /dev/null +++ b/examples/src/indexes/vector_stores/upstash/namespaces.ts @@ -0,0 +1,50 @@ +import { Index } from "@upstash/vector"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; +import { UpstashVectorStore } from "@langchain/community/vectorstores/upstash"; + +const index = new Index({ + url: process.env.UPSTASH_VECTOR_REST_URL as string, + token: process.env.UPSTASH_VECTOR_REST_TOKEN as string, +}); + +const embeddings = new OpenAIEmbeddings({}); + +const UpstashVector = new UpstashVectorStore(embeddings, { + index, + namespace: "test-namespace", +}); + +// Creating the docs to be indexed. 
+const id = new Date().getTime(); +const documents = [ + new Document({ + metadata: { name: id }, + pageContent: "Vector databases are great!", + }), +]; + +// Creating embeddings from the provided documents, and adding them to the target namespace in the Upstash Vector database. +await UpstashVector.addDocuments(documents); + +// Waiting for vectors to be indexed in the vector store. +// eslint-disable-next-line no-promise-executor-return +await new Promise((resolve) => setTimeout(resolve, 1000)); + +const queryResult = await UpstashVector.similaritySearchWithScore( + "Vector database", + 1 +); + +console.log(queryResult); +/** +[ + [ + Document { + pageContent: 'Vector databases are great!', + metadata: [Object] + }, + 0.9016147 + ], +] + */ diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index e0a0f940e0c1..7c6d7e8d67b1 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -127,7 +127,7 @@ "@typescript-eslint/parser": "^5.58.0", "@upstash/ratelimit": "^1.1.3", "@upstash/redis": "^1.20.6", - "@upstash/vector": "^1.0.7", + "@upstash/vector": "^1.1.1", "@vercel/kv": "^0.2.3", "@vercel/postgres": "^0.5.0", "@writerai/writer-sdk": "^0.40.2", @@ -266,7 +266,7 @@ "@tensorflow/tfjs-core": "*", "@upstash/ratelimit": "^1.1.3", "@upstash/redis": "^1.20.6", - "@upstash/vector": "^1.0.7", + "@upstash/vector": "^1.1.1", "@vercel/kv": "^0.2.3", "@vercel/postgres": "^0.5.0", "@writerai/writer-sdk": "^0.40.2", diff --git a/libs/langchain-community/src/vectorstores/tests/upstash.int.test.ts b/libs/langchain-community/src/vectorstores/tests/upstash.int.test.ts index a1ed8d8c4400..076501cb8398 100644 --- a/libs/langchain-community/src/vectorstores/tests/upstash.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/upstash.int.test.ts @@ -172,4 +172,118 @@ describe("UpstashVectorStore", () => { expect(results3).toHaveLength(2); }); + + test("Should upsert the documents to target namespace", async 
() => { + index = new Index({ + url: process.env.UPSTASH_VECTOR_REST_URL, + token: process.env.UPSTASH_VECTOR_REST_TOKEN, + }); + + await index.reset(); + + embeddings = new SyntheticEmbeddings({ + vectorSize: 384, + }); + + const storeNamespace1 = new UpstashVectorStore(embeddings, { + index, + namespace: "namespace-1", + }); + const storeNamespace2 = new UpstashVectorStore(embeddings, { + index, + namespace: "namespace-2", + }); + + await storeNamespace1.addDocuments([ + { + pageContent: "namespace-test-original", + metadata: { namespace: "namespace-1" }, + }, + ]); + + // Sleeping for a second to make sure that all the indexing operations are finished. + await sleep(1000); + + const resultsNamespace2 = await storeNamespace2.similaritySearchWithScore( + "namespace-test-original", + 1, + "namespace = 'namespace-1'" + ); + expect(resultsNamespace2).toHaveLength(0); + + const resultsNamespace1 = await storeNamespace1.similaritySearchWithScore( + "namespace-test-original", + 1, + "namespace = 'namespace-1'" + ); + expect(resultsNamespace1).toHaveLength(1); + + expect([resultsNamespace1[0][0]]).toEqual([ + new Document({ + metadata: { namespace: "namespace-1" }, + pageContent: "namespace-test-original", + }), + ]); + }); + + test("Should delete the documents from target namespace", async () => { + index = new Index({ + url: process.env.UPSTASH_VECTOR_REST_URL, + token: process.env.UPSTASH_VECTOR_REST_TOKEN, + }); + + await index.reset(); + + embeddings = new SyntheticEmbeddings({ + vectorSize: 384, + }); + + const storeNamespace1 = new UpstashVectorStore(embeddings, { + index, + namespace: "namespace-1", + }); + const storeNamespace2 = new UpstashVectorStore(embeddings, { + index, + namespace: "namespace-2", + }); + + const idNamespace1 = await storeNamespace1.addDocuments([ + { + pageContent: "namespace-test-original", + metadata: { namespace: "namespace-test" }, + }, + ]); + await storeNamespace2.addDocuments([ + { + pageContent: "namespace-test-original", + 
metadata: { namespace: "namespace-test" }, + }, + ]); + + // Sleeping for a second to make sure that all the indexing operations are finished. + await sleep(1000); + + await storeNamespace1.delete({ ids: idNamespace1 }); + + const resultsNamespace1 = await storeNamespace1.similaritySearchWithScore( + "namespace-test-original", + 1, + "namespace = 'namespace-test'" + ); + expect(resultsNamespace1).toHaveLength(0); + + const resultsNamespace2 = await storeNamespace2.similaritySearchWithScore( + "namespace-test-original", + 1, + "namespace = 'namespace-test'" + ); + expect(resultsNamespace2).toHaveLength(1); + + expect([resultsNamespace2[0][0]]).toEqual([ + new Document({ + metadata: { namespace: "namespace-test" }, + pageContent: "namespace-test-original", + }), + ]); + }); }); diff --git a/libs/langchain-community/src/vectorstores/upstash.ts b/libs/langchain-community/src/vectorstores/upstash.ts index 37ac287b6d24..f1e59d556ae7 100644 --- a/libs/langchain-community/src/vectorstores/upstash.ts +++ b/libs/langchain-community/src/vectorstores/upstash.ts @@ -17,6 +17,7 @@ import { export interface UpstashVectorLibArgs extends AsyncCallerParams { index: UpstashIndex; filter?: string; + namespace?: string; } // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -56,6 +57,8 @@ export class UpstashVectorStore extends VectorStore { filter?: this["FilterType"]; + namespace?: string; + _vectorstoreType(): string { return "upstash"; } @@ -68,11 +71,12 @@ export class UpstashVectorStore extends VectorStore { this.useUpstashEmbeddings = true; } - const { index, ...asyncCallerArgs } = args; + const { index, namespace, ...asyncCallerArgs } = args; this.index = index; this.caller = new AsyncCaller(asyncCallerArgs); this.filter = args.filter; + this.namespace = namespace; } /** @@ -127,10 +131,12 @@ export class UpstashVectorStore extends VectorStore { }; }); + const namespace = this.index.namespace(this.namespace ?? 
""); + const vectorChunks = chunkArray(upstashVectors, CONCURRENT_UPSERT_LIMIT); const batchRequests = vectorChunks.map((chunk) => - this.caller.call(async () => this.index.upsert(chunk)) + this.caller.call(async () => namespace.upsert(chunk)) ); await Promise.all(batchRequests); @@ -166,13 +172,14 @@ export class UpstashVectorStore extends VectorStore { }; }); + const namespace = this.index.namespace(this.namespace ?? ""); const vectorChunks = chunkArray( upstashVectorsWithData, CONCURRENT_UPSERT_LIMIT ); const batchRequests = vectorChunks.map((chunk) => - this.caller.call(async () => this.index.upsert(chunk)) + this.caller.call(async () => namespace.upsert(chunk)) ); await Promise.all(batchRequests); @@ -187,10 +194,11 @@ export class UpstashVectorStore extends VectorStore { * @returns Promise that resolves when the specified documents have been deleted from the database. */ async delete(params: UpstashDeleteParams): Promise { + const namespace = this.index.namespace(this.namespace ?? ""); if (params.deleteAll) { - await this.index.reset(); + await namespace.reset(); } else if (params.ids) { - await this.index.delete(params.ids); + await namespace.delete(params.ids); } } @@ -202,8 +210,10 @@ export class UpstashVectorStore extends VectorStore { ) { let queryResult: QueryResult[] = []; + const namespace = this.index.namespace(this.namespace ?? 
""); + if (typeof query === "string") { - queryResult = await this.index.query({ + queryResult = await namespace.query({ data: query, topK: k, includeMetadata: true, @@ -211,7 +221,7 @@ export class UpstashVectorStore extends VectorStore { ...options, }); } else { - queryResult = await this.index.query({ + queryResult = await namespace.query({ vector: query, topK: k, includeMetadata: true, diff --git a/yarn.lock b/yarn.lock index 70619a0649d7..893c185426d5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -9140,7 +9140,7 @@ __metadata: "@typescript-eslint/parser": ^5.58.0 "@upstash/ratelimit": ^1.1.3 "@upstash/redis": ^1.20.6 - "@upstash/vector": ^1.0.7 + "@upstash/vector": ^1.1.1 "@vercel/kv": ^0.2.3 "@vercel/postgres": ^0.5.0 "@writerai/writer-sdk": ^0.40.2 @@ -9287,7 +9287,7 @@ __metadata: "@tensorflow/tfjs-core": "*" "@upstash/ratelimit": ^1.1.3 "@upstash/redis": ^1.20.6 - "@upstash/vector": ^1.0.7 + "@upstash/vector": ^1.1.1 "@vercel/kv": ^0.2.3 "@vercel/postgres": ^0.5.0 "@writerai/writer-sdk": ^0.40.2 @@ -16131,10 +16131,10 @@ __metadata: languageName: node linkType: hard -"@upstash/vector@npm:^1.0.7": - version: 1.0.7 - resolution: "@upstash/vector@npm:1.0.7" - checksum: 38d6ef4fd8cd970e3b83e39cf90e0f57622ac032afc409fa6782911ec2452d19decb0184c5a8f7849b8fb06c865c9397f142633a31cea49e82dd6fc4b43e8484 +"@upstash/vector@npm:^1.1.1": + version: 1.1.1 + resolution: "@upstash/vector@npm:1.1.1" + checksum: 2eeaa655b46d9182dfdb5e12ff58654e820df18e7b03e8cfff2795cbdb370cc7652a198638f36e8f29dab3e6a67004cda693b62e0885dc87472a08f908ffe0e6 languageName: node linkType: hard @@ -22533,7 +22533,7 @@ __metadata: "@typescript-eslint/eslint-plugin": ^5.51.0 "@typescript-eslint/parser": ^5.51.0 "@upstash/redis": ^1.20.6 - "@upstash/vector": ^1.0.7 + "@upstash/vector": ^1.1.1 "@vercel/kv": ^0.2.3 "@xata.io/client": ^0.28.0 "@zilliz/milvus2-sdk-node": ^2.2.7