Skip to content

Commit

Permalink
community[patch]: Upstash Vector Store Namespace Feature (#5557)
Browse files Browse the repository at this point in the history
* feat: Upstash Vector Namespace feature

* add: Upstash Vector Namespace Tests

* docs: Upstash Vector namespace

* fmt
  • Loading branch information
fahreddinozcan authored May 29, 2024
1 parent 079931f commit ff47bd2
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 17 deletions.
7 changes: 7 additions & 0 deletions docs/core_docs/docs/integrations/vectorstores/upstash.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import CreateClientExample from "@examples/indexes/vector_stores/upstash/create_
import IndexQueryExample from "@examples/indexes/vector_stores/upstash/index_and_query_docs.ts";
import DeleteExample from "@examples/indexes/vector_stores/upstash/delete_docs.ts";
import UpstashEmbeddingsExample from "@examples/indexes/vector_stores/upstash/upstash_embeddings.ts";
import NamespaceExample from "@examples/indexes/vector_stores/upstash/namespaces.ts";
import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx";

# Upstash Vector
Expand Down Expand Up @@ -42,6 +43,12 @@ You can index the LangChain documents with any model of your choice, and perform

<CodeBlock language="typescript">{IndexQueryExample}</CodeBlock>

## Namespaces

You can use namespaces to partition the data in your index. Namespaces are useful when you need to query a huge amount of data and want to partition it to make queries faster. When you use namespaces, no post-filtering is applied to the results, which makes the query results more precise.

<CodeBlock language="typescript">{NamespaceExample}</CodeBlock>

## Upstash embeddings

You can also use the Upstash embeddings service, which uses the embedding model you chose when creating the vector database. You don't need to create the embeddings manually, as the Upstash Vector service will handle this for you.
Expand Down
2 changes: 1 addition & 1 deletion examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
"@supabase/supabase-js": "^2.10.0",
"@tensorflow/tfjs-backend-cpu": "^4.4.0",
"@upstash/redis": "^1.20.6",
"@upstash/vector": "^1.0.7",
"@upstash/vector": "^1.1.1",
"@vercel/kv": "^0.2.3",
"@xata.io/client": "^0.28.0",
"@zilliz/milvus2-sdk-node": "^2.2.7",
Expand Down
50 changes: 50 additions & 0 deletions examples/src/indexes/vector_stores/upstash/namespaces.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { Index } from "@upstash/vector";
import { OpenAIEmbeddings } from "@langchain/openai";
import { Document } from "@langchain/core/documents";
import { UpstashVectorStore } from "@langchain/community/vectorstores/upstash";

// Upstash Vector client, configured from the REST credentials in the environment.
const upstashIndex = new Index({
  url: process.env.UPSTASH_VECTOR_REST_URL as string,
  token: process.env.UPSTASH_VECTOR_REST_TOKEN as string,
});

// Vector store bound to the "test-namespace" namespace of the index.
const vectorStore = new UpstashVectorStore(new OpenAIEmbeddings({}), {
  index: upstashIndex,
  namespace: "test-namespace",
});

// Build the document to index; the current timestamp is stored as its `name` metadata.
const createdAt = Date.now();
const docs = [
  new Document({
    metadata: { name: createdAt },
    pageContent: "Vector databases are great!",
  }),
];

// Embed the documents and add them to the target namespace in the Upstash Vector database.
await vectorStore.addDocuments(docs);

// Wait for a second so the vectors have time to be indexed in the vector store.
await new Promise((resolve) => {
  setTimeout(resolve, 1000);
});

// Fetch the single closest match together with its similarity score.
const matches = await vectorStore.similaritySearchWithScore(
  "Vector database",
  1
);

console.log(matches);
/**
[
  [
    Document {
      pageContent: 'Vector databases are great!',
      metadata: [Object]
    },
    0.9016147
  ],
]
*/
4 changes: 2 additions & 2 deletions libs/langchain-community/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@
"@typescript-eslint/parser": "^5.58.0",
"@upstash/ratelimit": "^1.1.3",
"@upstash/redis": "^1.20.6",
"@upstash/vector": "^1.0.7",
"@upstash/vector": "^1.1.1",
"@vercel/kv": "^0.2.3",
"@vercel/postgres": "^0.5.0",
"@writerai/writer-sdk": "^0.40.2",
Expand Down Expand Up @@ -266,7 +266,7 @@
"@tensorflow/tfjs-core": "*",
"@upstash/ratelimit": "^1.1.3",
"@upstash/redis": "^1.20.6",
"@upstash/vector": "^1.0.7",
"@upstash/vector": "^1.1.1",
"@vercel/kv": "^0.2.3",
"@vercel/postgres": "^0.5.0",
"@writerai/writer-sdk": "^0.40.2",
Expand Down
114 changes: 114 additions & 0 deletions libs/langchain-community/src/vectorstores/tests/upstash.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,4 +172,118 @@ describe("UpstashVectorStore", () => {

expect(results3).toHaveLength(2);
});

test("Should upsert the documents to target namespace", async () => {
  // Recreate the shared client and reset the index before writing anything.
  index = new Index({
    url: process.env.UPSTASH_VECTOR_REST_URL,
    token: process.env.UPSTASH_VECTOR_REST_TOKEN,
  });
  await index.reset();

  embeddings = new SyntheticEmbeddings({ vectorSize: 384 });

  const searchText = "namespace-test-original";
  const metadataFilter = "namespace = 'namespace-1'";

  // Two stores share one index but point at different namespaces.
  const writerStore = new UpstashVectorStore(embeddings, {
    index,
    namespace: "namespace-1",
  });
  const otherStore = new UpstashVectorStore(embeddings, {
    index,
    namespace: "namespace-2",
  });

  // Only the first namespace receives the document.
  await writerStore.addDocuments([
    {
      pageContent: "namespace-test-original",
      metadata: { namespace: "namespace-1" },
    },
  ]);

  // Sleep for a second to make sure that all the indexing operations have finished.
  await sleep(1000);

  // The namespace that was never written to must return no results.
  const otherHits = await otherStore.similaritySearchWithScore(
    searchText,
    1,
    metadataFilter
  );
  expect(otherHits).toHaveLength(0);

  // The namespace that was written to must return exactly the stored document.
  const writerHits = await writerStore.similaritySearchWithScore(
    searchText,
    1,
    metadataFilter
  );
  expect(writerHits).toHaveLength(1);

  const returnedDocs = writerHits.map(([doc]) => doc);
  expect(returnedDocs).toEqual([
    new Document({
      metadata: { namespace: "namespace-1" },
      pageContent: "namespace-test-original",
    }),
  ]);
});

test("Should delete the documents from target namespace", async () => {
  // Recreate the shared client and reset the index before writing anything.
  index = new Index({
    url: process.env.UPSTASH_VECTOR_REST_URL,
    token: process.env.UPSTASH_VECTOR_REST_TOKEN,
  });
  await index.reset();

  embeddings = new SyntheticEmbeddings({ vectorSize: 384 });

  const searchText = "namespace-test-original";
  const metadataFilter = "namespace = 'namespace-test'";

  // Two stores share one index but point at different namespaces.
  const firstStore = new UpstashVectorStore(embeddings, {
    index,
    namespace: "namespace-1",
  });
  const secondStore = new UpstashVectorStore(embeddings, {
    index,
    namespace: "namespace-2",
  });

  // Insert the same document into both namespaces, keeping the ids from the first.
  const insertedIds = await firstStore.addDocuments([
    {
      pageContent: "namespace-test-original",
      metadata: { namespace: "namespace-test" },
    },
  ]);
  await secondStore.addDocuments([
    {
      pageContent: "namespace-test-original",
      metadata: { namespace: "namespace-test" },
    },
  ]);

  // Sleep for a second to make sure that all the indexing operations have finished.
  await sleep(1000);

  // Delete through the first store only.
  await firstStore.delete({ ids: insertedIds });

  // The first namespace should no longer return the document.
  const firstHits = await firstStore.similaritySearchWithScore(
    searchText,
    1,
    metadataFilter
  );
  expect(firstHits).toHaveLength(0);

  // The second namespace should still return its own copy.
  const secondHits = await secondStore.similaritySearchWithScore(
    searchText,
    1,
    metadataFilter
  );
  expect(secondHits).toHaveLength(1);

  const survivingDocs = secondHits.map(([doc]) => doc);
  expect(survivingDocs).toEqual([
    new Document({
      metadata: { namespace: "namespace-test" },
      pageContent: "namespace-test-original",
    }),
  ]);
});
});
24 changes: 17 additions & 7 deletions libs/langchain-community/src/vectorstores/upstash.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
export interface UpstashVectorLibArgs extends AsyncCallerParams {
/** Upstash index client that the store keeps and issues its requests through. */
index: UpstashIndex;
/** Optional filter string, stored on the vector store as its `filter` field. */
filter?: string;
/** Optional namespace the store operates on; when omitted, the store uses `""`. */
namespace?: string;
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any
Expand Down Expand Up @@ -56,6 +57,8 @@ export class UpstashVectorStore extends VectorStore {

filter?: this["FilterType"];

namespace?: string;

/** Returns the type identifier of this vector store, the string `"upstash"`. */
_vectorstoreType(): string {
return "upstash";
}
Expand All @@ -68,11 +71,12 @@ export class UpstashVectorStore extends VectorStore {
this.useUpstashEmbeddings = true;
}

const { index, ...asyncCallerArgs } = args;
const { index, namespace, ...asyncCallerArgs } = args;

this.index = index;
this.caller = new AsyncCaller(asyncCallerArgs);
this.filter = args.filter;
this.namespace = namespace;
}

/**
Expand Down Expand Up @@ -127,10 +131,12 @@ export class UpstashVectorStore extends VectorStore {
};
});

const namespace = this.index.namespace(this.namespace ?? "");

const vectorChunks = chunkArray(upstashVectors, CONCURRENT_UPSERT_LIMIT);

const batchRequests = vectorChunks.map((chunk) =>
this.caller.call(async () => this.index.upsert(chunk))
this.caller.call(async () => namespace.upsert(chunk))
);

await Promise.all(batchRequests);
Expand Down Expand Up @@ -166,13 +172,14 @@ export class UpstashVectorStore extends VectorStore {
};
});

const namespace = this.index.namespace(this.namespace ?? "");
const vectorChunks = chunkArray(
upstashVectorsWithData,
CONCURRENT_UPSERT_LIMIT
);

const batchRequests = vectorChunks.map((chunk) =>
this.caller.call(async () => this.index.upsert(chunk))
this.caller.call(async () => namespace.upsert(chunk))
);

await Promise.all(batchRequests);
Expand All @@ -187,10 +194,11 @@ export class UpstashVectorStore extends VectorStore {
* @returns Promise that resolves when the specified documents have been deleted from the database.
*/
async delete(params: UpstashDeleteParams): Promise<void> {
const namespace = this.index.namespace(this.namespace ?? "");
if (params.deleteAll) {
await this.index.reset();
await namespace.reset();
} else if (params.ids) {
await this.index.delete(params.ids);
await namespace.delete(params.ids);
}
}

Expand All @@ -202,16 +210,18 @@ export class UpstashVectorStore extends VectorStore {
) {
let queryResult: QueryResult<UpstashQueryMetadata>[] = [];

const namespace = this.index.namespace(this.namespace ?? "");

if (typeof query === "string") {
queryResult = await this.index.query<UpstashQueryMetadata>({
queryResult = await namespace.query<UpstashQueryMetadata>({
data: query,
topK: k,
includeMetadata: true,
filter,
...options,
});
} else {
queryResult = await this.index.query<UpstashQueryMetadata>({
queryResult = await namespace.query<UpstashQueryMetadata>({
vector: query,
topK: k,
includeMetadata: true,
Expand Down
14 changes: 7 additions & 7 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -9140,7 +9140,7 @@ __metadata:
"@typescript-eslint/parser": ^5.58.0
"@upstash/ratelimit": ^1.1.3
"@upstash/redis": ^1.20.6
"@upstash/vector": ^1.0.7
"@upstash/vector": ^1.1.1
"@vercel/kv": ^0.2.3
"@vercel/postgres": ^0.5.0
"@writerai/writer-sdk": ^0.40.2
Expand Down Expand Up @@ -9287,7 +9287,7 @@ __metadata:
"@tensorflow/tfjs-core": "*"
"@upstash/ratelimit": ^1.1.3
"@upstash/redis": ^1.20.6
"@upstash/vector": ^1.0.7
"@upstash/vector": ^1.1.1
"@vercel/kv": ^0.2.3
"@vercel/postgres": ^0.5.0
"@writerai/writer-sdk": ^0.40.2
Expand Down Expand Up @@ -16131,10 +16131,10 @@ __metadata:
languageName: node
linkType: hard

"@upstash/vector@npm:^1.0.7":
version: 1.0.7
resolution: "@upstash/vector@npm:1.0.7"
checksum: 38d6ef4fd8cd970e3b83e39cf90e0f57622ac032afc409fa6782911ec2452d19decb0184c5a8f7849b8fb06c865c9397f142633a31cea49e82dd6fc4b43e8484
"@upstash/vector@npm:^1.1.1":
version: 1.1.1
resolution: "@upstash/vector@npm:1.1.1"
checksum: 2eeaa655b46d9182dfdb5e12ff58654e820df18e7b03e8cfff2795cbdb370cc7652a198638f36e8f29dab3e6a67004cda693b62e0885dc87472a08f908ffe0e6
languageName: node
linkType: hard

Expand Down Expand Up @@ -22533,7 +22533,7 @@ __metadata:
"@typescript-eslint/eslint-plugin": ^5.51.0
"@typescript-eslint/parser": ^5.51.0
"@upstash/redis": ^1.20.6
"@upstash/vector": ^1.0.7
"@upstash/vector": ^1.1.1
"@vercel/kv": ^0.2.3
"@xata.io/client": ^0.28.0
"@zilliz/milvus2-sdk-node": ^2.2.7
Expand Down

0 comments on commit ff47bd2

Please sign in to comment.