From 2494ed703debc1e74f2eb370ad5613607bf87467 Mon Sep 17 00:00:00 2001 From: George Herbert Date: Sun, 26 May 2024 03:33:03 +0100 Subject: [PATCH] community[patch]: add ?| (arrayContains) filter on metadata to PGVector search (#5381) * add ?| (arrayContains) filter on metadata to PGVector search * fix: align to style of IN filter * Format * Update build artifacts * Format --------- Co-authored-by: jacoblee93 --- .../pgvector_vectorstore/pgvector.ts | 17 ++++++++- .../src/vectorstores/pgvector.ts | 12 ++++++ .../tests/pgvector/pgvector.int.test.ts | 38 +++++++++++++++++++ 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/examples/src/indexes/vector_stores/pgvector_vectorstore/pgvector.ts b/examples/src/indexes/vector_stores/pgvector_vectorstore/pgvector.ts index 849dd657b4c9..3a006298d9ba 100644 --- a/examples/src/indexes/vector_stores/pgvector_vectorstore/pgvector.ts +++ b/examples/src/indexes/vector_stores/pgvector_vectorstore/pgvector.ts @@ -34,8 +34,8 @@ const pgvectorStore = await PGVectorStore.initialize( ); await pgvectorStore.addDocuments([ - { pageContent: "what's this", metadata: { a: 2 } }, - { pageContent: "Cat drinks milk", metadata: { a: 1 } }, + { pageContent: "what's this", metadata: { a: 2, b: ["tag1", "tag2"] } }, + { pageContent: "Cat drinks milk", metadata: { a: 1, b: ["tag2"] } }, ]); const results = await pgvectorStore.similaritySearch("water", 1); @@ -84,4 +84,17 @@ console.log(results4); [ Document { pageContent: 'what's this', metadata: { a: 2 } } ] */ +// Filtering using arrayContains (?|) is supported +const results5 = await pgvectorStore.similaritySearch("water", 1, { + b: { + arrayContains: ["tag1"], + }, +}); + +console.log(results5); + +/* + [ Document { pageContent: "what's this", metadata: { a: 2, b: ['tag1', 'tag2'] } } ] +*/ + await pgvectorStore.end(); diff --git a/libs/langchain-community/src/vectorstores/pgvector.ts b/libs/langchain-community/src/vectorstores/pgvector.ts index 9961ebe76bd0..af5ac76bc047 100644 --- 
a/libs/langchain-community/src/vectorstores/pgvector.ts +++ b/libs/langchain-community/src/vectorstores/pgvector.ts @@ -501,6 +501,18 @@ export class PGVectorStore extends VectorStore { parameters.push(..._value.in); paramCount += _value.in.length; } + if (Array.isArray(_value.arrayContains)) { + const placeholders = _value.arrayContains + .map( + (_: unknown, index: number) => `$${currentParamCount + index + 1}` + ) + .join(","); + whereClauses.push( + `${this.metadataColumnName}->'${key}' ?| array[${placeholders}]` + ); + parameters.push(..._value.arrayContains); + paramCount += _value.arrayContains.length; + } } else { paramCount += 1; whereClauses.push( diff --git a/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts index 54fc42d5de05..c22627443912 100644 --- a/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts @@ -141,6 +141,44 @@ describe("PGVectorStore", () => { expect(result3.length).toEqual(3); }); + test("PGvector supports arrayContains (?|) in metadata filter ", async () => { + const documents = [ + { pageContent: "Lorem Ipsum", metadata: { a: ["tag1", "tag2"] } }, + { pageContent: "Lorem Ipsum", metadata: { a: ["tag2"] } }, + { pageContent: "Lorem Ipsum", metadata: { a: ["tag1"] } }, + ]; + + await pgvectorVectorStore.addDocuments(documents); + + const result = await pgvectorVectorStore.similaritySearch("hello", 2, { + a: { + arrayContains: ["tag1"], + }, + }); + + expect(result.length).toEqual(2); + expect(result).toEqual([ + { pageContent: "Lorem Ipsum", metadata: { a: ["tag1", "tag2"] } }, + { pageContent: "Lorem Ipsum", metadata: { a: ["tag1"] } }, + ]); + + const result2 = await pgvectorVectorStore.similaritySearch("hello", 2, { + a: { + arrayContains: ["tag2"], + }, + }); + expect(result2.length).toEqual(2); + 
expect(result2).toEqual([ + { pageContent: "Lorem Ipsum", metadata: { a: ["tag1", "tag2"] } }, + { pageContent: "Lorem Ipsum", metadata: { a: ["tag2"] } }, + ]); + + const result3 = await pgvectorVectorStore.similaritySearch("hello", 3); + + expect(result3.length).toEqual(3); + expect(result3).toEqual(documents); + }); + test("PGvector can delete document by id", async () => { try { const documents = [