From 8fee8930c2d316ca1a03109ee81cf074e48085f8 Mon Sep 17 00:00:00 2001 From: Nazar Pechevystyi Date: Mon, 30 Jun 2025 15:08:00 +0200 Subject: [PATCH 1/2] feat: add $contains_any and $contains_none operators for array inclusion filtering --- examples/pg_vectorstore.ipynb | 2 ++ examples/pg_vectorstore_how_to.ipynb | 2 ++ langchain_postgres/v2/async_vectorstore.py | 22 +++++++++++++ .../fixtures/metadata_filtering_data.py | 31 +++++++++++++++++++ .../v2/test_async_pg_vectorstore_search.py | 2 ++ .../v2/test_pg_vectorstore_search.py | 4 +++ 6 files changed, 63 insertions(+) diff --git a/examples/pg_vectorstore.ipynb b/examples/pg_vectorstore.ipynb index 2c20e90e..abc87fba 100644 --- a/examples/pg_vectorstore.ipynb +++ b/examples/pg_vectorstore.ipynb @@ -375,6 +375,8 @@ "| \\$nin | Special Cased (not in) |\n", "| \\$between | Special Cased (between) |\n", "| \\$exists | Special Cased (is null) |\n", + "| \\$contains_any | Special Cased (contains any of values) |\n", + "| \\$contains_none| Special Cased (contains none of values) |\n", "| \\$like | Text (like) |\n", "| \\$ilike | Text (case-insensitive like) |\n", "| \\$and | Logical (and) |\n", diff --git a/examples/pg_vectorstore_how_to.ipynb b/examples/pg_vectorstore_how_to.ipynb index bbdd7237..187f5e6e 100644 --- a/examples/pg_vectorstore_how_to.ipynb +++ b/examples/pg_vectorstore_how_to.ipynb @@ -546,6 +546,8 @@ "| $nin | Special Cased (not in) |\n", "| $between | Special Cased (between) |\n", "| $exists | Special Cased (is null) |\n", + "| $contains_any | Special Cased (contains any of values) |\n", + "| $contains_none| Special Cased (contains none of values) |\n", "| $like | Text (like) |\n", "| $ilike | Text (case-insensitive like) |\n", "| $and | Logical (and) |\n", diff --git a/langchain_postgres/v2/async_vectorstore.py b/langchain_postgres/v2/async_vectorstore.py index fd0bfd75..ad873f82 100644 --- a/langchain_postgres/v2/async_vectorstore.py +++ b/langchain_postgres/v2/async_vectorstore.py @@ -38,6 +38,8 @@ 
"$nin", "$between", "$exists", + "$contains_any", + "$contains_none", } TEXT_OPERATORS = { @@ -1148,6 +1150,26 @@ def _handle_field_filter( return f"({field} IS NOT NULL)", {} else: return f"({field} IS NULL)", {} + + elif operator in {"$contains_any", "$contains_none"}: + # We expect a list of numeric or text values + if not isinstance(filter_value, (list, tuple)): + raise ValueError( + f"Invalid filter value for {operator}: expected list, got {type(filter_value)}" + ) + for val in filter_value: + if isinstance(val, bool) or not isinstance(val, (str, int, float)): + raise NotImplementedError( + f"Unsupported type: {type(val)} for value: {val}" + ) + + param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}" + + if operator == "$contains_any": + sql = f"({field} && :{param_name})" + else: # i.e. $contains_none + sql = f"(NOT ({field} && :{param_name}))" + return sql, {param_name: filter_value} else: raise NotImplementedError() diff --git a/tests/unit_tests/fixtures/metadata_filtering_data.py b/tests/unit_tests/fixtures/metadata_filtering_data.py index 8df8c01c..0128f577 100644 --- a/tests/unit_tests/fixtures/metadata_filtering_data.py +++ b/tests/unit_tests/fixtures/metadata_filtering_data.py @@ -239,6 +239,35 @@ {"inventory_location": {"$exists": False}}, ["WB003"], ), + # Tests for $contains_any and $contains_none + ( + {"dimensions": {"$contains_any": [25.0]}}, + ["FT004", "WB003"] + ), + ( + {"dimensions": {"$contains_none": [99.99]}}, + ["WH001", "FT004", "WB003", "EC002"] + ), + ( + {"tags": {"$contains_any": ["audio", "hydration"]}}, + ["WH001", "WB003"] + ), + ( + {"tags": {"$contains_none": ["audio", "hydration"]}}, + ["FT004", "EC002"] + ), + ( + {"tags": {"$contains_any": ["dummy"]}}, + [] + ), + ( + {"tags": {"$contains_any": []}}, + [] + ), + ( + {"tags": {"$contains_none": []}}, + ["WH001", "FT004", "WB003", "EC002"] + ), ] NEGATIVE_TEST_CASES = [ @@ -251,4 +280,6 @@ {"$and": {}}, {"$and": []}, {"$not": True}, + {"tags": {"$contains_any": 
"wireless"}}, + {"tags": {"$contains_none": [False]}}, ] diff --git a/tests/unit_tests/v2/test_async_pg_vectorstore_search.py b/tests/unit_tests/v2/test_async_pg_vectorstore_search.py index 16c70fdd..f073af74 100644 --- a/tests/unit_tests/v2/test_async_pg_vectorstore_search.py +++ b/tests/unit_tests/v2/test_async_pg_vectorstore_search.py @@ -190,6 +190,7 @@ async def vs_custom_filter( Column("price", "FLOAT"), Column("is_available", "BOOLEAN"), Column("tags", "TEXT[]"), + Column("dimensions", "FLOAT[]"), Column("inventory_location", "INTEGER[]"), Column("available_quantity", "INTEGER", nullable=True), ], @@ -207,6 +208,7 @@ async def vs_custom_filter( "price", "is_available", "tags", + "dimensions", "inventory_location", "available_quantity", ], diff --git a/tests/unit_tests/v2/test_pg_vectorstore_search.py b/tests/unit_tests/v2/test_pg_vectorstore_search.py index 7815a25a..284b0ca6 100644 --- a/tests/unit_tests/v2/test_pg_vectorstore_search.py +++ b/tests/unit_tests/v2/test_pg_vectorstore_search.py @@ -137,6 +137,7 @@ async def vs_custom_filter(self, engine: PGEngine) -> AsyncIterator[PGVectorStor Column("price", "FLOAT"), Column("is_available", "BOOLEAN"), Column("tags", "TEXT[]"), + Column("dimensions", "FLOAT[]"), Column("inventory_location", "INTEGER[]"), Column("available_quantity", "INTEGER", nullable=True), ], @@ -155,6 +156,7 @@ async def vs_custom_filter(self, engine: PGEngine) -> AsyncIterator[PGVectorStor "price", "is_available", "tags", + "dimensions", "inventory_location", "available_quantity", ], @@ -348,6 +350,7 @@ async def vs_custom_filter_sync( Column("price", "FLOAT"), Column("is_available", "BOOLEAN"), Column("tags", "TEXT[]"), + Column("dimensions", "FLOAT[]"), Column("inventory_location", "INTEGER[]"), Column("available_quantity", "INTEGER", nullable=True), ], @@ -366,6 +369,7 @@ async def vs_custom_filter_sync( "price", "is_available", "tags", + "dimensions", "inventory_location", "available_quantity", ], From 
c4ea3c7568ed513837a697499c238c7eb095735b Mon Sep 17 00:00:00 2001 From: Nazar Pechevystyi Date: Tue, 1 Jul 2025 08:54:43 +0200 Subject: [PATCH 2/2] accept single/multiple values --- langchain_postgres/v2/async_vectorstore.py | 19 ++++++--- .../fixtures/metadata_filtering_data.py | 39 +++++-------------- 2 files changed, 24 insertions(+), 34 deletions(-) diff --git a/langchain_postgres/v2/async_vectorstore.py b/langchain_postgres/v2/async_vectorstore.py index ad873f82..5a34238b 100644 --- a/langchain_postgres/v2/async_vectorstore.py +++ b/langchain_postgres/v2/async_vectorstore.py @@ -1152,12 +1152,21 @@ def _handle_field_filter( return f"({field} IS NULL)", {} elif operator in {"$contains_any", "$contains_none"}: - # We expect a list of numeric or text values - if not isinstance(filter_value, (list, tuple)): + # Accept either a single scalar value, or a list of scalar values + + if isinstance(filter_value, (str, int, float)) and not isinstance( + filter_value, bool + ): + values = [filter_value] + elif isinstance(filter_value, (list, tuple)): + values = list(filter_value) + else: raise ValueError( - f"Invalid filter value for {operator}: expected list, got {type(filter_value)}" + f"Invalid filter value for {operator}: " + f"expected list or scalar, got {type(filter_value)}" ) - for val in filter_value: + + for val in values: if isinstance(val, bool) or not isinstance(val, (str, int, float)): raise NotImplementedError( f"Unsupported type: {type(val)} for value: {val}" @@ -1169,7 +1178,7 @@ def _handle_field_filter( sql = f"({field} && :{param_name})" else: # i.e. 
$contains_none sql = f"(NOT ({field} && :{param_name}))" - return sql, {param_name: filter_value} + return sql, {param_name: values} else: raise NotImplementedError() diff --git a/tests/unit_tests/fixtures/metadata_filtering_data.py b/tests/unit_tests/fixtures/metadata_filtering_data.py index 0128f577..6c92e58a 100644 --- a/tests/unit_tests/fixtures/metadata_filtering_data.py +++ b/tests/unit_tests/fixtures/metadata_filtering_data.py @@ -240,34 +240,15 @@ ["WB003"], ), # Tests for $contains_any and $contains_none - ( - {"dimensions": {"$contains_any": [25.0]}}, - ["FT004", "WB003"] - ), - ( - {"dimensions": {"$contains_none": [99.99]}}, - ["WH001", "FT004", "WB003", "EC002"] - ), - ( - {"tags": {"$contains_any": ["audio", "hydration"]}}, - ["WH001", "WB003"] - ), - ( - {"tags": {"$contains_none": ["audio", "hydration"]}}, - ["FT004", "EC002"] - ), - ( - {"tags": {"$contains_any": ["dummy"]}}, - [] - ), - ( - {"tags": {"$contains_any": []}}, - [] - ), - ( - {"tags": {"$contains_none": []}}, - ["WH001", "FT004", "WB003", "EC002"] - ), + ({"dimensions": {"$contains_any": [25.0]}}, ["FT004", "WB003"]), + ({"dimensions": {"$contains_any": 25.0}}, ["FT004", "WB003"]), + ({"dimensions": {"$contains_none": 99.99}}, ["WH001", "FT004", "WB003", "EC002"]), + ({"tags": {"$contains_any": ["audio", "hydration"]}}, ["WH001", "WB003"]), + ({"tags": {"$contains_none": ["audio", "hydration"]}}, ["FT004", "EC002"]), + ({"tags": {"$contains_any": "audio"}}, ["WH001"]), + ({"tags": {"$contains_any": ["dummy"]}}, []), + ({"tags": {"$contains_any": []}}, []), + ({"tags": {"$contains_none": []}}, ["WH001", "FT004", "WB003", "EC002"]), ] NEGATIVE_TEST_CASES = [ @@ -280,6 +261,6 @@ {"$and": {}}, {"$and": []}, {"$not": True}, - {"tags": {"$contains_any": "wireless"}}, + {"tags": {"$contains_any": {"tag": "wireless"}}}, {"tags": {"$contains_none": [False]}}, ]