diff --git a/examples/pg_vectorstore.ipynb b/examples/pg_vectorstore.ipynb index 2c20e90e..abc87fba 100644 --- a/examples/pg_vectorstore.ipynb +++ b/examples/pg_vectorstore.ipynb @@ -375,6 +375,8 @@ "| \\$nin | Special Cased (not in) |\n", "| \\$between | Special Cased (between) |\n", "| \\$exists | Special Cased (is null) |\n", + "| \\$contains_any | Special Cased (contains any of values) |\n", + "| \\$contains_none| Special Cased (contains none of values) |\n", "| \\$like | Text (like) |\n", "| \\$ilike | Text (case-insensitive like) |\n", "| \\$and | Logical (and) |\n", diff --git a/examples/pg_vectorstore_how_to.ipynb b/examples/pg_vectorstore_how_to.ipynb index bbdd7237..187f5e6e 100644 --- a/examples/pg_vectorstore_how_to.ipynb +++ b/examples/pg_vectorstore_how_to.ipynb @@ -546,6 +546,8 @@ "| $nin | Special Cased (not in) |\n", "| $between | Special Cased (between) |\n", "| $exists | Special Cased (is null) |\n", + "| $contains_any | Special Cased (contains any of values) |\n", + "| $contains_none| Special Cased (contains none of values) |\n", "| $like | Text (like) |\n", "| $ilike | Text (case-insensitive like) |\n", "| $and | Logical (and) |\n", diff --git a/langchain_postgres/v2/async_vectorstore.py b/langchain_postgres/v2/async_vectorstore.py index fd0bfd75..5a34238b 100644 --- a/langchain_postgres/v2/async_vectorstore.py +++ b/langchain_postgres/v2/async_vectorstore.py @@ -38,6 +38,8 @@ "$nin", "$between", "$exists", + "$contains_any", + "$contains_none", } TEXT_OPERATORS = { @@ -1148,6 +1150,35 @@ def _handle_field_filter( return f"({field} IS NOT NULL)", {} else: return f"({field} IS NULL)", {} + + elif operator in {"$contains_any", "$contains_none"}: + # Accept either a single scalar value, or a list of scalar values + + if isinstance(filter_value, (str, int, float)) and not isinstance( + filter_value, bool + ): + values = [filter_value] + elif isinstance(filter_value, (list, tuple)): + values = list(filter_value) + else: + raise ValueError( 
f"Invalid filter value for {operator}: " + f"expected list or scalar, got {type(filter_value)}" + ) + + for val in values: + if isinstance(val, bool) or not isinstance(val, (str, int, float)): + raise NotImplementedError( + f"Unsupported type: {type(val)} for value: {val}" + ) + + param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}" + + if operator == "$contains_any": + sql = f"({field} && :{param_name})" + else: # i.e. $contains_none + sql = f"(NOT ({field} && :{param_name}))" + return sql, {param_name: values} else: raise NotImplementedError() diff --git a/tests/unit_tests/fixtures/metadata_filtering_data.py b/tests/unit_tests/fixtures/metadata_filtering_data.py index 8df8c01c..6c92e58a 100644 --- a/tests/unit_tests/fixtures/metadata_filtering_data.py +++ b/tests/unit_tests/fixtures/metadata_filtering_data.py @@ -239,6 +239,16 @@ {"inventory_location": {"$exists": False}}, ["WB003"], ), + # Tests for $contains_any and $contains_none + ({"dimensions": {"$contains_any": [25.0]}}, ["FT004", "WB003"]), + ({"dimensions": {"$contains_any": 25.0}}, ["FT004", "WB003"]), + ({"dimensions": {"$contains_none": 99.99}}, ["WH001", "FT004", "WB003", "EC002"]), + ({"tags": {"$contains_any": ["audio", "hydration"]}}, ["WH001", "WB003"]), + ({"tags": {"$contains_none": ["audio", "hydration"]}}, ["FT004", "EC002"]), + ({"tags": {"$contains_any": "audio"}}, ["WH001"]), + ({"tags": {"$contains_any": ["dummy"]}}, []), + ({"tags": {"$contains_any": []}}, []), + ({"tags": {"$contains_none": []}}, ["WH001", "FT004", "WB003", "EC002"]), ] NEGATIVE_TEST_CASES = [ @@ -251,4 +261,6 @@ {"$and": {}}, {"$and": []}, {"$not": True}, + {"tags": {"$contains_any": {"tag": "wireless"}}}, + {"tags": {"$contains_none": [False]}}, ] diff --git a/tests/unit_tests/v2/test_async_pg_vectorstore_search.py b/tests/unit_tests/v2/test_async_pg_vectorstore_search.py index 16c70fdd..f073af74 100644 --- a/tests/unit_tests/v2/test_async_pg_vectorstore_search.py +++ 
b/tests/unit_tests/v2/test_async_pg_vectorstore_search.py @@ -190,6 +190,7 @@ async def vs_custom_filter( Column("price", "FLOAT"), Column("is_available", "BOOLEAN"), Column("tags", "TEXT[]"), + Column("dimensions", "FLOAT[]"), Column("inventory_location", "INTEGER[]"), Column("available_quantity", "INTEGER", nullable=True), ], @@ -207,6 +208,7 @@ async def vs_custom_filter( "price", "is_available", "tags", + "dimensions", "inventory_location", "available_quantity", ], diff --git a/tests/unit_tests/v2/test_pg_vectorstore_search.py b/tests/unit_tests/v2/test_pg_vectorstore_search.py index 7815a25a..284b0ca6 100644 --- a/tests/unit_tests/v2/test_pg_vectorstore_search.py +++ b/tests/unit_tests/v2/test_pg_vectorstore_search.py @@ -137,6 +137,7 @@ async def vs_custom_filter(self, engine: PGEngine) -> AsyncIterator[PGVectorStor Column("price", "FLOAT"), Column("is_available", "BOOLEAN"), Column("tags", "TEXT[]"), + Column("dimensions", "FLOAT[]"), Column("inventory_location", "INTEGER[]"), Column("available_quantity", "INTEGER", nullable=True), ], @@ -155,6 +156,7 @@ async def vs_custom_filter(self, engine: PGEngine) -> AsyncIterator[PGVectorStor "price", "is_available", "tags", + "dimensions", "inventory_location", "available_quantity", ], @@ -348,6 +350,7 @@ async def vs_custom_filter_sync( Column("price", "FLOAT"), Column("is_available", "BOOLEAN"), Column("tags", "TEXT[]"), + Column("dimensions", "FLOAT[]"), Column("inventory_location", "INTEGER[]"), Column("available_quantity", "INTEGER", nullable=True), ], @@ -366,6 +369,7 @@ async def vs_custom_filter_sync( "price", "is_available", "tags", + "dimensions", "inventory_location", "available_quantity", ],