Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/pg_vectorstore.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"To enable search with filters, it is necessary to declare the columns that you want to filter on when creating the table. The vectorstore supports a set of filters that can be applied against the metadata fields of the documents.\n",
"To achieve performant search with filters, it is crucial to declare the columns you want to filter on within the `metadata_columns` when creating the table, as filtering directly on these columns is far more efficient than attempting to filter on fields within a metadata JSON column. The vectorstore supports a set of filters that can be applied against the metadata fields of the documents.\n",
"\n",
"`PGVectorStore` currently supports the following operators.\n",
"\n",
Expand Down
4 changes: 2 additions & 2 deletions examples/pg_vectorstore_how_to.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@
"source": [
"### Search for documents with metadata filter\n",
"\n",
"A Vector Store can take advantage of relational data to filter similarity searches. The vectorstore supports a set of filters that can be applied against the metadata fields of the documents. See the [migration guide](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/migrate_pgvector_to_pgvectorstore.ipynb) for details on how to migrate to use metadata columns.\n",
"A Vector Store can take advantage of relational data to filter similarity searches. The vectorstore supports a set of filters that can be applied against the metadata fields of the documents. See the [migration guide](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/migrate_pgvector_to_pgvectorstore.ipynb) for details on how to migrate to use metadata columns for efficient filtering.\n",
"\n",
"`PGVectorStore` currently supports the following operators and all Postgres data types.\n",
"\n",
Expand Down Expand Up @@ -645,7 +645,7 @@
"\n",
"- **`metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"]`**: These columns are treated as metadata for each product. Metadata provides additional information about a product, such as its name, category, price, quantity available, SKU (Stock Keeping Unit), and an image URL. This information is useful for displaying product details in search results or for filtering and categorization.\n",
"\n",
"- **`metadata_json_column=\"metadata\"`**: The `metadata` column can store any additional information about the products in a flexible JSON format. This allows for storing varied and complex data that doesn't fit into the standard columns.\n"
"- **`metadata_json_column=\"metadata\"`**: The `metadata` column can store any additional information about the products in a flexible JSON format. This allows for storing varied and complex data that doesn't fit into the standard columns. Note that filtering on fields within the JSON but not in `metadata_columns` will be less efficient.\n"
]
},
{
Expand Down
41 changes: 33 additions & 8 deletions langchain_postgres/v2/async_vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from __future__ import annotations

import copy
import datetime
import json
import uuid
from typing import Any, Callable, Iterable, Optional, Sequence
Expand Down Expand Up @@ -54,6 +55,16 @@
.union(SPECIAL_CASED_OPERATORS)
)

# Maps a Python value's type to the Postgres type used when casting a
# JSON-extracted metadata value (``metadata->>'field'`` yields TEXT) so
# that comparison filters are evaluated with the correct type semantics.
# NOTE(review): None is intentionally absent — NULL checks go through the
# $exists operator, which never casts.
PYTHON_TO_POSTGRES_TYPE_MAP: dict[type, str] = {
    bool: "BOOLEAN",
    int: "INTEGER",
    float: "FLOAT",
    str: "TEXT",
    datetime.date: "DATE",
    datetime.datetime: "TIMESTAMP",
    datetime.time: "TIME",
}


class AsyncPGVectorStore(VectorStore):
"""Postgres Vector Store class"""
Expand Down Expand Up @@ -1093,19 +1104,33 @@ def _handle_field_filter(
operator = "$eq"
filter_value = value

field_selector = field
if self.metadata_json_column is not None and field not in self.metadata_columns and field not in (
self.id_column,
self.content_column,
self.embedding_column
):
filter_value_type = type(filter_value[0]) if (isinstance(filter_value, list) or isinstance(filter_value, tuple)) else type(filter_value)
postgres_type = PYTHON_TO_POSTGRES_TYPE_MAP.get(filter_value_type)
if postgres_type is None:
raise ValueError(f"Unsupported type: {filter_value_type}")
field_selector = f"{self.metadata_json_column}->>'{field}'"
if postgres_type != "TEXT" and operator != "$exists":
field_selector = f"({field_selector})::{postgres_type}"

suffix_id = str(uuid.uuid4()).split("-")[0]
if operator in COMPARISONS_TO_NATIVE:
# Then we implement an equality filter
# native is trusted input
native = COMPARISONS_TO_NATIVE[operator]
param_name = f"{field}_{suffix_id}"
return f"{field} {native} :{param_name}", {f"{param_name}": filter_value}
return f"{field_selector} {native} :{param_name}", {f"{param_name}": filter_value}
elif operator == "$between":
# Use AND with two comparisons
low, high = filter_value
low_param_name = f"{field}_low_{suffix_id}"
high_param_name = f"{field}_high_{suffix_id}"
return f"({field} BETWEEN :{low_param_name} AND :{high_param_name})", {
return f"({field_selector} BETWEEN :{low_param_name} AND :{high_param_name})", {
f"{low_param_name}": low,
f"{high_param_name}": high,
}
Expand All @@ -1123,18 +1148,18 @@ def _handle_field_filter(
)
param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}"
if operator == "$in":
return f"{field} = ANY(:{param_name})", {f"{param_name}": filter_value}
return f"{field_selector} = ANY(:{param_name})", {f"{param_name}": filter_value}
else: # i.e. $nin
return f"{field} <> ALL (:{param_name})", {
return f"{field_selector} <> ALL (:{param_name})", {
f"{param_name}": filter_value
}

elif operator in {"$like", "$ilike"}:
param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}"
if operator == "$like":
return f"({field} LIKE :{param_name})", {f"{param_name}": filter_value}
return f"({field_selector} LIKE :{param_name})", {f"{param_name}": filter_value}
else: # i.e. $ilike
return f"({field} ILIKE :{param_name})", {f"{param_name}": filter_value}
return f"({field_selector} ILIKE :{param_name})", {f"{param_name}": filter_value}
elif operator == "$exists":
if not isinstance(filter_value, bool):
raise ValueError(
Expand All @@ -1143,9 +1168,9 @@ def _handle_field_filter(
)
else:
if filter_value:
return f"({field} IS NOT NULL)", {}
return f"({field_selector} IS NOT NULL)", {}
else:
return f"({field} IS NULL)", {}
return f"({field_selector} IS NULL)", {}
else:
raise NotImplementedError()

Expand Down
32 changes: 32 additions & 0 deletions tests/unit_tests/v2/test_async_pg_vectorstore_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
HYBRID_SEARCH_TABLE1 = "test_table_hybrid1" + str(uuid.uuid4()).replace("-", "_")
HYBRID_SEARCH_TABLE2 = "test_table_hybrid2" + str(uuid.uuid4()).replace("-", "_")
CUSTOM_FILTER_TABLE = "custom_filter" + str(uuid.uuid4()).replace("-", "_")
CUSTOM_METADATA_JSON_TABLE = "custom_metadata_json" + str(uuid.uuid4()).replace("-", "_")
VECTOR_SIZE = 768
sync_method_exception_str = "Sync methods are not implemented for AsyncPGVectorStore. Use PGVectorStore interface instead."

Expand Down Expand Up @@ -215,6 +216,24 @@ async def vs_custom_filter(
await vs_custom_filter.aadd_documents(filter_docs, ids=ids)
yield vs_custom_filter

@pytest_asyncio.fixture(scope="class")
async def vs_metadata_json(
    self, engine: PGEngine
) -> AsyncIterator[AsyncPGVectorStore]:
    """Class-scoped store whose metadata lives only in the JSON column.

    The table is created with ``store_metadata=True`` and no dedicated
    metadata columns, so every metadata filter must resolve against the
    JSON metadata column.
    """
    await engine._ainit_vectorstore_table(
        CUSTOM_METADATA_JSON_TABLE,
        VECTOR_SIZE,
        store_metadata=True,
    )
    store = await AsyncPGVectorStore.create(
        engine,
        embedding_service=embeddings_service,
        table_name=CUSTOM_METADATA_JSON_TABLE,
    )
    await store.aadd_documents(filter_docs, ids=ids)
    yield store

async def test_asimilarity_search_score(self, vs: AsyncPGVectorStore) -> None:
results = await vs.asimilarity_search_with_score("foo")
assert len(results) == 4
Expand Down Expand Up @@ -370,6 +389,19 @@ async def test_vectorstore_with_metadata_filters(
)
assert [doc.metadata["code"] for doc in docs] == expected_ids, test_filter

@pytest.mark.parametrize("test_filter, expected_ids", FILTERING_TEST_CASES)
async def test_vectorstore_with_json_metadata_filters(
    self,
    vs_metadata_json: AsyncPGVectorStore,
    test_filter: dict,
    expected_ids: list[str],
) -> None:
    """End-to-end search where filters target fields in the JSON metadata column."""
    results = await vs_metadata_json.asimilarity_search(
        "meow", k=5, filter=test_filter
    )
    actual_codes = [doc.metadata["code"] for doc in results]
    assert actual_codes == expected_ids, test_filter

async def test_asimilarity_hybrid_search(self, vs: AsyncPGVectorStore) -> None:
results = await vs.asimilarity_search(
"foo", k=1, hybrid_search_config=HybridSearchConfig()
Expand Down
64 changes: 64 additions & 0 deletions tests/unit_tests/v2/test_pg_vectorstore_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
CUSTOM_TABLE = "custom" + str(uuid.uuid4()).replace("-", "_")
CUSTOM_FILTER_TABLE = "custom_filter" + str(uuid.uuid4()).replace("-", "_")
CUSTOM_FILTER_TABLE_SYNC = "custom_filter_sync" + str(uuid.uuid4()).replace("-", "_")
CUSTOM_METADATA_JSON_TABLE = "custom_metadata_json" + str(uuid.uuid4()).replace("-", "_")
CUSTOM_METADATA_JSON_TABLE_SYNC = "custom_metadata_json_sync" + str(uuid.uuid4()).replace("-", "_")
VECTOR_SIZE = 768

embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE)
Expand Down Expand Up @@ -163,6 +165,24 @@ async def vs_custom_filter(self, engine: PGEngine) -> AsyncIterator[PGVectorStor
await vs_custom_filter.aadd_documents(filter_docs, ids=ids)
yield vs_custom_filter

@pytest_asyncio.fixture(scope="class")
async def vs_metadata_json(
    self, engine: PGEngine
) -> AsyncIterator[PGVectorStore]:
    """Class-scoped store whose metadata lives only in the JSON column.

    Created with ``store_metadata=True`` and no declared metadata columns,
    forcing filters to be applied against the JSON metadata column.
    """
    await engine.ainit_vectorstore_table(
        CUSTOM_METADATA_JSON_TABLE,
        VECTOR_SIZE,
        store_metadata=True,
    )
    store = await PGVectorStore.create(
        engine,
        embedding_service=embeddings_service,
        table_name=CUSTOM_METADATA_JSON_TABLE,
    )
    await store.aadd_documents(filter_docs, ids=ids)
    yield store

async def test_asimilarity_search_score(self, vs: PGVectorStore) -> None:
results = await vs.asimilarity_search_with_score("foo")
assert len(results) == 4
Expand Down Expand Up @@ -265,6 +285,19 @@ async def test_vectorstore_with_metadata_filters(
"meow", k=5, filter=test_filter
)
assert [doc.metadata["code"] for doc in docs] == expected_ids, test_filter

@pytest.mark.parametrize("test_filter, expected_ids", FILTERING_TEST_CASES)
async def test_vectorstore_with_json_metadata_filters(
    self,
    vs_metadata_json: PGVectorStore,
    test_filter: dict,
    expected_ids: list[str],
) -> None:
    """End-to-end search where filters target fields in the JSON metadata column."""
    results = await vs_metadata_json.asimilarity_search(
        "meow", k=5, filter=test_filter
    )
    actual_codes = [doc.metadata["code"] for doc in results]
    assert actual_codes == expected_ids, test_filter

async def test_asimilarity_hybrid_search(self, vs: PGVectorStore) -> None:
results = await vs.asimilarity_search(
Expand Down Expand Up @@ -375,6 +408,24 @@ async def vs_custom_filter_sync(
vs_custom_filter_sync.add_documents(filter_docs, ids=ids)
yield vs_custom_filter_sync

@pytest_asyncio.fixture(scope="class")
async def vs_metadata_json_sync(
    self, engine_sync: PGEngine
) -> AsyncIterator[PGVectorStore]:
    """Sync-engine counterpart of the JSON-metadata store fixture.

    Table creation and document insertion use the synchronous engine API;
    only store construction is awaited.
    """
    engine_sync.init_vectorstore_table(
        CUSTOM_METADATA_JSON_TABLE_SYNC,
        VECTOR_SIZE,
        store_metadata=True,
    )
    store = await PGVectorStore.create(
        engine_sync,
        embedding_service=embeddings_service,
        table_name=CUSTOM_METADATA_JSON_TABLE_SYNC,
    )
    store.add_documents(filter_docs, ids=ids)
    yield store

def test_similarity_search_score(self, vs_custom: PGVectorStore) -> None:
results = vs_custom.similarity_search_with_score("foo")
assert len(results) == 4
Expand Down Expand Up @@ -429,6 +480,19 @@ def test_sync_vectorstore_with_metadata_filters(
docs = vs_custom_filter_sync.similarity_search("meow", k=5, filter=test_filter)
assert [doc.metadata["code"] for doc in docs] == expected_ids, test_filter

@pytest.mark.parametrize("test_filter, expected_ids", FILTERING_TEST_CASES)
def test_sync_vectorstore_with_json_metadata_filters(
    self,
    vs_metadata_json_sync: PGVectorStore,
    test_filter: dict,
    expected_ids: list[str],
) -> None:
    """End-to-end sync search where filters target fields in the JSON metadata column."""
    results = vs_metadata_json_sync.similarity_search(
        "meow", k=5, filter=test_filter
    )
    actual_codes = [doc.metadata["code"] for doc in results]
    assert actual_codes == expected_ids, test_filter

@pytest.mark.parametrize("test_filter", NEGATIVE_TEST_CASES)
def test_metadata_filter_negative_tests(
self, vs_custom_filter_sync: PGVectorStore, test_filter: dict
Expand Down