From be898250e39ddb5954f3cfdef8e2b6093a4064c2 Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Wed, 29 Nov 2023 17:16:39 +0100 Subject: [PATCH] Update `ElasticSearchDocumentStore` to use latest `haystack-ai` version (#63) * Update haystack version * Update imports * Support new filters * Update ElasticSearchDocumentStore * Update tests * Fix corner cases when filter value is None * Convert legacy filters if used * Fix linting --- integrations/elasticsearch/pyproject.toml | 4 +- .../elasticsearch_haystack/bm25_retriever.py | 4 +- .../elasticsearch_haystack/document_store.py | 105 +----- .../embedding_retriever.py | 4 +- .../src/elasticsearch_haystack/filters.py | 307 ++++++++++++------ .../tests/test_bm25_retriever.py | 10 +- .../tests/test_document_store.py | 229 ++++--------- .../tests/test_embedding_retriever.py | 10 +- .../elasticsearch/tests/test_filters.py | 258 +++++++-------- 9 files changed, 428 insertions(+), 503 deletions(-) diff --git a/integrations/elasticsearch/pyproject.toml b/integrations/elasticsearch/pyproject.toml index c54be02f2..f67e9cc35 100644 --- a/integrations/elasticsearch/pyproject.toml +++ b/integrations/elasticsearch/pyproject.toml @@ -24,10 +24,8 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - # we distribute the preview version of Haystack 2.0 under the package "haystack-ai" - "haystack-ai==0.143.0", + "haystack-ai", "elasticsearch>=8,<9", - "typing_extensions", # This is not a direct dependency, but `haystack-ai` is missing it cause `canals` is missing it ] [project.urls] diff --git a/integrations/elasticsearch/src/elasticsearch_haystack/bm25_retriever.py b/integrations/elasticsearch/src/elasticsearch_haystack/bm25_retriever.py index 017860a9a..804e8db15 100644 --- a/integrations/elasticsearch/src/elasticsearch_haystack/bm25_retriever.py +++ b/integrations/elasticsearch/src/elasticsearch_haystack/bm25_retriever.py @@ -3,8 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 from typing import Any, Dict, List, Optional -from haystack.preview import component, default_from_dict, default_to_dict -from haystack.preview.dataclasses import Document +from haystack import component, default_from_dict, default_to_dict +from haystack.dataclasses import Document from elasticsearch_haystack.document_store import ElasticsearchDocumentStore diff --git a/integrations/elasticsearch/src/elasticsearch_haystack/document_store.py b/integrations/elasticsearch/src/elasticsearch_haystack/document_store.py index d131cdf01..6dae14341 100644 --- a/integrations/elasticsearch/src/elasticsearch_haystack/document_store.py +++ b/integrations/elasticsearch/src/elasticsearch_haystack/document_store.py @@ -9,11 +9,10 @@ # There are no import stubs for elastic_transport and elasticsearch so mypy fails from elastic_transport import NodeConfig # type: ignore[import-not-found] from elasticsearch import Elasticsearch, helpers # type: ignore[import-not-found] -from haystack.preview import default_from_dict, default_to_dict -from haystack.preview.dataclasses import Document -from haystack.preview.document_stores.decorator import document_store -from haystack.preview.document_stores.errors import DocumentStoreError, DuplicateDocumentError -from haystack.preview.document_stores.protocols import DuplicatePolicy +from haystack import default_from_dict, default_to_dict +from haystack.dataclasses import Document +from haystack.document_stores import DocumentStoreError, DuplicateDocumentError, DuplicatePolicy, 
document_store +from haystack.utils.filters import convert from elasticsearch_haystack.filters import _normalize_filters @@ -130,103 +129,29 @@ def _search_documents(self, **kwargs) -> List[Document]: return documents def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]: - """ - Returns the documents that match the filters provided. - - Filters are defined as nested dictionaries. The keys of the dictionaries can be a logical operator (`"$and"`, - `"$or"`, `"$not"`), a comparison operator (`"$eq"`, `$ne`, `"$in"`, `$nin`, `"$gt"`, `"$gte"`, `"$lt"`, - `"$lte"`) or a metadata field name. - - Logical operator keys take a dictionary of metadata field names and/or logical operators as value. Metadata - field names take a dictionary of comparison operators as value. Comparison operator keys take a single value or - (in case of `"$in"`) a list of values as value. If no logical operator is provided, `"$and"` is used as default - operation. If no comparison operator is provided, `"$eq"` (or `"$in"` if the comparison value is a list) is used - as default operation. - - Example: - - ```python - filters = { - "$and": { - "type": {"$eq": "article"}, - "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"}, - "rating": {"$gte": 3}, - "$or": { - "genre": {"$in": ["economy", "politics"]}, - "publisher": {"$eq": "nytimes"} - } - } - } - # or simpler using default operators - filters = { - "type": "article", - "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"}, - "rating": {"$gte": 3}, - "$or": { - "genre": ["economy", "politics"], - "publisher": "nytimes" - } - } - ``` - - To use the same logical operator multiple times on the same level, logical operators can take a list of - dictionaries as value. - - Example: - - ```python - filters = { - "$or": [ - { - "$and": { - "Type": "News Paper", - "Date": { - "$lt": "2019-01-01" - } - } - }, - { - "$and": { - "Type": "Blog Post", - "Date": { - "$gte": "2019-01-01" - } - } - } - ] - } - ``` + if filters and "operator" not in filters and "conditions" not in filters: + filters = convert(filters) - :param filters: the filters to apply to the document list. - :return: a list of Documents that match the given filters. - """ query = {"bool": {"filter": _normalize_filters(filters)}} if filters else None documents = self._search_documents(query=query) return documents - def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> None: + def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int: """ - Writes (or overwrites) documents into the store. - - :param documents: a list of documents. - :param policy: documents with the same ID count as duplicates. When duplicates are met, - the store can: - - skip: keep the existing document and ignore the new one. - - overwrite: remove the old document and write the new one. - - fail: an error is raised - - :raises ValueError: if 'documents' parameter is not a list of Document objects - :raises DuplicateDocumentError: Exception trigger on duplicate document if `policy=DuplicatePolicy.FAIL` - :raises DocumentStoreError: Exception trigger on any other error when writing documents - :return: None + Writes Documents to Elasticsearch. + If policy is not specified or set to DuplicatePolicy.NONE, it will raise an exception if a document with the + same ID already exists in the document store. 
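+ + :param documents: List of Documents to write to the document store. + :param policy: Policy to determine how duplicates are handled. + :raises ValueError: If `documents` is not a list of Document objects. + :raises DuplicateDocumentError: If a document with the same ID already exists and policy is DuplicatePolicy.FAIL (or DuplicatePolicy.NONE, which behaves the same). + :raises DocumentStoreError: If any other error occurs when writing documents. + :return: Number of documents written to Elasticsearch.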
""" if len(documents) > 0: if not isinstance(documents[0], Document): msg = "param 'documents' must contain a list of objects of type Document" raise ValueError(msg) + if policy == DuplicatePolicy.NONE: + policy = DuplicatePolicy.FAIL + action = "index" if policy == DuplicatePolicy.OVERWRITE else "create" - _, errors = helpers.bulk( + documents_written, errors = helpers.bulk( client=self._client, actions=( { @@ -262,6 +187,8 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D msg = f"Failed to write documents to Elasticsearch. Errors:\n{other_errors}" raise DocumentStoreError(msg) + return documents_written + def _deserialize_document(self, hit: Dict[str, Any]) -> Document: """ Creates a Document from the search hit provided. diff --git a/integrations/elasticsearch/src/elasticsearch_haystack/embedding_retriever.py b/integrations/elasticsearch/src/elasticsearch_haystack/embedding_retriever.py index 3bb4576ec..2aaba382d 100644 --- a/integrations/elasticsearch/src/elasticsearch_haystack/embedding_retriever.py +++ b/integrations/elasticsearch/src/elasticsearch_haystack/embedding_retriever.py @@ -3,8 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 from typing import Any, Dict, List, Optional -from haystack.preview import component, default_from_dict, default_to_dict -from haystack.preview.dataclasses import Document +from haystack import component, default_from_dict, default_to_dict +from haystack.dataclasses import Document from elasticsearch_haystack.document_store import ElasticsearchDocumentStore diff --git a/integrations/elasticsearch/src/elasticsearch_haystack/filters.py b/integrations/elasticsearch/src/elasticsearch_haystack/filters.py index 78adae585..bb5b15311 100644 --- a/integrations/elasticsearch/src/elasticsearch_haystack/filters.py +++ b/integrations/elasticsearch/src/elasticsearch_haystack/filters.py @@ -1,123 +1,218 @@ -from typing import Any, Dict, List, Union +from datetime import datetime +from typing import Any, Dict, List -from haystack.preview.errors import FilterError +from haystack.errors import FilterError from pandas import DataFrame -def _normalize_filters(filters: Union[List[Dict], Dict], logical_condition="") -> Dict[str, Any]: +def _normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]: """ Converts Haystack filters in ElasticSearch compatible filters. 
""" - if not isinstance(filters, dict) and not isinstance(filters, list): - msg = "Filters must be either a dictionary or a list" + if not isinstance(filters, dict): + msg = "Filters must be a dictionary" raise FilterError(msg) - conditions = [] - if isinstance(filters, dict): - filters = [filters] - for filter_ in filters: - for operator, value in filter_.items(): - if operator in ["$not", "$and", "$or"]: - # Logical operators - conditions.append(_normalize_filters(value, operator)) - else: - # Comparison operators - conditions.extend(_parse_comparison(operator, value)) + if "field" in filters: + return {"bool": {"must": _parse_comparison_condition(filters)}} + return _parse_logical_condition(filters) + + +def _parse_logical_condition(condition: Dict[str, Any]) -> Dict[str, Any]: + if "operator" not in condition: + msg = f"'operator' key missing in {condition}" + raise FilterError(msg) + if "conditions" not in condition: + msg = f"'conditions' key missing in {condition}" + raise FilterError(msg) + + operator = condition["operator"] + conditions = [_parse_comparison_condition(c) for c in condition["conditions"]] if len(conditions) > 1: conditions = _normalize_ranges(conditions) + if operator == "AND": + return {"bool": {"must": conditions}} + elif operator == "OR": + return {"bool": {"should": conditions}} + elif operator == "NOT": + return {"bool": {"must_not": [{"bool": {"must": conditions}}]}} else: - # mypy is complaining we're assigning a dict to a list of dicts. - # We're ok with this as we're returning right after this. - conditions = conditions[0] # type: ignore[assignment] + msg = f"Unknown logical operator '{operator}'" + raise FilterError(msg) - if logical_condition == "$not": - return {"bool": {"must_not": conditions}} - elif logical_condition == "$or": - return {"bool": {"should": conditions}} - # If no logical condition is specified we default to "$and" - return {"bool": {"must": conditions}} - - -def _parse_comparison(field: str, comparison: Union[Dict, List, str, float]) -> List: - result: List[Dict[str, Any]] = [] - if isinstance(comparison, dict): - for comparator, val in comparison.items(): - if isinstance(val, DataFrame): - # Ruff is complaining we're overriding the loop variable `var` - # but we actually want to override it. So we ignore the error. 
- val = val.to_json() # noqa: PLW2901 - if comparator == "$eq": - if isinstance(val, list): - result.append( - { - "terms_set": { - field: { - "terms": val, - "minimum_should_match_script": { - "source": f"Math.max(params.num_terms, doc['{field}'].size())" - }, - } - } - } - ) - result.append({"term": {field: val}}) - elif comparator == "$ne": - if isinstance(val, list): - result.append({"bool": {"must_not": {"terms": {field: val}}}}) - else: - result.append( - {"bool": {"must_not": {"match": {field: {"query": val, "minimum_should_match": "100%"}}}}} - ) - elif comparator == "$in": - if not isinstance(val, list): - msg = f"{field}'s value must be a list when using '{comparator}' comparator" - raise FilterError(msg) - result.append({"terms": {field: val}}) - elif comparator == "$nin": - if not isinstance(val, list): - msg = f"{field}'s value must be a list when using '{comparator}' comparator" - raise FilterError(msg) - result.append({"bool": {"must_not": {"terms": {field: val}}}}) - elif comparator in ["$gt", "$gte", "$lt", "$lte"]: - if not isinstance(val, str) and not isinstance(val, int) and not isinstance(val, float): - msg = f"{field}'s value must be 'str', 'int', 'float' types when using '{comparator}' comparator" - raise FilterError(msg) - result.append({"range": {field: {comparator[1:]: val}}}) - elif comparator in ["$not", "$or"]: - if isinstance(val, list): - # This handles corner cases like this: - # `{"name": {"$or": [{"$eq": "name_0"}, {"$eq": "name_1"}]}}` - # If we don't handle it like this we'd lose the "name" field and the - # generated query would be wrong and return unexpected results. - comparisons = [_parse_comparison(field, v)[0] for v in val] - if comparator == "$not": - result.append({"bool": {"must_not": comparisons}}) - elif comparator == "$or": - result.append({"bool": {"should": comparisons}}) - else: - result.append(_normalize_filters(val, comparator)) - elif comparator == "$and" and isinstance(val, list): - # We're assuming there are no duplicate items in the list - flat_filters = {k: v for d in val for k, v in d.items()} - result.extend(_parse_comparison(field, flat_filters)) - elif comparator == "$and": - result.append(_normalize_filters({field: val}, comparator)) - else: - msg = f"Unknown comparator '{comparator}'" - raise FilterError(msg) - elif isinstance(comparison, list): - result.append({"terms": {field: comparison}}) - elif isinstance(comparison, DataFrame): - result.append({"match": {field: {"query": comparison.to_json(), "minimum_should_match": "100%"}}}) - elif isinstance(comparison, str): - # We can't use "term" for text fields as ElasticSearch changes the value of text. - # More info here: - # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html#query-dsl-term-query - result.append({"match": {field: {"query": comparison, "minimum_should_match": "100%"}}}) - else: - result.append({"term": {field: comparison}}) - return result +def _equal(field: str, value: Any) -> Dict[str, Any]: + if value is None: + return {"bool": {"must_not": {"exists": {"field": field}}}} + + if isinstance(value, list): + return { + "terms_set": { + field: { + "terms": value, + "minimum_should_match_script": {"source": f"Math.max(params.num_terms, doc['{field}'].size())"}, + } + } + } + if field in ["text", "dataframe"]: + # We want to fully match the text field. 
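+ # We can't use "term" for text fields as ElasticSearch changes the value of text. + # More info here: + # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html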
+ return {"match": {field: {"query": value, "minimum_should_match": "100%"}}} + return {"term": {field: value}} + + +def _not_equal(field: str, value: Any) -> Dict[str, Any]: + if value is None: + return {"exists": {"field": field}} + + if isinstance(value, list): + return {"bool": {"must_not": {"terms": {field: value}}}} + if field in ["text", "dataframe"]: + # We want to fully match the text field. + return {"bool": {"must_not": {"match": {field: {"query": value, "minimum_should_match": "100%"}}}}} + + return {"bool": {"must_not": {"term": {field: value}}}} + + +def _greater_than(field: str, value: Any) -> Dict[str, Any]: + if value is None: + # When the value is None and '>' is used we create a filter that would return a Document + # if it has a field set and not set at the same time. + # This will cause the filter to match no Document. + # This way we keep the behavior consistent with other Document Stores. + return {"bool": {"must": [{"exists": {"field": field}}, {"bool": {"must_not": {"exists": {"field": field}}}}]}} + if isinstance(value, str): + try: + datetime.fromisoformat(value) + except (ValueError, TypeError) as exc: + msg = ( + "Can't compare strings using operators '>', '>=', '<', '<='. " + "Strings are only comparable if they are ISO formatted dates." + ) + raise FilterError(msg) from exc + if type(value) in [list, DataFrame]: + msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='" + raise FilterError(msg) + return {"range": {field: {"gt": value}}} + + +def _greater_than_equal(field: str, value: Any) -> Dict[str, Any]: + if value is None: + # When the value is None and '>=' is used we create a filter that would return a Document + # if it has a field set and not set at the same time. + # This will cause the filter to match no Document. + # This way we keep the behavior consistent with other Document Stores. + return {"bool": {"must": [{"exists": {"field": field}}, {"bool": {"must_not": {"exists": {"field": field}}}}]}} + if isinstance(value, str): + try: + datetime.fromisoformat(value) + except (ValueError, TypeError) as exc: + msg = ( + "Can't compare strings using operators '>', '>=', '<', '<='. " + "Strings are only comparable if they are ISO formatted dates." + ) + raise FilterError(msg) from exc + if type(value) in [list, DataFrame]: + msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='" + raise FilterError(msg) + return {"range": {field: {"gte": value}}} + + +def _less_than(field: str, value: Any) -> Dict[str, Any]: + if value is None: + # When the value is None and '<' is used we create a filter that would return a Document + # if it has a field set and not set at the same time. + # This will cause the filter to match no Document. + # This way we keep the behavior consistent with other Document Stores. + return {"bool": {"must": [{"exists": {"field": field}}, {"bool": {"must_not": {"exists": {"field": field}}}}]}} + if isinstance(value, str): + try: + datetime.fromisoformat(value) + except (ValueError, TypeError) as exc: + msg = ( + "Can't compare strings using operators '>', '>=', '<', '<='. " + "Strings are only comparable if they are ISO formatted dates." 
+ ) + raise FilterError(msg) from exc + if type(value) in [list, DataFrame]: + msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='" + raise FilterError(msg) + return {"range": {field: {"lt": value}}} + + +def _less_than_equal(field: str, value: Any) -> Dict[str, Any]: + if value is None: + # When the value is None and '<=' is used we create a filter that would return a Document + # if it has a field set and not set at the same time. + # This will cause the filter to match no Document. + # This way we keep the behavior consistent with other Document Stores. + return {"bool": {"must": [{"exists": {"field": field}}, {"bool": {"must_not": {"exists": {"field": field}}}}]}} + if isinstance(value, str): + try: + datetime.fromisoformat(value) + except (ValueError, TypeError) as exc: + msg = ( + "Can't compare strings using operators '>', '>=', '<', '<='. " + "Strings are only comparable if they are ISO formatted dates." + ) + raise FilterError(msg) from exc + if type(value) in [list, DataFrame]: + msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='" + raise FilterError(msg) + return {"range": {field: {"lte": value}}} + + +def _in(field: str, value: Any) -> Dict[str, Any]: + if not isinstance(value, list): + msg = f"{field}'s value must be a list when using 'in' or 'not in' comparators" + raise FilterError(msg) + return {"terms": {field: value}} + + +def _not_in(field: str, value: Any) -> Dict[str, Any]: + if not isinstance(value, list): + msg = f"{field}'s value must be a list when using 'in' or 'not in' comparators" + raise FilterError(msg) + return {"bool": {"must_not": {"terms": {field: value}}}} + + +COMPARISON_OPERATORS = { + "==": _equal, + "!=": _not_equal, + ">": _greater_than, + ">=": _greater_than_equal, + "<": _less_than, + "<=": _less_than_equal, + "in": _in, + "not in": _not_in, +} + + +def _parse_comparison_condition(condition: Dict[str, Any]) -> Dict[str, Any]: + if "field" not in condition: + # 'field' key is only found in comparison dictionaries. + # We assume this is a logic dictionary since it's not present. + return _parse_logical_condition(condition) + field: str = condition["field"] + + if field.startswith("meta."): + # Remove the "meta." prefix if present. + # Documents are flattened when using the ElasticSearchDocumentStore + # so we don't need to specify the "meta." prefix. + # Instead of raising an error we handle it gracefully. 
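+ # For example "meta.genre" becomes just "genre".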
+ field = field[5:] + + if "operator" not in condition: + msg = f"'operator' key missing in {condition}" + raise FilterError(msg) + if "value" not in condition: + msg = f"'value' key missing in {condition}" + raise FilterError(msg) + operator: str = condition["operator"] + value: Any = condition["value"] + if isinstance(value, DataFrame): + value = value.to_json() + + return COMPARISON_OPERATORS[operator](field, value) def _normalize_ranges(conditions: List[Dict[str, Any]]) -> List[Dict[str, Any]]: diff --git a/integrations/elasticsearch/tests/test_bm25_retriever.py b/integrations/elasticsearch/tests/test_bm25_retriever.py index 9139368d9..8f19c8897 100644 --- a/integrations/elasticsearch/tests/test_bm25_retriever.py +++ b/integrations/elasticsearch/tests/test_bm25_retriever.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from unittest.mock import Mock, patch -from haystack.preview.dataclasses import Document +from haystack.dataclasses import Document from elasticsearch_haystack.bm25_retriever import ElasticsearchBM25Retriever from elasticsearch_haystack.document_store import ElasticsearchDocumentStore @@ -24,7 +24,7 @@ def test_to_dict(_mock_elasticsearch_client): retriever = ElasticsearchBM25Retriever(document_store=document_store) res = retriever.to_dict() assert res == { - "type": "ElasticsearchBM25Retriever", + "type": "elasticsearch_haystack.bm25_retriever.ElasticsearchBM25Retriever", "init_parameters": { "document_store": { "init_parameters": { @@ -32,7 +32,7 @@ def test_to_dict(_mock_elasticsearch_client): "index": "default", "embedding_similarity_function": "cosine", }, - "type": "ElasticsearchDocumentStore", + "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore", }, "filters": {}, "fuzziness": "AUTO", @@ -45,11 +45,11 @@ def test_to_dict(_mock_elasticsearch_client): @patch("elasticsearch_haystack.document_store.Elasticsearch") def test_from_dict(_mock_elasticsearch_client): data = { - "type": "ElasticsearchBM25Retriever", + "type": "elasticsearch_haystack.bm25_retriever.ElasticsearchBM25Retriever", "init_parameters": { "document_store": { "init_parameters": {"hosts": "some fake host", "index": "default"}, - "type": "ElasticsearchDocumentStore", + "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore", }, "filters": {}, "fuzziness": "AUTO", diff --git a/integrations/elasticsearch/tests/test_document_store.py b/integrations/elasticsearch/tests/test_document_store.py index e71603126..d6428e762 100644 --- a/integrations/elasticsearch/tests/test_document_store.py +++ b/integrations/elasticsearch/tests/test_document_store.py @@ -6,13 +6,12 @@ from typing import List from unittest.mock import patch -import pandas as pd import pytest from elasticsearch.exceptions import BadRequestError # type: ignore[import-not-found] -from haystack.preview.dataclasses.document import Document -from haystack.preview.document_stores.errors import DocumentStoreError, DuplicateDocumentError -from haystack.preview.document_stores.protocols import DuplicatePolicy -from haystack.preview.testing.document_store import DocumentStoreBaseTests +from haystack.dataclasses.document import Document +from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError +from haystack.document_stores.protocols import DuplicatePolicy +from haystack.testing.document_store import DocumentStoreBaseTests from elasticsearch_haystack.document_store import ElasticsearchDocumentStore @@ -24,7 +23,7 @@ class TestDocumentStore(DocumentStoreBaseTests): """ 
@pytest.fixture - def docstore(self, request): + def document_store(self, request): """ This is the most basic requirement for the child class: provide an instance of this document store so the base class can use it. @@ -43,12 +42,37 @@ def docstore(self, request): yield store store._client.options(ignore_status=[400, 404]).indices.delete(index=index) + def assert_documents_are_equal(self, received: List[Document], expected: List[Document]): + """ + The ElasticsearchDocumentStore.filter_documents() method returns Documents with their score set. + We don't want to compare the score, so we set it to None before comparing the documents. + """ + received_meta = [] + for doc in received: + r = { + "number": doc.meta.get("number"), + "name": doc.meta.get("name"), + } + received_meta.append(r) + + expected_meta = [] + for doc in expected: + r = { + "number": doc.meta.get("number"), + "name": doc.meta.get("name"), + } + expected_meta.append(r) + for doc in received: + doc.score = None + + super().assert_documents_are_equal(received, expected) + @patch("elasticsearch_haystack.document_store.Elasticsearch") def test_to_dict(self, _mock_elasticsearch_client): document_store = ElasticsearchDocumentStore(hosts="some hosts") res = document_store.to_dict() assert res == { - "type": "ElasticsearchDocumentStore", + "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore", "init_parameters": { "hosts": "some hosts", "index": "default", @@ -59,7 +83,7 @@ def test_to_dict(self, _mock_elasticsearch_client): @patch("elasticsearch_haystack.document_store.Elasticsearch") def test_from_dict(self, _mock_elasticsearch_client): data = { - "type": "ElasticsearchDocumentStore", + "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore", "init_parameters": { "hosts": "some hosts", "index": "default", @@ -71,8 +95,14 @@ def test_from_dict(self, _mock_elasticsearch_client): assert document_store._index == "default" assert document_store._embedding_similarity_function == "cosine" - def test_bm25_retrieval(self, docstore: ElasticsearchDocumentStore): - docstore.write_documents( + def test_write_documents(self, document_store: ElasticsearchDocumentStore): + docs = [Document(id="1")] + assert document_store.write_documents(docs) == 1 + with pytest.raises(DuplicateDocumentError): + document_store.write_documents(docs, DuplicatePolicy.FAIL) + + def test_bm25_retrieval(self, document_store: ElasticsearchDocumentStore): + document_store.write_documents( [ Document(content="Haskell is a functional programming language"), Document(content="Lisp is a functional programming language"), @@ -88,17 +118,17 @@ def test_bm25_retrieval(self, docstore: ElasticsearchDocumentStore): ] ) - res = docstore._bm25_retrieval("functional", top_k=3) + res = document_store._bm25_retrieval("functional", top_k=3) assert len(res) == 3 assert "functional" in res[0].content assert "functional" in res[1].content assert "functional" in res[2].content - def test_bm25_retrieval_pagination(self, docstore: ElasticsearchDocumentStore): + def test_bm25_retrieval_pagination(self, document_store: ElasticsearchDocumentStore): """ Test that handling of pagination works as expected, when the matching documents are > 10. 
""" - docstore.write_documents( + document_store.write_documents( [ Document(content="Haskell is a functional programming language"), Document(content="Lisp is a functional programming language"), @@ -118,12 +148,12 @@ def test_bm25_retrieval_pagination(self, docstore: ElasticsearchDocumentStore): ] ) - res = docstore._bm25_retrieval("programming", top_k=11) + res = document_store._bm25_retrieval("programming", top_k=11) assert len(res) == 11 assert all("programming" in doc.content for doc in res) - def test_bm25_retrieval_with_fuzziness(self, docstore: ElasticsearchDocumentStore): - docstore.write_documents( + def test_bm25_retrieval_with_fuzziness(self, document_store: ElasticsearchDocumentStore): + document_store.write_documents( [ Document(content="Haskell is a functional programming language"), Document(content="Lisp is a functional programming language"), @@ -141,161 +171,30 @@ def test_bm25_retrieval_with_fuzziness(self, docstore: ElasticsearchDocumentStor query_with_typo = "functinal" # Query without fuzziness to search for the exact match - res = docstore._bm25_retrieval(query_with_typo, top_k=3, fuzziness="0") + res = document_store._bm25_retrieval(query_with_typo, top_k=3, fuzziness="0") # Nothing is found as the query contains a typo assert res == [] # Query with fuzziness with the same query - res = docstore._bm25_retrieval(query_with_typo, top_k=3, fuzziness="1") + res = document_store._bm25_retrieval(query_with_typo, top_k=3, fuzziness="1") assert len(res) == 3 assert "functional" in res[0].content assert "functional" in res[1].content assert "functional" in res[2].content - def test_write_duplicate_fail(self, docstore: ElasticsearchDocumentStore): - """ - Verify `DuplicateDocumentError` is raised when trying to write duplicate files. - - `DocumentStoreBaseTests` declares this test but we override it since we return - a different error message that it expects. - """ - doc = Document(content="test doc") - docstore.write_documents([doc]) - with pytest.raises(DuplicateDocumentError): - docstore.write_documents(documents=[doc], policy=DuplicatePolicy.FAIL) - assert docstore.filter_documents(filters={"id": doc.id}) == [doc] - - def test_delete_not_empty(self, docstore: ElasticsearchDocumentStore): - """ - Verifies delete properly deletes specified document. - - `DocumentStoreBaseTests` declares this test but we override it since we - want `delete_documents` to be idempotent. - """ - doc = Document(content="test doc") - docstore.write_documents([doc]) - - docstore.delete_documents([doc.id]) - - res = docstore.filter_documents(filters={"id": doc.id}) - assert res == [] - - def test_delete_empty(self, docstore: ElasticsearchDocumentStore): - """ - Verifies delete doesn't raises when trying to delete a non-existing document. - - `DocumentStoreBaseTests` declares this test but we override it since we - want `delete_documents` to be idempotent. - """ - docstore.delete_documents(["test"]) - - def test_delete_not_empty_nonexisting(self, docstore: ElasticsearchDocumentStore): - """ - `DocumentStoreBaseTests` declares this test but we override it since we - want `delete_documents` to be idempotent. 
- """ - doc = Document(content="test doc") - docstore.write_documents([doc]) - - docstore.delete_documents(["non_existing"]) - - assert docstore.filter_documents(filters={"id": doc.id}) == [doc] - - @pytest.mark.skip(reason="Not supported") - def test_in_filter_table(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - pass - - @pytest.mark.skip(reason="Not supported") - def test_in_filter_embedding(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - pass - - @pytest.mark.skip(reason="Not supported") - def test_nin_filter_table(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - pass - - @pytest.mark.skip(reason="Not supported") - def test_nin_filter_embedding(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - pass - - @pytest.mark.skip(reason="Not supported") - def test_eq_filter_embedding(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - """ - If the embedding field is a dense vector (as expected), raise the following error: - - elasticsearch.BadRequestError: BadRequestError(400, 'search_phase_execution_exception', - "failed to create query: Field [embedding] of type [dense_vector] doesn't support term queries") - """ - pass - - @pytest.mark.skip(reason="Not supported") - def test_ne_filter_embedding(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - """ - If the embedding field is a dense vector (as expected), raise the following error: - - elasticsearch.BadRequestError: BadRequestError(400, 'search_phase_execution_exception', - "failed to create query: Field [embedding] of type [dense_vector] doesn't support term queries") - """ - pass - - def test_gt_filter_non_numeric(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": {"$gt": "100"}}) - assert self.contains_same_docs( - result, [d for d in filterable_docs if "page" in d.meta and d.meta["page"] > "100"] - ) - - def test_gt_filter_table(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"dataframe": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) - assert result == [] - - def test_gte_filter_non_numeric(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": {"$gte": "100"}}) - assert self.contains_same_docs( - result, [d for d in filterable_docs if "page" in d.meta and d.meta["page"] >= "100"] - ) - - def test_gte_filter_table(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"dataframe": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) - assert result == [] - - def test_lt_filter_non_numeric(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": {"$lt": "100"}}) - assert result == [] - - def test_lt_filter_table(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"dataframe": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) - assert 
self.contains_same_docs(result, [d for d in filterable_docs if d.dataframe is not None]) - - def test_lte_filter_non_numeric(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": {"$lte": "100"}}) - assert self.contains_same_docs( - result, [d for d in filterable_docs if "page" in d.meta and d.meta["page"] <= "100"] - ) - - def test_lte_filter_table(self, docstore: ElasticsearchDocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"dataframe": {"$lte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) - assert self.contains_same_docs(result, [d for d in filterable_docs if d.dataframe is not None]) - - def test_embedding_retrieval(self, docstore: ElasticsearchDocumentStore): + def test_embedding_retrieval(self, document_store: ElasticsearchDocumentStore): docs = [ Document(content="Most similar document", embedding=[1.0, 1.0, 1.0, 1.0]), Document(content="2nd best document", embedding=[0.8, 0.8, 0.8, 1.0]), Document(content="Not very similar document", embedding=[0.0, 0.8, 0.3, 0.9]), ] - docstore.write_documents(docs) - results = docstore._embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=2, filters={}) + document_store.write_documents(docs) + results = document_store._embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=2, filters={}) assert len(results) == 2 assert results[0].content == "Most similar document" assert results[1].content == "2nd best document" - def test_embedding_retrieval_w_filters(self, docstore: ElasticsearchDocumentStore): + def test_embedding_retrieval_with_filters(self, document_store: ElasticsearchDocumentStore): docs = [ Document(content="Most similar document", embedding=[1.0, 1.0, 1.0, 1.0]), Document(content="2nd best document", embedding=[0.8, 0.8, 0.8, 1.0]), @@ -305,14 +204,14 @@ def test_embedding_retrieval_w_filters(self, docstore: ElasticsearchDocumentStor meta={"meta_field": "custom_value"}, ), ] - docstore.write_documents(docs) + document_store.write_documents(docs) - filters = {"meta_field": {"$eq": "custom_value"}} - results = docstore._embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=2, filters=filters) + filters = {"field": "meta_field", "operator": "==", "value": "custom_value"} + results = document_store._embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=2, filters=filters) assert len(results) == 1 assert results[0].content == "Not very similar document with meta field" - def test_embedding_retrieval_pagination(self, docstore: ElasticsearchDocumentStore): + def test_embedding_retrieval_pagination(self, document_store: ElasticsearchDocumentStore): """ Test that handling of pagination works as expected, when the matching documents are > 10. 
""" @@ -322,21 +221,23 @@ def test_embedding_retrieval_pagination(self, docstore: ElasticsearchDocumentSto for i in range(20) ] - docstore.write_documents(docs) - results = docstore._embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=11, filters={}) + document_store.write_documents(docs) + results = document_store._embedding_retrieval(query_embedding=[0.1, 0.1, 0.1, 0.1], top_k=11, filters={}) assert len(results) == 11 - def test_embedding_retrieval_query_documents_different_embedding_sizes(self, docstore: ElasticsearchDocumentStore): + def test_embedding_retrieval_query_documents_different_embedding_sizes( + self, document_store: ElasticsearchDocumentStore + ): """ Test that the retrieval fails if the query embedding and the documents have different embedding sizes. """ docs = [Document(content="Hello world", embedding=[0.1, 0.2, 0.3, 0.4])] - docstore.write_documents(docs) + document_store.write_documents(docs) with pytest.raises(BadRequestError): - docstore._embedding_retrieval(query_embedding=[0.1, 0.1]) + document_store._embedding_retrieval(query_embedding=[0.1, 0.1]) - def test_write_documents_different_embedding_sizes_fail(self, docstore: ElasticsearchDocumentStore): + def test_write_documents_different_embedding_sizes_fail(self, document_store: ElasticsearchDocumentStore): """ Test that write_documents fails if the documents have different embedding sizes. """ @@ -346,4 +247,4 @@ def test_write_documents_different_embedding_sizes_fail(self, docstore: Elastics ] with pytest.raises(DocumentStoreError): - docstore.write_documents(docs) + document_store.write_documents(docs) diff --git a/integrations/elasticsearch/tests/test_embedding_retriever.py b/integrations/elasticsearch/tests/test_embedding_retriever.py index b16e28830..fd60b0940 100644 --- a/integrations/elasticsearch/tests/test_embedding_retriever.py +++ b/integrations/elasticsearch/tests/test_embedding_retriever.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from unittest.mock import Mock, patch -from haystack.preview.dataclasses import Document +from haystack.dataclasses import Document from elasticsearch_haystack.document_store import ElasticsearchDocumentStore from elasticsearch_haystack.embedding_retriever import ElasticsearchEmbeddingRetriever @@ -24,7 +24,7 @@ def test_to_dict(_mock_elasticsearch_client): retriever = ElasticsearchEmbeddingRetriever(document_store=document_store) res = retriever.to_dict() assert res == { - "type": "ElasticsearchEmbeddingRetriever", + "type": "elasticsearch_haystack.embedding_retriever.ElasticsearchEmbeddingRetriever", "init_parameters": { "document_store": { "init_parameters": { @@ -32,7 +32,7 @@ def test_to_dict(_mock_elasticsearch_client): "index": "default", "embedding_similarity_function": "cosine", }, - "type": "ElasticsearchDocumentStore", + "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore", }, "filters": {}, "top_k": 10, @@ -44,11 +44,11 @@ def test_to_dict(_mock_elasticsearch_client): @patch("elasticsearch_haystack.document_store.Elasticsearch") def test_from_dict(_mock_elasticsearch_client): data = { - "type": "ElasticsearchEmbeddingRetriever", + "type": "elasticsearch_haystack.embedding_retriever.ElasticsearchEmbeddingRetriever", "init_parameters": { "document_store": { "init_parameters": {"hosts": "some fake host", "index": "default"}, - "type": "ElasticsearchDocumentStore", + "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore", }, "filters": {}, "top_k": 10, diff --git 
a/integrations/elasticsearch/tests/test_filters.py b/integrations/elasticsearch/tests/test_filters.py index efaa168b0..6db6a0dd2 100644 --- a/integrations/elasticsearch/tests/test_filters.py +++ b/integrations/elasticsearch/tests/test_filters.py @@ -1,108 +1,99 @@ import pytest -from haystack.preview.errors import FilterError +from haystack.errors import FilterError from elasticsearch_haystack.filters import _normalize_filters, _normalize_ranges filters_data = [ ( { - "$and": { - "type": {"$eq": "article"}, - "$or": {"genre": {"$in": ["economy", "politics"]}, "publisher": {"$eq": "nytimes"}}, - "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"}, - "rating": {"$gte": 3}, - } + "operator": "AND", + "conditions": [ + {"field": "meta.type", "operator": "==", "value": "article"}, + { + "operator": "OR", + "conditions": [ + {"field": "meta.genre", "operator": "in", "value": ["economy", "politics"]}, + {"field": "meta.publisher", "operator": "==", "value": "nytimes"}, + ], + }, + {"field": "meta.date", "operator": ">=", "value": "2015-01-01"}, + {"field": "meta.date", "operator": "<", "value": "2021-01-01"}, + {"field": "meta.rating", "operator": ">=", "value": 3}, + ], }, { "bool": { - "must": { - "bool": { - "must": [ - {"term": {"type": "article"}}, - { - "bool": { - "should": [ - {"terms": {"genre": ["economy", "politics"]}}, - {"term": {"publisher": "nytimes"}}, - ] - } - }, - {"range": {"date": {"gte": "2015-01-01", "lt": "2021-01-01"}}}, - {"range": {"rating": {"gte": 3}}}, - ] - } - } - } - }, - ), - ( - { - "$or": [ - {"Type": "News Paper", "Date": {"$lt": "2019-01-01"}}, - {"Type": "Blog Post", "Date": {"$gte": "2019-01-01"}}, - ] - }, - { - "bool": { - "must": { - "bool": { - "should": [ - {"match": {"Type": {"query": "News Paper", "minimum_should_match": "100%"}}}, - {"match": {"Type": {"query": "Blog Post", "minimum_should_match": "100%"}}}, - {"range": {"Date": {"lt": "2019-01-01", "gte": "2019-01-01"}}}, - ] - } - } + "must": [ + {"term": {"type": "article"}}, + { + "bool": { + "should": [ + {"terms": {"genre": ["economy", "politics"]}}, + {"term": {"publisher": "nytimes"}}, + ] + } + }, + {"range": {"date": {"gte": "2015-01-01", "lt": "2021-01-01"}}}, + {"range": {"rating": {"gte": 3}}}, + ] } }, ), ( { - "$and": { - "type": {"$eq": "article"}, - "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"}, - "rating": {"$gte": 3}, - "$or": {"genre": {"$in": ["economy", "politics"]}, "publisher": {"$eq": "nytimes"}}, - } + "operator": "OR", + "conditions": [ + { + "operator": "AND", + "conditions": [ + {"field": "meta.Type", "operator": "==", "value": "News Paper"}, + {"field": "meta.Date", "operator": "<", "value": "2020-01-01"}, + ], + }, + { + "operator": "AND", + "conditions": [ + {"field": "meta.Type", "operator": "==", "value": "Blog Post"}, + {"field": "meta.Date", "operator": ">=", "value": "2019-01-01"}, + ], + }, + ], }, { "bool": { - "must": { - "bool": { - "must": [ - {"term": {"type": "article"}}, - { - "bool": { - "should": [ - {"terms": {"genre": ["economy", "politics"]}}, - {"term": {"publisher": "nytimes"}}, - ] - } - }, - {"range": {"date": {"gte": "2015-01-01", "lt": "2021-01-01"}}}, - {"range": {"rating": {"gte": 3}}}, - ] - } - } + "should": [ + {"bool": {"must": [{"term": {"Type": "News Paper"}}, {"range": {"Date": {"lt": "2020-01-01"}}}]}}, + {"bool": {"must": [{"term": {"Type": "Blog Post"}}, {"range": {"Date": {"gte": "2019-01-01"}}}]}}, + ] } }, ), ( { - "type": "article", - "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"}, - "rating": {"$gte": 3}, - "$or": 
{"genre": ["economy", "politics"], "publisher": "nytimes"}, + "operator": "AND", + "conditions": [ + {"field": "meta.type", "operator": "==", "value": "article"}, + {"field": "meta.date", "operator": ">=", "value": "2015-01-01"}, + {"field": "meta.date", "operator": "<", "value": "2021-01-01"}, + {"field": "meta.rating", "operator": ">=", "value": 3}, + { + "operator": "OR", + "conditions": [ + {"field": "meta.genre", "operator": "in", "value": ["economy", "politics"]}, + {"field": "meta.publisher", "operator": "==", "value": "nytimes"}, + ], + }, + ], }, { "bool": { "must": [ - {"match": {"type": {"query": "article", "minimum_should_match": "100%"}}}, + {"term": {"type": "article"}}, { "bool": { "should": [ {"terms": {"genre": ["economy", "politics"]}}, - {"match": {"publisher": {"query": "nytimes", "minimum_should_match": "100%"}}}, + {"term": {"publisher": "nytimes"}}, ] } }, @@ -113,75 +104,72 @@ }, ), ( - {"text": "A Foo Document 1"}, - {"bool": {"must": {"match": {"text": {"query": "A Foo Document 1", "minimum_should_match": "100%"}}}}}, + {"operator": "AND", "conditions": [{"field": "text", "operator": "==", "value": "A Foo Document 1"}]}, + {"bool": {"must": [{"match": {"text": {"query": "A Foo Document 1", "minimum_should_match": "100%"}}}]}}, ), ( - {"$or": {"name": {"$or": [{"$eq": "name_0"}, {"$eq": "name_1"}]}, "number": {"$lt": 1.0}}}, + { + "operator": "OR", + "conditions": [ + { + "operator": "OR", + "conditions": [ + {"field": "meta.name", "operator": "==", "value": "name_0"}, + {"field": "meta.name", "operator": "==", "value": "name_1"}, + ], + }, + {"field": "meta.number", "operator": "<", "value": 1.0}, + ], + }, { "bool": { - "must": { - "bool": { - "should": [ - { - "bool": { - "should": [ - {"term": {"name": "name_0"}}, - {"term": {"name": "name_1"}}, - ] - } - }, - {"range": {"number": {"lt": 1.0}}}, - ] - } - } + "should": [ + {"bool": {"should": [{"term": {"name": "name_0"}}, {"term": {"name": "name_1"}}]}}, + {"range": {"number": {"lt": 1.0}}}, + ] } }, ), ( - {"$and": {"number": {"$and": {"$lte": 2, "$gte": 0}}, "name": {"$in": ["name_0", "name_1"]}}}, { - "bool": { - "must": { - "bool": { - "must": [ - {"bool": {"must": [{"range": {"number": {"lte": 2, "gte": 0}}}]}}, - {"terms": {"name": ["name_0", "name_1"]}}, - ] - } - } - } + "operator": "AND", + "conditions": [ + {"field": "meta.number", "operator": "<=", "value": 2}, + {"field": "meta.number", "operator": ">=", "value": 0}, + {"field": "meta.name", "operator": "in", "value": ["name_0", "name_1"]}, + ], }, + {"bool": {"must": [{"terms": {"name": ["name_0", "name_1"]}}, {"range": {"number": {"lte": 2, "gte": 0}}}]}}, ), ( - {"number": {"$lte": 2, "$gte": 0}, "name": ["name_0", "name_1"]}, { - "bool": { - "must": [ - {"terms": {"name": ["name_0", "name_1"]}}, - {"range": {"number": {"lte": 2, "gte": 0}}}, - ] - } + "operator": "AND", + "conditions": [ + {"field": "meta.number", "operator": "<=", "value": 2}, + {"field": "meta.number", "operator": ">=", "value": 0}, + ], }, + {"bool": {"must": [{"range": {"number": {"lte": 2, "gte": 0}}}]}}, ), ( - {"number": {"$and": [{"$lte": 2}, {"$gte": 0}]}}, - {"bool": {"must": [{"range": {"number": {"lte": 2, "gte": 0}}}]}}, + { + "operator": "OR", + "conditions": [ + {"field": "meta.name", "operator": "==", "value": "name_0"}, + {"field": "meta.name", "operator": "==", "value": "name_1"}, + ], + }, + {"bool": {"should": [{"term": {"name": "name_0"}}, {"term": {"name": "name_1"}}]}}, ), ( - {"name": {"$or": [{"$eq": "name_0"}, {"$eq": "name_1"}]}}, { - "bool": { - 
"must": { - "bool": { - "should": [ - {"term": {"name": "name_0"}}, - {"term": {"name": "name_1"}}, - ] - } - } - } + "operator": "NOT", + "conditions": [ + {"field": "meta.number", "operator": "==", "value": 100}, + {"field": "meta.name", "operator": "==", "value": "name_0"}, + ], }, + {"bool": {"must_not": [{"bool": {"must": [{"term": {"number": 100}}, {"term": {"name": "name_0"}}]}}]}}, ), ] @@ -192,15 +180,31 @@ def test_normalize_filters(filters, expected): assert result == expected -def test_normalize_filters_raises_with_malformed_filters(): +def test_normalize_filters_invalid_operator(): + with pytest.raises(FilterError): + _normalize_filters({"operator": "INVALID", "conditions": []}) + + +def test_normalize_filters_malformed(): + # Missing operator + with pytest.raises(FilterError): + _normalize_filters({"conditions": []}) + + # Missing conditions + with pytest.raises(FilterError): + _normalize_filters({"operator": "AND"}) + + # Missing comparison field with pytest.raises(FilterError): - _normalize_filters("not a filter") + _normalize_filters({"operator": "AND", "conditions": [{"operator": "==", "value": "article"}]}) + # Missing comparison operator with pytest.raises(FilterError): - _normalize_filters({"number": {"page": "100"}}) + _normalize_filters({"operator": "AND", "conditions": [{"field": "meta.type", "operator": "=="}]}) + # Missing comparison value with pytest.raises(FilterError): - _normalize_filters({"number": {"page": {"chapter": "intro"}}}) + _normalize_filters({"operator": "AND", "conditions": [{"field": "meta.type", "value": "article"}]}) def test_normalize_ranges():