diff --git a/docs/reference/esql-query-builder.md b/docs/reference/esql-query-builder.md new file mode 100644 index 000000000..1cdc0c5b3 --- /dev/null +++ b/docs/reference/esql-query-builder.md @@ -0,0 +1,240 @@ +# ES|QL Query Builder + +::::{warning} +This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. +:::: + +The ES|QL Query Builder allows you to construct ES|QL queries using Python syntax. Consider the following example: + +```python +>>> from elasticsearch.esql import ESQL +>>> query = ( + ESQL.from_("employees") + .sort("emp_no") + .keep("first_name", "last_name", "height") + .eval(height_feet="height * 3.281", height_cm="height * 100") + .limit(3) +) +``` + +You can then see the assembled ES|QL query by printing the resulting query object: + +```python +>>> query +FROM employees +| SORT emp_no +| KEEP first_name, last_name, height +| EVAL height_feet = height * 3.281, height_cm = height * 100 +| LIMIT 3 +``` + +To execute this query, you can cast it to a string and pass the string to the `client.esql.query()` endpoint: + +```python +>>> from elasticsearch import Elasticsearch +>>> client = Elasticsearch(hosts=[os.environ['ELASTICSEARCH_URL']]) +>>> response = client.esql.query(query=str(query)) +``` + +The response body contains a `columns` attribute with the list of columns included in the results, and a `values` attribute with the list of results for the query, each given as a list of column values. 
Here is a possible response body returned by the example query given above: + +```python +>>> from pprint import pprint +>>> pprint(response.body) +{'columns': [{'name': 'first_name', 'type': 'text'}, + {'name': 'last_name', 'type': 'text'}, + {'name': 'height', 'type': 'double'}, + {'name': 'height_feet', 'type': 'double'}, + {'name': 'height_cm', 'type': 'double'}], + 'is_partial': False, + 'took': 11, + 'values': [['Adrian', 'Wells', 2.424, 7.953144, 242.4], + ['Aaron', 'Gonzalez', 1.584, 5.1971, 158.4], + ['Miranda', 'Kramer', 1.55, 5.08555, 155]]} +``` + +## Creating an ES|QL query + +To construct an ES|QL query you start from one of the ES|QL source commands: + +### `ESQL.from_` + +The `FROM` command selects the indices, data streams or aliases to be queried. + +Examples: + +```python +from elasticsearch.esql import ESQL + +# FROM employees +query1 = ESQL.from_("employees") + +# FROM <logs-{now/d}> +query2 = ESQL.from_("<logs-{now/d}>") + +# FROM employees-00001, other-employees-* +query3 = ESQL.from_("employees-00001", "other-employees-*") + +# FROM cluster_one:employees-00001, cluster_two:other-employees-* +query4 = ESQL.from_("cluster_one:employees-00001", "cluster_two:other-employees-*") + +# FROM employees METADATA _id +query5 = ESQL.from_("employees").metadata("_id") +``` + +Note how in the last example the optional `METADATA` clause of the `FROM` command is added as a chained method. + +### `ESQL.row` + +The `ROW` command produces a row with one or more columns, with the values that you specify. + +Examples: + +```python +from elasticsearch.esql import ESQL, functions + +# ROW a = 1, b = "two", c = null +query1 = ESQL.row(a=1, b="two", c=None) + +# ROW a = [1, 2] +query2 = ESQL.row(a=[1, 2]) + +# ROW a = ROUND(1.23, 0) +query3 = ESQL.row(a=functions.round(1.23, 0)) +``` + +### `ESQL.show` + +The `SHOW` command returns information about the deployment and its capabilities. 
+ +Example: + +```python +from elasticsearch.esql import ESQL + +# SHOW INFO +query = ESQL.show("INFO") +``` + +## Adding processing commands + +Once you have a query object, you can add one or more processing commands to it. The following +example shows how to create a query that uses the `WHERE` and `LIMIT` commands to filter the +results: + +```python +from elasticsearch.esql import ESQL + +# FROM employees +# | WHERE still_hired == true +# | LIMIT 10 +query = ESQL.from_("employees").where("still_hired == true").limit(10) +``` + +For a complete list of available commands, review the methods of the [`ESQLBase` class](https://elasticsearch-py.readthedocs.io/en/stable/esql.html) in the Elasticsearch Python API documentation. + +## Creating ES|QL Expressions and Conditions + +The ES|QL query builder for Python provides two ways to create expressions and conditions in ES|QL queries. + +The simplest option is to provide all ES|QL expressions and conditionals as strings. The following example uses this approach to add two calculated columns to the results using the `EVAL` command: + +```python +from elasticsearch.esql import ESQL + +# FROM employees +# | SORT emp_no +# | KEEP first_name, last_name, height +# | EVAL height_feet = height * 3.281, height_cm = height * 100 +query = ( + ESQL.from_("employees") + .sort("emp_no") + .keep("first_name", "last_name", "height") + .eval(height_feet="height * 3.281", height_cm="height * 100") +) +``` + +A more advanced alternative is to replace the strings with Python expressions, which are automatically translated to ES|QL when the query object is rendered to a string. 
The following example is functionally equivalent to the one above: + +```python +from elasticsearch.esql import ESQL, E + +# FROM employees +# | SORT emp_no +# | KEEP first_name, last_name, height +# | EVAL height_feet = height * 3.281, height_cm = height * 100 +query = ( + ESQL.from_("employees") + .sort("emp_no") + .keep("first_name", "last_name", "height") + .eval(height_feet=E("height") * 3.281, height_cm=E("height") * 100) +) +``` + +Here the `E()` helper function is used as a wrapper to the column name that initiates an ES|QL expression. The `E()` function transforms the given column into an ES|QL expression that can be modified with Python operators. + +Here is a second example, which uses a conditional expression in the `WHERE` command: + +```python +from elasticsearch.esql import ESQL + +# FROM employees +# | KEEP first_name, last_name, height +# | WHERE first_name == "Larry" +query = ( + ESQL.from_("employees") + .keep("first_name", "last_name", "height") + .where('first_name == "Larry"') +) +``` + +Using Python syntax, the condition can be rewritten as follows: + +```python +from elasticsearch.esql import ESQL, E + +# FROM employees +# | KEEP first_name, last_name, height +# | WHERE first_name == "Larry" +query = ( + ESQL.from_("employees") + .keep("first_name", "last_name", "height") + .where(E("first_name") == "Larry") +) +``` + +## Using ES|QL functions + +The ES|QL language includes a rich set of functions that can be used in expressions and conditionals. 
These can be included in expressions given as strings, as shown in the example below: + +```python +from elasticsearch.esql import ESQL + +# FROM employees +# | KEEP first_name, last_name, height +# | WHERE LENGTH(first_name) < 4 +query = ( + ESQL.from_("employees") + .keep("first_name", "last_name", "height") + .where("LENGTH(first_name) < 4") +) +``` + +All available ES|QL functions have Python wrappers in the `elasticsearch.esql.functions` module, which can be used when building expressions using Python syntax. Below is the example above coded using Python syntax: + +```python +from elasticsearch.esql import ESQL, E, functions + +# FROM employees +# | KEEP first_name, last_name, height +# | WHERE LENGTH(first_name) < 4 +query = ( + ESQL.from_("employees") + .keep("first_name", "last_name", "height") + .where(functions.length(E("first_name")) < 4) +) +``` + +Note that arguments passed to functions are assumed to be literals. When passing field names, it is necessary to wrap them with the `E()` helper function so that they are interpreted correctly. + +You can find the complete list of available functions in the Python client's [ES|QL API reference documentation](https://elasticsearch-py.readthedocs.io/en/stable/esql.html#module-elasticsearch.esql.functions). diff --git a/docs/reference/toc.yml b/docs/reference/toc.yml index 015027e4d..7e26b7274 100644 --- a/docs/reference/toc.yml +++ b/docs/reference/toc.yml @@ -5,6 +5,7 @@ toc: - file: connecting.md - file: configuration.md - file: querying.md + - file: esql-query-builder.md - file: async.md - file: integrations.md children: diff --git a/docs/sphinx/esql.rst b/docs/sphinx/esql.rst new file mode 100644 index 000000000..1104b5b97 --- /dev/null +++ b/docs/sphinx/esql.rst @@ -0,0 +1,100 @@ +ES|QL Query Builder +=================== + +Commands +-------- + +.. autoclass:: elasticsearch.esql.ESQL + :inherited-members: + :members: + +.. 
autoclass:: elasticsearch.esql.esql.ESQLBase + :inherited-members: + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.From + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Row + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Show + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.ChangePoint + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Completion + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Dissect + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Drop + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Enrich + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Eval + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Fork + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Grok + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Keep + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Limit + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.LookupJoin + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.MvExpand + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Rename + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Sample + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Sort + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Stats + :members: + :exclude-members: __init__ + +.. autoclass:: elasticsearch.esql.esql.Where + :members: + :exclude-members: __init__ + +Functions +--------- + +.. 
automodule:: elasticsearch.esql.functions + :members: diff --git a/docs/sphinx/index.rst b/docs/sphinx/index.rst index 4cf5f92cc..afbdf0aef 100644 --- a/docs/sphinx/index.rst +++ b/docs/sphinx/index.rst @@ -11,6 +11,7 @@ High-level documentation for this client is `also available str: + if isinstance(value, InstrumentedExpression): + return str(value) + return json.dumps(value) + + def __str__(self) -> str: + return self._expr + + def __repr__(self) -> str: + return f"InstrumentedExpression[{self._expr}]" + + def __pos__(self) -> "InstrumentedExpression": + return self + + def __neg__(self) -> "InstrumentedExpression": + return InstrumentedExpression(f"-({self._expr})") + + def __eq__(self, value: Any) -> "InstrumentedExpression": # type: ignore[override] + return InstrumentedExpression(f"{self._expr} == {self._render_value(value)}") + + def __ne__(self, value: Any) -> "InstrumentedExpression": # type: ignore[override] + return InstrumentedExpression(f"{self._expr} != {self._render_value(value)}") + + def __lt__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._expr} < {self._render_value(value)}") + + def __gt__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._expr} > {self._render_value(value)}") + + def __le__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._expr} <= {self._render_value(value)}") + + def __ge__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._expr} >= {self._render_value(value)}") + + def __add__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._expr} + {self._render_value(value)}") + + def __radd__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._render_value(value)} + {self._expr}") + + def __sub__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._expr} - 
{self._render_value(value)}") + + def __rsub__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._render_value(value)} - {self._expr}") + + def __mul__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._expr} * {self._render_value(value)}") + + def __rmul__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._render_value(value)} * {self._expr}") + + def __truediv__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._expr} / {self._render_value(value)}") + + def __rtruediv__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._render_value(value)} / {self._expr}") + + def __mod__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._expr} % {self._render_value(value)}") + + def __rmod__(self, value: Any) -> "InstrumentedExpression": + return InstrumentedExpression(f"{self._render_value(value)} % {self._expr}") + + def is_null(self) -> "InstrumentedExpression": + """Test whether the expression is NULL.""" + return InstrumentedExpression(f"{self._expr} IS NULL") + + def is_not_null(self) -> "InstrumentedExpression": + """Test whether the expression is not NULL.""" + return InstrumentedExpression(f"{self._expr} IS NOT NULL") + + def in_(self, *values: Any) -> "InstrumentedExpression": + """Test if the expression equals one of the given values, rendered as ES|QL literals.""" + rendered_values = ", ".join([self._render_value(value) for value in values]) + return InstrumentedExpression(f"{self._expr} IN ({rendered_values})") + + def like(self, *patterns: str) -> "InstrumentedExpression": + """Filter the expression using a string pattern.""" + if len(patterns) == 1: + return InstrumentedExpression( + f"{self._expr} LIKE {self._render_value(patterns[0])}" + ) + else: + return InstrumentedExpression( + f'{self._expr} LIKE ({", ".join([self._render_value(p) for p in patterns])})' + ) + + def rlike(self, 
*patterns: str) -> "InstrumentedExpression": + """Filter the expression using a regular expression.""" + if len(patterns) == 1: + return InstrumentedExpression( + f"{self._expr} RLIKE {self._render_value(patterns[0])}" + ) + else: + return InstrumentedExpression( + f'{self._expr} RLIKE ({", ".join([self._render_value(p) for p in patterns])})' + ) + + def match(self, query: str) -> "InstrumentedExpression": + """Perform a match query on the field.""" + return InstrumentedExpression(f"{self._expr}:{self._render_value(query)}") + + def asc(self) -> "InstrumentedExpression": + """Return the field name representation for ascending sort order. + + For use in ES|QL queries only. + """ + return InstrumentedExpression(f"{self._expr} ASC") + + def desc(self) -> "InstrumentedExpression": + """Return the field name representation for descending sort order. + + For use in ES|QL queries only. + """ + return InstrumentedExpression(f"{self._expr} DESC") + + def nulls_first(self) -> "InstrumentedExpression": + """Return the field name representation for nulls first sort order. + + For use in ES|QL queries only. + """ + return InstrumentedExpression(f"{self._expr} NULLS FIRST") + + def nulls_last(self) -> "InstrumentedExpression": + """Return the field name representation for nulls last sort order. + + For use in ES|QL queries only. + """ + return InstrumentedExpression(f"{self._expr} NULLS LAST") + + def where( + self, *expressions: Union[str, "InstrumentedExpression"] + ) -> "InstrumentedExpression": + """Add a condition to be met for the row to be included. + + Use only in expressions given in the ``STATS`` command. + """ + if len(expressions) == 1: + return InstrumentedExpression(f"{self._expr} WHERE {expressions[0]}") + else: + return InstrumentedExpression( + f'{self._expr} WHERE {" AND ".join([f"({expr})" for expr in expressions])}' + ) + + +E = InstrumentedExpression + + +class InstrumentedField(InstrumentedExpression): """Proxy object for a mapped document field. 
An object of this instance is returned when a field is accessed as a class @@ -71,8 +228,8 @@ class MyDocument(Document): s = s.sort(-MyDocument.name) # sort by name in descending order """ - def __init__(self, name: str, field: Field): - self._name = name + def __init__(self, name: str, field: Optional[Field]): + super().__init__(name) self._field = field # note that the return value type here assumes classes will only be used to @@ -83,26 +240,29 @@ def __getattr__(self, attr: str) -> "InstrumentedField": # first let's see if this is an attribute of this object return super().__getattribute__(attr) # type: ignore[no-any-return] except AttributeError: - try: - # next we see if we have a sub-field with this name - return InstrumentedField(f"{self._name}.{attr}", self._field[attr]) - except KeyError: - # lastly we let the wrapped field resolve this attribute - return getattr(self._field, attr) # type: ignore[no-any-return] - - def __pos__(self) -> str: + if self._field: + try: + # next we see if we have a sub-field with this name + return InstrumentedField(f"{self._expr}.{attr}", self._field[attr]) + except KeyError: + # lastly we let the wrapped field resolve this attribute + return getattr(self._field, attr) # type: ignore[no-any-return] + else: + raise + + def __pos__(self) -> str: # type: ignore[override] """Return the field name representation for ascending sort order""" - return f"{self._name}" + return f"{self._expr}" - def __neg__(self) -> str: + def __neg__(self) -> str: # type: ignore[override] """Return the field name representation for descending sort order""" - return f"-{self._name}" + return f"-{self._expr}" def __str__(self) -> str: - return self._name + return self._expr def __repr__(self) -> str: - return f"InstrumentedField[{self._name}]" + return f"InstrumentedField[{self._expr}]" class DocumentMeta(type): diff --git a/elasticsearch/dsl/utils.py b/elasticsearch/dsl/utils.py index b52ec63a0..127a48cc2 100644 --- a/elasticsearch/dsl/utils.py +++ 
b/elasticsearch/dsl/utils.py @@ -333,7 +333,7 @@ def __init__(self, _expand__to_dot: Optional[bool] = None, **params: Any) -> Non _expand__to_dot = EXPAND__TO_DOT self._params: Dict[str, Any] = {} for pname, pvalue in params.items(): - if pvalue == DEFAULT: + if pvalue is DEFAULT: continue # expand "__" to dots if "__" in pname and _expand__to_dot: diff --git a/elasticsearch/esql/__init__.py b/elasticsearch/esql/__init__.py new file mode 100644 index 000000000..d872c329a --- /dev/null +++ b/elasticsearch/esql/__init__.py @@ -0,0 +1,19 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ..dsl.document_base import E # noqa: F401 +from .esql import ESQL, and_, not_, or_ # noqa: F401 diff --git a/elasticsearch/esql/esql.py b/elasticsearch/esql/esql.py new file mode 100644 index 000000000..07ccdf839 --- /dev/null +++ b/elasticsearch/esql/esql.py @@ -0,0 +1,1105 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json +from abc import ABC, abstractmethod +from typing import Any, Dict, Optional, Tuple, Type, Union + +from ..dsl.document_base import DocumentBase, InstrumentedExpression, InstrumentedField + +FieldType = Union[InstrumentedField, str] +IndexType = Union[Type[DocumentBase], str] +ExpressionType = Any + + +class ESQL(ABC): + """The static methods of the ``ESQL`` class provide access to the ES|QL source + commands, used to create ES|QL queries. + + These methods return an instance of class ``ESQLBase``, which provides access to + the ES|QL processing commands. + """ + + @staticmethod + def from_(*indices: IndexType) -> "From": + """The ``FROM`` source command returns a table with data from a data stream, index, or alias. + + :param indices: A list of indices, data streams or aliases. Supports wildcards and date math. + + Examples:: + + query1 = ESQL.from_("employees") + query2 = ESQL.from_("<logs-{now/d}>") + query3 = ESQL.from_("employees-00001", "other-employees-*") + query4 = ESQL.from_("cluster_one:employees-00001", "cluster_two:other-employees-*") + query5 = ESQL.from_("employees").metadata("_id") + """ + return From(*indices) + + @staticmethod + def row(**params: ExpressionType) -> "Row": + """The ``ROW`` source command produces a row with one or more columns with values that you specify. + This can be useful for testing. + + :param params: the column values to produce, given as keyword arguments. 
+ + Examples:: + + query1 = ESQL.row(a=1, b="two", c=None) + query2 = ESQL.row(a=[1, 2]) + query3 = ESQL.row(a=functions.round(1.23, 0)) + """ + return Row(**params) + + @staticmethod + def show(item: str) -> "Show": + """The ``SHOW`` source command returns information about the deployment and its capabilities. + + :param item: Can only be ``INFO``. + + Examples:: + + query = ESQL.show("INFO") + """ + return Show(item) + + @staticmethod + def branch() -> "Branch": + """This method can only be used inside a ``FORK`` command to create each branch. + + Examples:: + + query = ESQL.from_("employees").fork( + ESQL.branch().where("emp_no == 10001"), + ESQL.branch().where("emp_no == 10002"), + ) + """ + return Branch() + + +class ESQLBase(ABC): + """The methods of the ``ESQLBase`` class provide access to the ES|QL processing + commands, used to build ES|QL queries. + """ + + def __init__(self, parent: Optional["ESQLBase"] = None): + self._parent = parent + + def __repr__(self) -> str: + return self.render() + + def render(self) -> str: + return ( + self._parent.render() + "\n| " if self._parent else "" + ) + self._render_internal() + + @abstractmethod + def _render_internal(self) -> str: + pass + + def _is_forked(self) -> bool: + if self.__class__.__name__ == "Fork": + return True + if self._parent: + return self._parent._is_forked() + return False + + def change_point(self, value: FieldType) -> "ChangePoint": + """`CHANGE_POINT` detects spikes, dips, and change points in a metric. + + :param value: The column with the metric in which you want to detect a change point. 
+ + Examples:: + + query = ( + ESQL.row(key=list(range(1, 26))) + .mv_expand("key") + .eval(value=functions.case("key<13", 0, 42)) + .change_point("value") + .on("key") + .where("type IS NOT NULL") + ) + """ + return ChangePoint(self, value) + + def completion( + self, *prompt: ExpressionType, **named_prompt: ExpressionType + ) -> "Completion": + """The `COMPLETION` command allows you to send prompts and context to a Large + Language Model (LLM) directly within your ES|QL queries, to perform text + generation tasks. + + :param prompt: The input text or expression used to prompt the LLM. This can + be a string literal or a reference to a column containing text. + :param named_prompt: The input text or expression, given as a keyword argument. + The argument name is used for the column name. If not + specified, the results will be stored in a column named + `completion`. If the specified column already exists, it + will be overwritten with the new results. + + Examples:: + + query1 = ( + ESQL.row(question="What is Elasticsearch?") + .completion("question").with_("test_completion_model") + .keep("question", "completion") + ) + query2 = ( + ESQL.row(question="What is Elasticsearch?") + .completion(answer="question").with_("test_completion_model") + .keep("question", "answer") + ) + query3 = ( + ESQL.from_("movies") + .sort("rating DESC") + .limit(10) + .eval(prompt=\"\"\"CONCAT( + "Summarize this movie using the following information: \\n", + "Title: ", title, "\\n", + "Synopsis: ", synopsis, "\\n", + "Actors: ", MV_CONCAT(actors, ", "), "\\n", + )\"\"\") + .completion(summary="prompt").with_("test_completion_model") + .keep("title", "summary", "rating") + ) + """ + return Completion(self, *prompt, **named_prompt) + + def dissect(self, input: FieldType, pattern: str) -> "Dissect": + """``DISSECT`` enables you to extract structured data out of a string. + + :param input: The column that contains the string you want to structure. 
If + the column has multiple values, ``DISSECT`` will process each value. + :param pattern: A dissect pattern. If a field name conflicts with an existing + column, the existing column is dropped. If a field name is used + more than once, only the rightmost duplicate creates a column. + + Examples:: + + query = ( + ESQL.row(a="2023-01-23T12:15:00.000Z - some text - 127.0.0.1") + .dissect("a", "%{date} - %{msg} - %{ip}") + .keep("date", "msg", "ip") + .eval(date="TO_DATETIME(date)") + ) + """ + return Dissect(self, input, pattern) + + def drop(self, *columns: FieldType) -> "Drop": + """The ``DROP`` processing command removes one or more columns. + + :param columns: The columns to drop, given as positional arguments. Supports wildcards. + + Examples:: + + query1 = ESQL.from_("employees").drop("height") + query2 = ESQL.from_("employees").drop("height*") + """ + return Drop(self, *columns) + + def enrich(self, policy: str) -> "Enrich": + """``ENRICH`` enables you to add data from existing indices as new columns using an + enrich policy. + + :param policy: The name of the enrich policy. You need to create and execute the + enrich policy first. + + Examples:: + + query1 = ( + ESQL.row(a="1") + .enrich("languages_policy").on("a").with_("language_name") + ) + query2 = ( + ESQL.row(a="1") + .enrich("languages_policy").on("a").with_(name="language_name") + ) + """ + return Enrich(self, policy) + + def eval(self, *columns: ExpressionType, **named_columns: ExpressionType) -> "Eval": + """The ``EVAL`` processing command enables you to append new columns with calculated values. + + :param columns: The values for the columns, given as positional arguments. Can be literals, + expressions, or functions. Can use columns defined left of this one. + :param named_columns: The values for the new columns, given as keyword arguments. The name + of the arguments is used as column name. If a column with the same + name already exists, the existing column is dropped. 
If a column name + is used more than once, only the rightmost duplicate creates a column. + + Examples:: + + query1 = ( + ESQL.from_("employees") + .sort("emp_no") + .keep("first_name", "last_name", "height") + .eval(height_feet="height * 3.281", height_cm="height * 100") + ) + query2 = ( + ESQL.from_("employees") + .eval("height * 3.281") + .stats(avg_height_feet=functions.avg("`height * 3.281`")) + ) + """ + return Eval(self, *columns, **named_columns) + + def fork( + self, + fork1: "ESQLBase", + fork2: Optional["ESQLBase"] = None, + fork3: Optional["ESQLBase"] = None, + fork4: Optional["ESQLBase"] = None, + fork5: Optional["ESQLBase"] = None, + fork6: Optional["ESQLBase"] = None, + fork7: Optional["ESQLBase"] = None, + fork8: Optional["ESQLBase"] = None, + ) -> "Fork": + """The ``FORK`` processing command creates multiple execution branches to operate on the + same input data and combines the results in a single output table. + + :param fork: Up to 8 execution branches, created with the ``ESQL.branch()`` method. + + Examples:: + + query = ( + ESQL.from_("employees") + .fork( + ESQL.branch().where("emp_no == 10001"), + ESQL.branch().where("emp_no == 10002"), + ) + .keep("emp_no", "_fork") + .sort("emp_no") + ) + """ + if self._is_forked(): + raise ValueError("a query can only have one fork") + return Fork(self, fork1, fork2, fork3, fork4, fork5, fork6, fork7, fork8) + + def grok(self, input: FieldType, pattern: str) -> "Grok": + """``GROK`` enables you to extract structured data out of a string. + + :param input: The column that contains the string you want to structure. If the + column has multiple values, ``GROK`` will process each value. + :param pattern: A grok pattern. If a field name conflicts with an existing column, + the existing column is discarded. If a field name is used more than + once, a multi-valued column will be created with one value per each + occurrence of the field name. 
+ + Examples:: + + query1 = ( + ESQL.row(a="2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42") + .grok("a", "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}") + .keep("date", "ip", "email", "num") + ) + query2 = ( + ESQL.row(a="2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42") + .grok( + "a", + "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}", + ) + .keep("date", "ip", "email", "num") + .eval(date=functions.to_datetime("date")) + ) + query3 = ( + ESQL.from_("addresses") + .keep("city.name", "zip_code") + .grok("zip_code", "%{WORD:zip_parts} %{WORD:zip_parts}") + ) + """ + return Grok(self, input, pattern) + + def keep(self, *columns: FieldType) -> "Keep": + """The ``KEEP`` processing command enables you to specify what columns are returned + and the order in which they are returned. + + :param columns: The columns to keep, given as positional arguments. Supports + wildcards. + + Examples:: + + query1 = ESQL.from_("employees").keep("emp_no", "first_name", "last_name", "height") + query2 = ESQL.from_("employees").keep("h*") + query3 = ESQL.from_("employees").keep("h*", "*") + """ + return Keep(self, *columns) + + def limit(self, max_number_of_rows: int) -> "Limit": + """The ``LIMIT`` processing command enables you to limit the number of rows that are + returned. + + :param max_number_of_rows: The maximum number of rows to return. + + Examples:: + + query1 = ESQL.from_("employees").sort("emp_no ASC").limit(5) + query2 = ESQL.from_("index").stats(functions.avg("field1")).by("field2").limit(20000) + """ + return Limit(self, max_number_of_rows) + + def lookup_join(self, lookup_index: IndexType) -> "LookupJoin": + """`LOOKUP JOIN` enables you to add data from another index, AKA a 'lookup' index, + to your ES|QL query results, simplifying data enrichment and analysis workflows. + + :param lookup_index: The name of the lookup index. 
This must be a specific index + name - wildcards, aliases, and remote cluster references are + not supported. Indices used for lookups must be configured + with the lookup index mode. + + Examples:: + + query1 = ( + ESQL.from_("firewall_logs") + .lookup_join("threat_list").on("source.IP") + .where("threat_level IS NOT NULL") + ) + query2 = ( + ESQL.from_("system_metrics") + .lookup_join("host_inventory").on("host.name") + .lookup_join("ownerships").on("host.name") + ) + query3 = ( + ESQL.from_("app_logs") + .lookup_join("service_owners").on("service_id") + ) + query4 = ( + ESQL.from_("employees") + .eval(language_code="languages") + .where("emp_no >= 10091 AND emp_no < 10094") + .lookup_join("languages_lookup").on("language_code") + ) + """ + return LookupJoin(self, lookup_index) + + def mv_expand(self, column: FieldType) -> "MvExpand": + """The `MV_EXPAND` processing command expands multivalued columns into one row per + value, duplicating other columns. + + :param column: The multivalued column to expand. + + Examples:: + + query = ESQL.row(a=[1, 2, 3], b="b", j=["a", "b"]).mv_expand("a") + """ + return MvExpand(self, column) + + def rename(self, **columns: FieldType) -> "Rename": + """The ``RENAME`` processing command renames one or more columns. + + :param columns: The old and new column name pairs, given as keyword arguments. + If a name conflicts with an existing column name, the existing column + is dropped. If multiple columns are renamed to the same name, all but + the rightmost column with the same new name are dropped. + + Examples:: + + query = ( + ESQL.from_("employees") + .keep("first_name", "last_name", "still_hired") + .rename(still_hired="employed") + ) + """ + return Rename(self, **columns) + + def sample(self, probability: float) -> "Sample": + """The ``SAMPLE`` command samples a fraction of the table rows. + + :param probability: The probability that a row is included in the sample. The value + must be between 0 and 1, exclusive. 
def stats(
    self, *expressions: ExpressionType, **named_expressions: ExpressionType
) -> "Stats":
    """The ``STATS`` processing command groups rows according to a common value and
    calculates one or more aggregated values over the grouped rows.

    :param expressions: A list of expressions, given as positional arguments.
    :param named_expressions: A list of expressions, given as keyword arguments. The
                              argument names are used for the returned aggregated values.

    Note that ``expressions`` and ``named_expressions`` are mutually exclusive:
    passing both raises ``ValueError``.

    Examples::

        query1 = (
            ESQL.from_("employees")
            .stats(count=functions.count("emp_no")).by("languages")
            .sort("languages")
        )
        query2 = (
            ESQL.from_("employees")
            .stats(avg_lang=functions.avg("languages"))
        )
        query3 = (
            ESQL.from_("employees")
            .stats(
                avg_lang=functions.avg("languages"),
                max_lang=functions.max("languages")
            )
        )
        query4 = (
            ESQL.from_("employees")
            .stats(
                avg50s=functions.avg("salary").where('birth_date < "1960-01-01"'),
                avg60s=functions.avg("salary").where('birth_date >= "1960-01-01"'),
            ).by("gender")
            .sort("gender")
        )
        query5 = (
            ESQL.from_("employees")
            .eval(Ks="salary / 1000")
            .stats(
                under_40K=functions.count("*").where("Ks < 40"),
                inbetween=functions.count("*").where("40 <= Ks AND Ks < 60"),
                over_60K=functions.count("*").where("60 <= Ks"),
                total=functions.count("*")
            )
        )
        query6 = (
            ESQL.row(i=1, a=["a", "b"])
            .stats(functions.min("i")).by("a")
            .sort("a ASC")
        )
        query7 = (
            ESQL.from_("employees")
            .eval(hired=functions.date_format("hire_date", "yyyy"))
            .stats(avg_salary=functions.avg("salary")).by("hired", "languages.long")
            .eval(avg_salary=functions.round("avg_salary"))
            .sort("hired", "languages.long")
        )
    """
    return Stats(self, *expressions, **named_expressions)
class From(ESQLBase):
    """Implementation of the ``FROM`` source command.

    This class inherits from :class:`ESQLBase`, so that every command of an
    ES|QL query can be chained onto it in a single expression.
    """

    def __init__(self, *indices: IndexType):
        super().__init__()
        self._indices = indices
        # No METADATA clause is rendered until ``metadata()`` is called.
        self._metadata_fields: Tuple[FieldType, ...] = ()

    def metadata(self, *fields: FieldType) -> "From":
        """Continuation of the ``FROM`` source command.

        :param fields: metadata fields to retrieve, given as positional arguments.
        :return: this same object, so that further commands can be chained.
        """
        self._metadata_fields = fields
        return self

    def _render_internal(self) -> str:
        """Render the ``FROM`` command, plus ``METADATA`` when fields were set."""
        names = []
        for index in self._indices:
            # Anything that is not a plain string is expected to expose its
            # index name through ``_index._name``.
            names.append(index if isinstance(index, str) else index._index._name)
        rendered = self.__class__.__name__.upper() + " " + ", ".join(names)
        if not self._metadata_fields:
            return rendered
        metadata = ", ".join(map(str, self._metadata_fields))
        return f"{rendered} METADATA {metadata}"
class Branch(ESQLBase):
    """Implementation of a branch inside a ``FORK`` processing command.

    This class inherits from :class:`ESQLBase`,
    which makes it possible to chain all the commands that belong to the branch
    in a single expression.
    """

    def _render_internal(self) -> str:
        # The branch object itself contributes no text; only the commands
        # chained onto it render anything.
        return ""
class Completion(ESQLBase):
    """Implementation of the ``COMPLETION`` processing command.

    This class inherits from :class:`ESQLBase`,
    to make it possible to chain all the commands that belong to an ES|QL query
    in a single expression.
    """

    def __init__(
        self, parent: ESQLBase, *prompt: ExpressionType, **named_prompt: ExpressionType
    ):
        """Create the command from exactly one prompt.

        :param parent: The command this one is chained onto.
        :param prompt: The prompt, given as a single positional argument.
        :param named_prompt: The prompt, given as a single keyword argument whose
                             name is used as the output column name.
        :raises ValueError: if the total number of prompts is not exactly one.
        """
        # Exactly one prompt is required. Rejecting the zero-prompt case here
        # avoids an IndexError on ``self._prompt[0]`` at render time.
        if len(prompt) + len(named_prompt) != 1:
            raise ValueError(
                "this method requires either one positional or one keyword argument only"
            )
        super().__init__(parent)
        self._prompt = prompt
        self._named_prompt = named_prompt
        self._inference_id: Optional[str] = None

    def with_(self, inference_id: str) -> "Completion":
        """Continuation of the ``COMPLETION`` command.

        :param inference_id: The ID of the inference endpoint to use for the task. The
                             inference endpoint must be configured with the completion
                             task type.
        :return: this same object, so that further commands can be chained.
        """
        self._inference_id = inference_id
        return self

    def _render_internal(self) -> str:
        """Render the command.

        :raises ValueError: if ``with_()`` was never called to set the inference ID.
        """
        if self._inference_id is None:
            raise ValueError("The completion command requires an inference ID")
        if self._named_prompt:
            # The constructor guarantees a single entry.
            column, prompt = next(iter(self._named_prompt.items()))
            return f"COMPLETION {column} = {prompt} WITH {self._inference_id}"
        return f"COMPLETION {self._prompt[0]} WITH {self._inference_id}"
class Enrich(ESQLBase):
    """Implementation of the ``ENRICH`` processing command.

    This class inherits from :class:`ESQLBase`,
    to make it possible to chain all the commands that belong to an ES|QL query
    in a single expression.
    """

    def __init__(self, parent: ESQLBase, policy: str):
        """
        :param parent: The command this one is chained onto.
        :param policy: The name of the enrich policy.
        """
        super().__init__(parent)
        self._policy = policy
        self._match_field: Optional[FieldType] = None
        self._fields: Optional[Tuple[FieldType, ...]] = None
        self._named_fields: Optional[Dict[str, FieldType]] = None

    def on(self, match_field: FieldType) -> "Enrich":
        """Continuation of the ``ENRICH`` command.

        :param match_field: The match field. ``ENRICH`` uses its value to look for records
                            in the enrich index. If not specified, the match will be
                            performed on the column with the same name as the
                            `match_field` defined in the enrich policy.
        :return: this same object, so that further commands can be chained.
        """
        self._match_field = match_field
        return self

    def with_(self, *fields: FieldType, **named_fields: FieldType) -> "Enrich":
        """Continuation of the ``ENRICH`` command.

        :param fields: The enrich fields from the enrich index that are added to the result
                       as new columns, given as positional arguments. If a column with the
                       same name as the enrich field already exists, the existing column will
                       be replaced by the new column. If not specified, each of the enrich
                       fields defined in the policy is added. A column with the same name as
                       the enrich field will be dropped unless the enrich field is renamed.
        :param named_fields: The enrich fields from the enrich index that are added to the
                             result as new columns, given as keyword arguments. The name of
                             the keyword arguments are used as column names. If a column has
                             the same name as the new name, it will be discarded. If a name
                             (new or original) occurs more than once, only the rightmost
                             duplicate creates a new column.
        :return: this same object, so that further commands can be chained.
        :raises ValueError: if both positional and keyword fields are given.
        """
        if fields and named_fields:
            raise ValueError(
                "this method supports positional or keyword arguments but not both"
            )
        self._fields = fields
        self._named_fields = named_fields
        return self

    def _render_internal(self) -> str:
        """Render the command with its optional ``ON`` and ``WITH`` clauses."""
        on = "" if self._match_field is None else f" ON {self._match_field}"
        with_ = ""
        if self._named_fields:
            assignments = ", ".join(
                f"{name} = {field}" for name, field in self._named_fields.items()
            )
            with_ = f" WITH {assignments}"
        elif self._fields:
            # Truthiness rather than ``is not None``: ``with_()`` called with no
            # fields stores an empty tuple, which must not render a dangling
            # "WITH " clause.
            with_ = f' WITH {", ".join(str(field) for field in self._fields)}'
        return f"ENRICH {self._policy}{on}{with_}"
+ """ + + def __init__( + self, parent: ESQLBase, *columns: FieldType, **named_columns: FieldType + ): + if columns and named_columns: + raise ValueError( + "this method supports positional or keyword arguments but not both" + ) + super().__init__(parent) + self._columns = columns or named_columns + + def _render_internal(self) -> str: + if isinstance(self._columns, dict): + cols = ", ".join( + [f"{name} = {value}" for name, value in self._columns.items()] + ) + else: + cols = ", ".join([f"{col}" for col in self._columns]) + return f"EVAL {cols}" + + +class Fork(ESQLBase): + """Implementation of the ``FORK`` processing command. + + This class inherits from :class:`ESQLBase `, + to make it possible to chain all the commands that belong to an ES|QL query + in a single expression. + """ + + def __init__( + self, + parent: ESQLBase, + fork1: ESQLBase, + fork2: Optional[ESQLBase] = None, + fork3: Optional[ESQLBase] = None, + fork4: Optional[ESQLBase] = None, + fork5: Optional[ESQLBase] = None, + fork6: Optional[ESQLBase] = None, + fork7: Optional[ESQLBase] = None, + fork8: Optional[ESQLBase] = None, + ): + super().__init__(parent) + self._branches = [fork1, fork2, fork3, fork4, fork5, fork6, fork7, fork8] + + def _render_internal(self) -> str: + cmds = "" + for branch in self._branches: + if branch: + cmd = branch.render()[3:].replace("\n", " ") + if cmds == "": + cmds = f"( {cmd} )" + else: + cmds += f"\n ( {cmd} )" + return f"FORK {cmds}" + + +class Grok(ESQLBase): + """Implementation of the ``GROK`` processing command. + + This class inherits from :class:`ESQLBase `, + to make it possible to chain all the commands that belong to an ES|QL query + in a single expression. 
class Keep(ESQLBase):
    """Implementation of the ``KEEP`` processing command.

    This class inherits from :class:`ESQLBase`, so that every command of an
    ES|QL query can be chained onto it in a single expression.
    """

    def __init__(self, parent: ESQLBase, *columns: FieldType):
        super().__init__(parent)
        self._columns = columns

    def _render_internal(self) -> str:
        """Render ``KEEP`` followed by the comma-separated column list."""
        column_list = ", ".join(str(column) for column in self._columns)
        return "KEEP " + column_list
class Rename(ESQLBase):
    """Implementation of the ``RENAME`` processing command.

    This class inherits from :class:`ESQLBase`, so that every command of an
    ES|QL query can be chained onto it in a single expression.
    """

    def __init__(self, parent: ESQLBase, **columns: FieldType):
        super().__init__(parent)
        # Maps each current column name (keyword) to its new name (value).
        self._columns = columns

    def _render_internal(self) -> str:
        """Render ``RENAME`` with one ``old AS new`` pair per keyword argument."""
        pairs = []
        for old_name, new_name in self._columns.items():
            pairs.append(f"{old_name} AS {new_name}")
        return "RENAME " + ", ".join(pairs)
class Where(ESQLBase):
    """Implementation of the ``WHERE`` processing command.

    This class inherits from :class:`ESQLBase`,
    to make it possible to chain all the commands that belong to an ES|QL query
    in a single expression.
    """

    def __init__(self, parent: ESQLBase, *expressions: ExpressionType):
        """
        :param parent: The command this one is chained onto.
        :param expressions: Boolean expressions, combined with ``AND`` when rendered.
        """
        super().__init__(parent)
        self._expressions = expressions

    def _render_internal(self) -> str:
        """Render ``WHERE`` with all conditions combined by ``AND``.

        A single condition is rendered verbatim. When several conditions are
        given, each one is wrapped in parentheses.
        """
        conditions = [str(expr) for expr in self._expressions]
        if len(conditions) > 1:
            # Without parentheses an ``OR`` inside one condition would bind
            # across the joining ``AND`` and change the meaning of the filter.
            conditions = [f"({condition})" for condition in conditions]
        return "WHERE " + " AND ".join(conditions)
def atan2(
    y_coordinate: ExpressionType, x_coordinate: ExpressionType
) -> InstrumentedExpression:
    """The angle between the positive x-axis and the ray from the origin to the
    point (x , y) in the Cartesian plane, expressed in radians.

    :param y_coordinate: y coordinate. If `null`, the function returns `null`.
    :param x_coordinate: x coordinate. If `null`, the function returns `null`.
    :return: An expression that renders as ``ATAN2(y, x)``.
    """
    # Both arguments go through _render(), like every other function in this
    # module, so Python values (None, bools, lists, ...) become ES|QL literals.
    return InstrumentedExpression(
        f"ATAN2({_render(y_coordinate)}, {_render(x_coordinate)})"
    )
def bucket(
    field: ExpressionType,
    buckets: ExpressionType,
    from_: ExpressionType,
    to: ExpressionType,
) -> InstrumentedExpression:
    """Creates groups of values - buckets - out of a datetime or numeric input.
    The size of the buckets can either be provided directly, or chosen based on
    a recommended count and values range.

    :param field: Numeric or date expression from which to derive buckets.
    :param buckets: Target number of buckets, or desired bucket size if `from`
                    and `to` parameters are omitted.
    :param from_: Start of the range. Can be a number, a date or a date expressed
                  as a string.
    :param to: End of the range. Can be a number, a date or a date expressed as a string.
    :return: An expression that renders as ``BUCKET(field, buckets, from, to)``.
    """
    # ``from_`` is rendered like the other three arguments; interpolating it
    # raw would emit a date string without the quotes that ``to`` gets.
    return InstrumentedExpression(
        f"BUCKET({_render(field)}, {_render(buckets)}, {_render(from_)}, {_render(to)})"
    )
def cidr_match(ip: ExpressionType, block_x: ExpressionType) -> InstrumentedExpression:
    """Returns true if the provided IP is contained in one of the provided CIDR blocks.

    :param ip: IP address of type `ip` (both IPv4 and IPv6 are supported).
    :param block_x: CIDR block to test the IP against.
    :return: An expression that renders as ``CIDR_MATCH(ip, block)``.
    """
    # Render the block the same way as the IP so that a CIDR given as a Python
    # string is emitted as a quoted ES|QL string literal.
    return InstrumentedExpression(f"CIDR_MATCH({_render(ip)}, {_render(block_x)})")
def count(field: ExpressionType = None) -> InstrumentedExpression:
    """Returns the total number (count) of input values.

    :param field: Expression that outputs values to be counted. If omitted,
                  equivalent to `COUNT(*)` (the number of rows).
    :return: An expression that renders as ``COUNT(field)``, or ``COUNT(*)``
             when no field is given.
    """
    if field is None:
        # The documented "omitted" form: count rows instead of field values.
        return InstrumentedExpression("COUNT(*)")
    return InstrumentedExpression(f"COUNT({_render(field)})")
def date_diff(
    unit: ExpressionType, start_timestamp: ExpressionType, end_timestamp: ExpressionType
) -> InstrumentedExpression:
    """Subtracts the `startTimestamp` from the `endTimestamp` and returns the
    difference in multiples of `unit`. If `startTimestamp` is later than the
    `endTimestamp`, negative values are returned.

    :param unit: Time difference unit
    :param start_timestamp: A string representing a start timestamp
    :param end_timestamp: A string representing an end timestamp
    :return: An expression that renders as ``DATE_DIFF(unit, start, end)``.
    """
    # All three arguments are rendered uniformly; the timestamps are documented
    # as strings and therefore need the same quoting that ``unit`` receives.
    return InstrumentedExpression(
        f"DATE_DIFF({_render(unit)}, {_render(start_timestamp)}, {_render(end_timestamp)})"
    )
def date_format(
    date: ExpressionType,
    date_format: ExpressionType = None,
) -> InstrumentedExpression:
    """Returns a string representation of a date, in the provided format.

    Note that the Python signature takes the date first, while the rendered
    ES|QL call places the format first.

    :param date: Date expression. If `null`, the function returns `null`.
    :param date_format: Date format (optional). If no format is specified, the
                        `yyyy-MM-dd'T'HH:mm:ss.SSSZ` format is used. If `null`, the
                        function returns `null`.
    :return: An expression that renders as ``DATE_FORMAT(format, date)``, or
             ``DATE_FORMAT(date)`` when no format is given.
    """
    if date_format is None:
        return InstrumentedExpression(f"DATE_FORMAT({_render(date)})")
    # _render() instead of json.dumps(): plain values render identically, and a
    # format given as an InstrumentedExpression is emitted as an expression
    # rather than being passed to the JSON encoder.
    return InstrumentedExpression(
        f"DATE_FORMAT({_render(date_format)}, {_render(date)})"
    )
+ :param suffix: String expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"ENDS_WITH({_render(str)}, {_render(suffix)})") + + +def exp(number: ExpressionType) -> InstrumentedExpression: + """Returns the value of e raised to the power of the given number. + + :param number: Numeric expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"EXP({_render(number)})") + + +def first_over_time(field: ExpressionType) -> InstrumentedExpression: + """The earliest value of a field, where recency determined by the + `@timestamp` field. + + :param field: + """ + return InstrumentedExpression(f"FIRST_OVER_TIME({_render(field)})") + + +def floor(number: ExpressionType) -> InstrumentedExpression: + """Round a number down to the nearest integer. + + :param number: Numeric expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"FLOOR({_render(number)})") + + +def from_base64(string: ExpressionType) -> InstrumentedExpression: + """Decode a base64 string. + + :param string: A base64 string. + """ + return InstrumentedExpression(f"FROM_BASE64({_render(string)})") + + +def greatest(first: ExpressionType, rest: ExpressionType) -> InstrumentedExpression: + """Returns the maximum value from multiple columns. This is similar to + `MV_MAX` except it is intended to run on multiple columns at once. + + :param first: First of the columns to evaluate. + :param rest: The rest of the columns to evaluate. + """ + return InstrumentedExpression(f"GREATEST({_render(first)}, {_render(rest)})") + + +def hash(algorithm: ExpressionType, input: ExpressionType) -> InstrumentedExpression: + """Computes the hash of the input using various algorithms such as MD5, + SHA, SHA-224, SHA-256, SHA-384, SHA-512. + + :param algorithm: Hash algorithm to use. + :param input: Input to hash. 
+ """ + return InstrumentedExpression(f"HASH({_render(algorithm)}, {_render(input)})") + + +def hypot(number1: ExpressionType, number2: ExpressionType) -> InstrumentedExpression: + """Returns the hypotenuse of two numbers. The input can be any numeric + values, the return value is always a double. Hypotenuses of infinities are null. + + :param number1: Numeric expression. If `null`, the function returns `null`. + :param number2: Numeric expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"HYPOT({number1}, {number2})") + + +def ip_prefix( + ip: ExpressionType, + prefix_length_v4: ExpressionType, + prefix_length_v6: ExpressionType, +) -> InstrumentedExpression: + """Truncates an IP to a given prefix length. + + :param ip: IP address of type `ip` (both IPv4 and IPv6 are supported). + :param prefix_length_v4: Prefix length for IPv4 addresses. + :param prefix_length_v6: Prefix length for IPv6 addresses. + """ + return InstrumentedExpression( + f"IP_PREFIX({_render(ip)}, {prefix_length_v4}, {prefix_length_v6})" + ) + + +def knn( + field: ExpressionType, query: ExpressionType, options: ExpressionType = None +) -> InstrumentedExpression: + """Finds the k nearest vectors to a query vector, as measured by a + similarity metric. knn function finds nearest vectors through approximate + search on indexed dense_vectors. + + :param field: Field that the query will target. + :param query: Vector value to find top nearest neighbours for. + :param options: (Optional) kNN additional options as function named parameters. + """ + if options is not None: + return InstrumentedExpression( + f"KNN({_render(field)}, {_render(query)}, {_render(options)})" + ) + else: + return InstrumentedExpression(f"KNN({_render(field)}, {_render(query)})") + + +def kql(query: ExpressionType) -> InstrumentedExpression: + """Performs a KQL query. Returns true if the provided KQL query string + matches the row. + + :param query: Query string in KQL query string format. 
+ """ + return InstrumentedExpression(f"KQL({_render(query)})") + + +def last_over_time(field: ExpressionType) -> InstrumentedExpression: + """The latest value of a field, where recency determined by the + `@timestamp` field. + + :param field: + """ + return InstrumentedExpression(f"LAST_OVER_TIME({_render(field)})") + + +def least(first: ExpressionType, rest: ExpressionType) -> InstrumentedExpression: + """Returns the minimum value from multiple columns. This is similar to + `MV_MIN` except it is intended to run on multiple columns at once. + + :param first: First of the columns to evaluate. + :param rest: The rest of the columns to evaluate. + """ + return InstrumentedExpression(f"LEAST({_render(first)}, {_render(rest)})") + + +def left(string: ExpressionType, length: ExpressionType) -> InstrumentedExpression: + """Returns the substring that extracts *length* chars from *string* + starting from the left. + + :param string: The string from which to return a substring. + :param length: The number of characters to return. + """ + return InstrumentedExpression(f"LEFT({_render(string)}, {_render(length)})") + + +def length(string: ExpressionType) -> InstrumentedExpression: + """Returns the character length of a string. + + :param string: String expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"LENGTH({_render(string)})") + + +def locate( + string: ExpressionType, substring: ExpressionType, start: ExpressionType +) -> InstrumentedExpression: + """Returns an integer that indicates the position of a keyword substring + within another string. Returns `0` if the substring cannot be found. Note + that string positions start from `1`. 
+ + :param string: An input string + :param substring: A substring to locate in the input string + :param start: The start index + """ + return InstrumentedExpression( + f"LOCATE({_render(string)}, {_render(substring)}, {_render(start)})" + ) + + +def log(base: ExpressionType, number: ExpressionType) -> InstrumentedExpression: + """Returns the logarithm of a value to a base. The input can be any numeric + value, the return value is always a double. Logs of zero, negative + numbers, and base of one return `null` as well as a warning. + + :param base: Base of logarithm. If `null`, the function returns `null`. If + not provided, this function returns the natural logarithm (base e) of a value. + :param number: Numeric expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"LOG({_render(base)}, {_render(number)})") + + +def log10(number: ExpressionType) -> InstrumentedExpression: + """Returns the logarithm of a value to base 10. The input can be any + numeric value, the return value is always a double. Logs of 0 and negative + numbers return `null` as well as a warning. + + :param number: Numeric expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"LOG10({_render(number)})") + + +def ltrim(string: ExpressionType) -> InstrumentedExpression: + """Removes leading whitespaces from a string. + + :param string: String expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"LTRIM({_render(string)})") + + +def match( + field: ExpressionType, query: ExpressionType, options: ExpressionType = None +) -> InstrumentedExpression: + """Use `MATCH` to perform a match query on the specified field. Using + `MATCH` is equivalent to using the `match` query in the Elasticsearch Query DSL. + + :param field: Field that the query will target. + :param query: Value to find in the provided field. + :param options: (Optional) Match additional options as function named parameters. 
+ """ + if options is not None: + return InstrumentedExpression( + f"MATCH({_render(field)}, {_render(query)}, {_render(options)})" + ) + else: + return InstrumentedExpression(f"MATCH({_render(field)}, {_render(query)})") + + +def match_phrase( + field: ExpressionType, query: ExpressionType, options: ExpressionType = None +) -> InstrumentedExpression: + """Use `MATCH_PHRASE` to perform a `match_phrase` on the specified field. + Using `MATCH_PHRASE` is equivalent to using the `match_phrase` query in the + Elasticsearch Query DSL. + + :param field: Field that the query will target. + :param query: Value to find in the provided field. + :param options: (Optional) MatchPhrase additional options as function named parameters. + """ + if options is not None: + return InstrumentedExpression( + f"MATCH_PHRASE({_render(field)}, {_render(query)}, {_render(options)})" + ) + else: + return InstrumentedExpression( + f"MATCH_PHRASE({_render(field)}, {_render(query)})" + ) + + +def max(field: ExpressionType) -> InstrumentedExpression: + """The maximum value of a field. + + :param field: + """ + return InstrumentedExpression(f"MAX({_render(field)})") + + +def max_over_time(field: ExpressionType) -> InstrumentedExpression: + """The maximum over time value of a field. + + :param field: + """ + return InstrumentedExpression(f"MAX_OVER_TIME({_render(field)})") + + +def md5(input: ExpressionType) -> InstrumentedExpression: + """Computes the MD5 hash of the input. + + :param input: Input to hash. + """ + return InstrumentedExpression(f"MD5({_render(input)})") + + +def median(number: ExpressionType) -> InstrumentedExpression: + """The value that is greater than half of all values and less than half of + all values, also known as the 50% `PERCENTILE`. + + :param number: Expression that outputs values to calculate the median of. 
+ """ + return InstrumentedExpression(f"MEDIAN({_render(number)})") + + +def median_absolute_deviation(number: ExpressionType) -> InstrumentedExpression: + """Returns the median absolute deviation, a measure of variability. It is a + robust statistic, meaning that it is useful for describing data that may + have outliers, or may not be normally distributed. For such data it can be + more descriptive than standard deviation. It is calculated as the median + of each data point’s deviation from the median of the entire sample. That + is, for a random variable `X`, the median absolute deviation is + `median(|median(X) - X|)`. + + :param number: + """ + return InstrumentedExpression(f"MEDIAN_ABSOLUTE_DEVIATION({_render(number)})") + + +def min(field: ExpressionType) -> InstrumentedExpression: + """The minimum value of a field. + + :param field: + """ + return InstrumentedExpression(f"MIN({_render(field)})") + + +def min_over_time(field: ExpressionType) -> InstrumentedExpression: + """The minimum over time value of a field. + + :param field: + """ + return InstrumentedExpression(f"MIN_OVER_TIME({_render(field)})") + + +def multi_match( + query: ExpressionType, fields: ExpressionType, options: ExpressionType = None +) -> InstrumentedExpression: + """Use `MULTI_MATCH` to perform a multi-match query on the specified field. + The multi_match query builds on the match query to allow multi-field queries. + + :param query: Value to find in the provided fields. 
+ :param fields: Fields to use for matching + :param options: (Optional) Additional options for MultiMatch, passed as function + named parameters + """ + if options is not None: + return InstrumentedExpression( + f"MULTI_MATCH({_render(query)}, {_render(fields)}, {_render(options)})" + ) + else: + return InstrumentedExpression( + f"MULTI_MATCH({_render(query)}, {_render(fields)})" + ) + + +def mv_append(field1: ExpressionType, field2: ExpressionType) -> InstrumentedExpression: + """Concatenates values of two multi-value fields. + + :param field1: + :param field2: + """ + return InstrumentedExpression(f"MV_APPEND({field1}, {field2})") + + +def mv_avg(number: ExpressionType) -> InstrumentedExpression: + """Converts a multivalued field into a single valued field containing the + average of all of the values. + + :param number: Multivalue expression. + """ + return InstrumentedExpression(f"MV_AVG({_render(number)})") + + +def mv_concat(string: ExpressionType, delim: ExpressionType) -> InstrumentedExpression: + """Converts a multivalued string expression into a single valued column + containing the concatenation of all values separated by a delimiter. + + :param string: Multivalue expression. + :param delim: Delimiter. + """ + return InstrumentedExpression(f"MV_CONCAT({_render(string)}, {_render(delim)})") + + +def mv_count(field: ExpressionType) -> InstrumentedExpression: + """Converts a multivalued expression into a single valued column containing + a count of the number of values. + + :param field: Multivalue expression. + """ + return InstrumentedExpression(f"MV_COUNT({_render(field)})") + + +def mv_dedupe(field: ExpressionType) -> InstrumentedExpression: + """Remove duplicate values from a multivalued field. + + :param field: Multivalue expression. 
+ """ + return InstrumentedExpression(f"MV_DEDUPE({_render(field)})") + + +def mv_first(field: ExpressionType) -> InstrumentedExpression: + """Converts a multivalued expression into a single valued column containing + the first value. This is most useful when reading from a function that + emits multivalued columns in a known order like `SPLIT`. + + :param field: Multivalue expression. + """ + return InstrumentedExpression(f"MV_FIRST({_render(field)})") + + +def mv_last(field: ExpressionType) -> InstrumentedExpression: + """Converts a multivalue expression into a single valued column containing + the last value. This is most useful when reading from a function that emits + multivalued columns in a known order like `SPLIT`. + + :param field: Multivalue expression. + """ + return InstrumentedExpression(f"MV_LAST({_render(field)})") + + +def mv_max(field: ExpressionType) -> InstrumentedExpression: + """Converts a multivalued expression into a single valued column containing + the maximum value. + + :param field: Multivalue expression. + """ + return InstrumentedExpression(f"MV_MAX({_render(field)})") + + +def mv_median(number: ExpressionType) -> InstrumentedExpression: + """Converts a multivalued field into a single valued field containing the + median value. + + :param number: Multivalue expression. + """ + return InstrumentedExpression(f"MV_MEDIAN({_render(number)})") + + +def mv_median_absolute_deviation(number: ExpressionType) -> InstrumentedExpression: + """Converts a multivalued field into a single valued field containing the + median absolute deviation. It is calculated as the median of each data + point’s deviation from the median of the entire sample. That is, for a + random variable `X`, the median absolute deviation is `median(|median(X) - X|)`. + + :param number: Multivalue expression. 
+ """ + return InstrumentedExpression(f"MV_MEDIAN_ABSOLUTE_DEVIATION({_render(number)})") + + +def mv_min(field: ExpressionType) -> InstrumentedExpression: + """Converts a multivalued expression into a single valued column containing + the minimum value. + + :param field: Multivalue expression. + """ + return InstrumentedExpression(f"MV_MIN({_render(field)})") + + +def mv_percentile( + number: ExpressionType, percentile: ExpressionType +) -> InstrumentedExpression: + """Converts a multivalued field into a single valued field containing the + value at which a certain percentage of observed values occur. + + :param number: Multivalue expression. + :param percentile: The percentile to calculate. Must be a number between 0 + and 100. Numbers out of range will return a null instead. + """ + return InstrumentedExpression( + f"MV_PERCENTILE({_render(number)}, {_render(percentile)})" + ) + + +def mv_pseries_weighted_sum( + number: ExpressionType, p: ExpressionType +) -> InstrumentedExpression: + """Converts a multivalued expression into a single-valued column by + multiplying every element on the input list by its corresponding term in + P-Series and computing the sum. + + :param number: Multivalue expression. + :param p: It is a constant number that represents the *p* parameter in the + P-Series. It impacts every element’s contribution to the weighted sum. + """ + return InstrumentedExpression( + f"MV_PSERIES_WEIGHTED_SUM({_render(number)}, {_render(p)})" + ) + + +def mv_slice( + field: ExpressionType, start: ExpressionType, end: ExpressionType = None +) -> InstrumentedExpression: + """Returns a subset of the multivalued field using the start and end index + values. This is most useful when reading from a function that emits + multivalued columns in a known order like `SPLIT` or `MV_SORT`. + + :param field: Multivalue expression. If `null`, the function returns `null`. + :param start: Start position. If `null`, the function returns `null`. 
The + start argument can be negative. An index of -1 is used to specify + the last value in the list. + :param end: End position(included). Optional; if omitted, the position at + `start` is returned. The end argument can be negative. An index of -1 + is used to specify the last value in the list. + """ + if end is not None: + return InstrumentedExpression( + f"MV_SLICE({_render(field)}, {_render(start)}, {_render(end)})" + ) + else: + return InstrumentedExpression(f"MV_SLICE({_render(field)}, {_render(start)})") + + +def mv_sort(field: ExpressionType, order: ExpressionType) -> InstrumentedExpression: + """Sorts a multivalued field in lexicographical order. + + :param field: Multivalue expression. If `null`, the function returns `null`. + :param order: Sort order. The valid options are ASC and DESC, the default is ASC. + """ + return InstrumentedExpression(f"MV_SORT({_render(field)}, {_render(order)})") + + +def mv_sum(number: ExpressionType) -> InstrumentedExpression: + """Converts a multivalued field into a single valued field containing the + sum of all of the values. + + :param number: Multivalue expression. + """ + return InstrumentedExpression(f"MV_SUM({_render(number)})") + + +def mv_zip( + string1: ExpressionType, string2: ExpressionType, delim: ExpressionType = None +) -> InstrumentedExpression: + """Combines the values from two multivalued fields with a delimiter that + joins them together. + + :param string1: Multivalue expression. + :param string2: Multivalue expression. + :param delim: Delimiter. Optional; if omitted, `,` is used as a default delimiter. 
+ """ + if delim is not None: + return InstrumentedExpression(f"MV_ZIP({string1}, {string2}, {_render(delim)})") + else: + return InstrumentedExpression(f"MV_ZIP({string1}, {string2})") + + +def now() -> InstrumentedExpression: + """Returns current date and time.""" + return InstrumentedExpression("NOW()") + + +def percentile( + number: ExpressionType, percentile: ExpressionType +) -> InstrumentedExpression: + """Returns the value at which a certain percentage of observed values + occur. For example, the 95th percentile is the value which is greater than + 95% of the observed values and the 50th percentile is the `MEDIAN`. + + :param number: + :param percentile: + """ + return InstrumentedExpression( + f"PERCENTILE({_render(number)}, {_render(percentile)})" + ) + + +def pi() -> InstrumentedExpression: + """Returns Pi, the ratio of a circle’s circumference to its diameter.""" + return InstrumentedExpression("PI()") + + +def pow(base: ExpressionType, exponent: ExpressionType) -> InstrumentedExpression: + """Returns the value of `base` raised to the power of `exponent`. + + :param base: Numeric expression for the base. If `null`, the function returns `null`. + :param exponent: Numeric expression for the exponent. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"POW({_render(base)}, {_render(exponent)})") + + +def qstr( + query: ExpressionType, options: ExpressionType = None +) -> InstrumentedExpression: + """Performs a query string query. Returns true if the provided query string + matches the row. + + :param query: Query string in Lucene query string format. + :param options: (Optional) Additional options for Query String as function named + parameters. + """ + if options is not None: + return InstrumentedExpression(f"QSTR({_render(query)}, {_render(options)})") + else: + return InstrumentedExpression(f"QSTR({_render(query)})") + + +def rate(field: ExpressionType) -> InstrumentedExpression: + """The rate of a counter field. 
+ + :param field: + """ + return InstrumentedExpression(f"RATE({_render(field)})") + + +def repeat(string: ExpressionType, number: ExpressionType) -> InstrumentedExpression: + """Returns a string constructed by concatenating `string` with itself the + specified `number` of times. + + :param string: String expression. + :param number: Number times to repeat. + """ + return InstrumentedExpression(f"REPEAT({_render(string)}, {_render(number)})") + + +def replace( + string: ExpressionType, regex: ExpressionType, new_string: ExpressionType +) -> InstrumentedExpression: + """The function substitutes in the string `str` any match of the regular + expression `regex` with the replacement string `newStr`. + + :param string: String expression. + :param regex: Regular expression. + :param new_string: Replacement string. + """ + return InstrumentedExpression( + f"REPLACE({_render(string)}, {_render(regex)}, {new_string})" + ) + + +def reverse(str: ExpressionType) -> InstrumentedExpression: + """Returns a new string representing the input string in reverse order. + + :param str: String expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"REVERSE({_render(str)})") + + +def right(string: ExpressionType, length: ExpressionType) -> InstrumentedExpression: + """Return the substring that extracts *length* chars from *str* starting + from the right. + + :param string: The string from which to returns a substring. + :param length: The number of characters to return. + """ + return InstrumentedExpression(f"RIGHT({_render(string)}, {_render(length)})") + + +def round( + number: ExpressionType, decimals: ExpressionType = None +) -> InstrumentedExpression: + """Rounds a number to the specified number of decimal places. Defaults to + 0, which returns the nearest integer. If the precision is a negative + number, rounds to the number of digits left of the decimal point. + + :param number: The numeric value to round. If `null`, the function returns `null`. 
+ :param decimals: The number of decimal places to round to. Defaults to 0. If + `null`, the function returns `null`. + """ + if decimals is not None: + return InstrumentedExpression(f"ROUND({_render(number)}, {_render(decimals)})") + else: + return InstrumentedExpression(f"ROUND({_render(number)})") + + +def round_to(field: ExpressionType, points: ExpressionType) -> InstrumentedExpression: + """Rounds down to one of a list of fixed points. + + :param field: The numeric value to round. If `null`, the function returns `null`. + :param points: Remaining rounding points. Must be constants. + """ + return InstrumentedExpression(f"ROUND_TO({_render(field)}, {_render(points)})") + + +def rtrim(string: ExpressionType) -> InstrumentedExpression: + """Removes trailing whitespaces from a string. + + :param string: String expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"RTRIM({_render(string)})") + + +def sample(field: ExpressionType, limit: ExpressionType) -> InstrumentedExpression: + """Collects sample values for a field. + + :param field: The field to collect sample values for. + :param limit: The maximum number of values to collect. + """ + return InstrumentedExpression(f"SAMPLE({_render(field)}, {_render(limit)})") + + +def scalb(d: ExpressionType, scale_factor: ExpressionType) -> InstrumentedExpression: + """Returns the result of `d * 2 ^ scaleFactor`, Similar to Java's `scalb` + function. Result is rounded as if performed by a single correctly rounded + floating-point multiply to a member of the double value set. + + :param d: Numeric expression for the multiplier. If `null`, the function + returns `null`. + :param scale_factor: Numeric expression for the scale factor. If `null`, the + function returns `null`. + """ + return InstrumentedExpression(f"SCALB({_render(d)}, {scale_factor})") + + +def sha1(input: ExpressionType) -> InstrumentedExpression: + """Computes the SHA1 hash of the input. + + :param input: Input to hash. 
+ """ + return InstrumentedExpression(f"SHA1({_render(input)})") + + +def sha256(input: ExpressionType) -> InstrumentedExpression: + """Computes the SHA256 hash of the input. + + :param input: Input to hash. + """ + return InstrumentedExpression(f"SHA256({_render(input)})") + + +def signum(number: ExpressionType) -> InstrumentedExpression: + """Returns the sign of the given number. It returns `-1` for negative + numbers, `0` for `0` and `1` for positive numbers. + + :param number: Numeric expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"SIGNUM({_render(number)})") + + +def sin(angle: ExpressionType) -> InstrumentedExpression: + """Returns the sine of an angle. + + :param angle: An angle, in radians. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"SIN({_render(angle)})") + + +def sinh(number: ExpressionType) -> InstrumentedExpression: + """Returns the hyperbolic sine of a number. + + :param number: Numeric expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"SINH({_render(number)})") + + +def space(number: ExpressionType) -> InstrumentedExpression: + """Returns a string made of `number` spaces. + + :param number: Number of spaces in result. + """ + return InstrumentedExpression(f"SPACE({_render(number)})") + + +def split(string: ExpressionType, delim: ExpressionType) -> InstrumentedExpression: + """Split a single valued string into multiple strings. + + :param string: String expression. If `null`, the function returns `null`. + :param delim: Delimiter. Only single byte delimiters are currently supported. + """ + return InstrumentedExpression(f"SPLIT({_render(string)}, {_render(delim)})") + + +def sqrt(number: ExpressionType) -> InstrumentedExpression: + """Returns the square root of a number. The input can be any numeric value, + the return value is always a double. Square roots of negative numbers and + infinities are null. 
+ + :param number: Numeric expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"SQRT({_render(number)})") + + +def starts_with(str: ExpressionType, prefix: ExpressionType) -> InstrumentedExpression: + """Returns a boolean that indicates whether a keyword string starts with + another string. + + :param str: String expression. If `null`, the function returns `null`. + :param prefix: String expression. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"STARTS_WITH({_render(str)}, {_render(prefix)})") + + +def std_dev(number: ExpressionType) -> InstrumentedExpression: + """The population standard deviation of a numeric field. + + :param number: + """ + return InstrumentedExpression(f"STD_DEV({_render(number)})") + + +def st_centroid_agg(field: ExpressionType) -> InstrumentedExpression: + """Calculate the spatial centroid over a field with spatial point geometry type. + + :param field: + """ + return InstrumentedExpression(f"ST_CENTROID_AGG({_render(field)})") + + +def st_contains( + geom_a: ExpressionType, geom_b: ExpressionType +) -> InstrumentedExpression: + """Returns whether the first geometry contains the second geometry. This is + the inverse of the ST_WITHIN function. + + :param geom_a: Expression of type `geo_point`, `cartesian_point`, + `geo_shape` or `cartesian_shape`. If `null`, the function returns + `null`. + :param geom_b: Expression of type `geo_point`, `cartesian_point`, `geo_shape` + or `cartesian_shape`. If `null`, the function returns `null`. The + second parameter must also have the same coordinate system as the + first. This means it is not possible to combine `geo_*` and + `cartesian_*` parameters. + """ + return InstrumentedExpression(f"ST_CONTAINS({geom_a}, {geom_b})") + + +def st_disjoint( + geom_a: ExpressionType, geom_b: ExpressionType +) -> InstrumentedExpression: + """Returns whether the two geometries or geometry columns are disjoint. 
+ This is the inverse of the ST_INTERSECTS function. In mathematical terms: + ST_Disjoint(A, B) ⇔ A ⋂ B = ∅ + + :param geom_a: Expression of type `geo_point`, `cartesian_point`, + `geo_shape` or `cartesian_shape`. If `null`, the function returns + `null`. + :param geom_b: Expression of type `geo_point`, `cartesian_point`, `geo_shape` + or `cartesian_shape`. If `null`, the function returns `null`. The + second parameter must also have the same coordinate system as the + first. This means it is not possible to combine `geo_*` and + `cartesian_*` parameters. + """ + return InstrumentedExpression(f"ST_DISJOINT({geom_a}, {geom_b})") + + +def st_distance( + geom_a: ExpressionType, geom_b: ExpressionType +) -> InstrumentedExpression: + """Computes the distance between two points. For cartesian geometries, this + is the pythagorean distance in the same units as the original coordinates. + For geographic geometries, this is the circular distance along the great + circle in meters. + + :param geom_a: Expression of type `geo_point` or `cartesian_point`. If + `null`, the function returns `null`. + :param geom_b: Expression of type `geo_point` or `cartesian_point`. If + `null`, the function returns `null`. The second parameter must + also have the same coordinate system as the first. This means it + is not possible to combine `geo_point` and `cartesian_point` parameters. + """ + return InstrumentedExpression(f"ST_DISTANCE({geom_a}, {geom_b})") + + +def st_envelope(geometry: ExpressionType) -> InstrumentedExpression: + """Determines the minimum bounding box of the supplied geometry. + + :param geometry: Expression of type `geo_point`, `geo_shape`, + `cartesian_point` or `cartesian_shape`. If `null`, the function + returns `null`. + """ + return InstrumentedExpression(f"ST_ENVELOPE({_render(geometry)})") + + +def st_extent_agg(field: ExpressionType) -> InstrumentedExpression: + """Calculate the spatial extent over a field with geometry type. 
Returns a + bounding box for all values of the field. + + :param field: + """ + return InstrumentedExpression(f"ST_EXTENT_AGG({_render(field)})") + + +def st_geohash( + geometry: ExpressionType, precision: ExpressionType, bounds: ExpressionType = None +) -> InstrumentedExpression: + """Calculates the `geohash` of the supplied geo_point at the specified + precision. The result is long encoded. Use ST_GEOHASH_TO_STRING to convert + the result to a string. These functions are related to the `geo_grid` + query and the `geohash_grid` aggregation. + + :param geometry: Expression of type `geo_point`. If `null`, the function + returns `null`. + :param precision: Expression of type `integer`. If `null`, the function + returns `null`. Valid values are between 1 and 12. + :param bounds: Optional bounds to filter the grid tiles, a `geo_shape` of + type `BBOX`. Use `ST_ENVELOPE` if the `geo_shape` is of any + other type. + """ + if bounds is not None: + return InstrumentedExpression( + f"ST_GEOHASH({_render(geometry)}, {_render(precision)}, {_render(bounds)})" + ) + else: + return InstrumentedExpression( + f"ST_GEOHASH({_render(geometry)}, {_render(precision)})" + ) + + +def st_geohash_to_long(grid_id: ExpressionType) -> InstrumentedExpression: + """Converts an input value representing a geohash grid-ID in string format + into a long. + + :param grid_id: Input geohash grid-id. The input can be a single- or + multi-valued column or an expression. + """ + return InstrumentedExpression(f"ST_GEOHASH_TO_LONG({_render(grid_id)})") + + +def st_geohash_to_string(grid_id: ExpressionType) -> InstrumentedExpression: + """Converts an input value representing a geohash grid-ID in long format + into a string. + + :param grid_id: Input geohash grid-id. The input can be a single- or + multi-valued column or an expression. 
+ """ + return InstrumentedExpression(f"ST_GEOHASH_TO_STRING({_render(grid_id)})") + + +def st_geohex( + geometry: ExpressionType, precision: ExpressionType, bounds: ExpressionType = None +) -> InstrumentedExpression: + """Calculates the `geohex`, the H3 cell-id, of the supplied geo_point at + the specified precision. The result is long encoded. Use + ST_GEOHEX_TO_STRING to convert the result to a string. These functions are + related to the `geo_grid` query and the `geohex_grid` aggregation. + + :param geometry: Expression of type `geo_point`. If `null`, the function + returns `null`. + :param precision: Expression of type `integer`. If `null`, the function + returns `null`. Valid values are between 0 and 15. + :param bounds: Optional bounds to filter the grid tiles, a `geo_shape` of + type `BBOX`. Use `ST_ENVELOPE` if the `geo_shape` + is of any other type. + """ + if bounds is not None: + return InstrumentedExpression( + f"ST_GEOHEX({_render(geometry)}, {_render(precision)}, {_render(bounds)})" + ) + else: + return InstrumentedExpression( + f"ST_GEOHEX({_render(geometry)}, {_render(precision)})" + ) + + +def st_geohex_to_long(grid_id: ExpressionType) -> InstrumentedExpression: + """Converts an input value representing a geohex grid-ID in string format + into a long. + + :param grid_id: Input geohex grid-id. The input can be a single- or + multi-valued column or an expression. + """ + return InstrumentedExpression(f"ST_GEOHEX_TO_LONG({_render(grid_id)})") + + +def st_geohex_to_string(grid_id: ExpressionType) -> InstrumentedExpression: + """Converts an input value representing a geohex grid-ID in long format + into a string. + + :param grid_id: Input geohex grid-id. The input can be a single- or + multi-valued column or an expression. 
+ """ + return InstrumentedExpression(f"ST_GEOHEX_TO_STRING({_render(grid_id)})") + + +def st_geotile( + geometry: ExpressionType, precision: ExpressionType, bounds: ExpressionType = None +) -> InstrumentedExpression: + """Calculates the `geotile` of the supplied geo_point at the specified + precision. The result is long encoded. Use ST_GEOTILE_TO_STRING to convert + the result to a string. These functions are related to the `geo_grid` + query and the `geotile_grid` aggregation. + + :param geometry: Expression of type `geo_point`. If `null`, the function + returns `null`. + :param precision: Expression of type `integer`. If `null`, the function + returns `null`. Valid values are between 0 and 29. + :param bounds: Optional bounds to filter the grid tiles, a `geo_shape` of + type `BBOX`. Use `ST_ENVELOPE` if the `geo_shape` is of any + other type. + """ + if bounds is not None: + return InstrumentedExpression( + f"ST_GEOTILE({_render(geometry)}, {_render(precision)}, {_render(bounds)})" + ) + else: + return InstrumentedExpression( + f"ST_GEOTILE({_render(geometry)}, {_render(precision)})" + ) + + +def st_geotile_to_long(grid_id: ExpressionType) -> InstrumentedExpression: + """Converts an input value representing a geotile grid-ID in string format + into a long. + + :param grid_id: Input geotile grid-id. The input can be a single- or + multi-valued column or an expression. + """ + return InstrumentedExpression(f"ST_GEOTILE_TO_LONG({_render(grid_id)})") + + +def st_geotile_to_string(grid_id: ExpressionType) -> InstrumentedExpression: + """Converts an input value representing a geotile grid-ID in long format + into a string. + + :param grid_id: Input geotile grid-id. The input can be a single- or + multi-valued column or an expression. + """ + return InstrumentedExpression(f"ST_GEOTILE_TO_STRING({_render(grid_id)})") + + +def st_intersects( + geom_a: ExpressionType, geom_b: ExpressionType +) -> InstrumentedExpression: + """Returns true if two geometries intersect. 
They intersect if they have + any point in common, including their interior points (points along lines or + within polygons). This is the inverse of the ST_DISJOINT function. In + mathematical terms: ST_Intersects(A, B) ⇔ A ⋂ B ≠ ∅ + + :param geom_a: Expression of type `geo_point`, `cartesian_point`, + `geo_shape` or `cartesian_shape`. If `null`, the function returns + `null`. + :param geom_b: Expression of type `geo_point`, `cartesian_point`, `geo_shape` + or `cartesian_shape`. If `null`, the function returns `null`. The + second parameter must also have the same coordinate system as the + first. This means it is not possible to combine `geo_*` and + `cartesian_*` parameters. + """ + return InstrumentedExpression(f"ST_INTERSECTS({_render(geom_a)}, {_render(geom_b)})") + + +def st_within(geom_a: ExpressionType, geom_b: ExpressionType) -> InstrumentedExpression: + """Returns whether the first geometry is within the second geometry. This + is the inverse of the ST_CONTAINS function. + + :param geom_a: Expression of type `geo_point`, `cartesian_point`, + `geo_shape` or `cartesian_shape`. If `null`, the function returns + `null`. + :param geom_b: Expression of type `geo_point`, `cartesian_point`, `geo_shape` + or `cartesian_shape`. If `null`, the function returns `null`. The + second parameter must also have the same coordinate system as the + first. This means it is not possible to combine `geo_*` and + `cartesian_*` parameters. + """ + return InstrumentedExpression(f"ST_WITHIN({_render(geom_a)}, {_render(geom_b)})") + + +def st_x(point: ExpressionType) -> InstrumentedExpression: + """Extracts the `x` coordinate from the supplied point. If the point is of + type `geo_point` this is equivalent to extracting the `longitude` value. + + :param point: Expression of type `geo_point` or `cartesian_point`. If + `null`, the function returns `null`. 
+ """ + return InstrumentedExpression(f"ST_X({_render(point)})") + + +def st_xmax(point: ExpressionType) -> InstrumentedExpression: + """Extracts the maximum value of the `x` coordinates from the supplied + geometry. If the geometry is of type `geo_point` or `geo_shape` this is + equivalent to extracting the maximum `longitude` value. + + :param point: Expression of type `geo_point`, `geo_shape`, `cartesian_point` + or `cartesian_shape`. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"ST_XMAX({_render(point)})") + + +def st_xmin(point: ExpressionType) -> InstrumentedExpression: + """Extracts the minimum value of the `x` coordinates from the supplied + geometry. If the geometry is of type `geo_point` or `geo_shape` this is + equivalent to extracting the minimum `longitude` value. + + :param point: Expression of type `geo_point`, `geo_shape`, `cartesian_point` + or `cartesian_shape`. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"ST_XMIN({_render(point)})") + + +def st_y(point: ExpressionType) -> InstrumentedExpression: + """Extracts the `y` coordinate from the supplied point. If the point is of + type `geo_point` this is equivalent to extracting the `latitude` value. + + :param point: Expression of type `geo_point` or `cartesian_point`. If + `null`, the function returns `null`. + """ + return InstrumentedExpression(f"ST_Y({_render(point)})") + + +def st_ymax(point: ExpressionType) -> InstrumentedExpression: + """Extracts the maximum value of the `y` coordinates from the supplied + geometry. If the geometry is of type `geo_point` or `geo_shape` this is + equivalent to extracting the maximum `latitude` value. + + :param point: Expression of type `geo_point`, `geo_shape`, `cartesian_point` + or `cartesian_shape`. If `null`, the function returns `null`. 
+ """ + return InstrumentedExpression(f"ST_YMAX({_render(point)})") + + +def st_ymin(point: ExpressionType) -> InstrumentedExpression: + """Extracts the minimum value of the `y` coordinates from the supplied + geometry. If the geometry is of type `geo_point` or `geo_shape` this is + equivalent to extracting the minimum `latitude` value. + + :param point: Expression of type `geo_point`, `geo_shape`, `cartesian_point` + or `cartesian_shape`. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"ST_YMIN({_render(point)})") + + +def substring( + string: ExpressionType, start: ExpressionType, length: ExpressionType = None +) -> InstrumentedExpression: + """Returns a substring of a string, specified by a start position and an + optional length. + + :param string: String expression. If `null`, the function returns `null`. + :param start: Start position. + :param length: Length of the substring from the start position. Optional; if + omitted, all positions after `start` are returned. + """ + if length is not None: + return InstrumentedExpression( + f"SUBSTRING({_render(string)}, {_render(start)}, {_render(length)})" + ) + else: + return InstrumentedExpression(f"SUBSTRING({_render(string)}, {_render(start)})") + + +def sum(number: ExpressionType) -> InstrumentedExpression: + """The sum of a numeric expression. + + :param number: + """ + return InstrumentedExpression(f"SUM({_render(number)})") + + +def tan(angle: ExpressionType) -> InstrumentedExpression: + """Returns the tangent of an angle. + + :param angle: An angle, in radians. If `null`, the function returns `null`. + """ + return InstrumentedExpression(f"TAN({_render(angle)})") + + +def tanh(number: ExpressionType) -> InstrumentedExpression: + """Returns the hyperbolic tangent of a number. + + :param number: Numeric expression. If `null`, the function returns `null`. 
+ """ + return InstrumentedExpression(f"TANH({_render(number)})") + + +def tau() -> InstrumentedExpression: + """Returns the ratio of a circle’s circumference to its radius.""" + return InstrumentedExpression("TAU()") + + +def term(field: ExpressionType, query: ExpressionType) -> InstrumentedExpression: + """Performs a Term query on the specified field. Returns true if the + provided term matches the row. + + :param field: Field that the query will target. + :param query: Term you wish to find in the provided field. + """ + return InstrumentedExpression(f"TERM({_render(field)}, {_render(query)})") + + +def top( + field: ExpressionType, limit: ExpressionType, order: ExpressionType +) -> InstrumentedExpression: + """Collects the top values for a field. Includes repeated values. + + :param field: The field to collect the top values for. + :param limit: The maximum number of values to collect. + :param order: The order to calculate the top values. Either `asc` or `desc`. + """ + return InstrumentedExpression( + f"TOP({_render(field)}, {_render(limit)}, {_render(order)})" + ) + + +def to_aggregate_metric_double(number: ExpressionType) -> InstrumentedExpression: + """Encode a numeric to an aggregate_metric_double. + + :param number: Input value. The input can be a single- or multi-valued + column or an expression. + """ + return InstrumentedExpression(f"TO_AGGREGATE_METRIC_DOUBLE({_render(number)})") + + +def to_base64(string: ExpressionType) -> InstrumentedExpression: + """Encode a string to a base64 string. + + :param string: A string. + """ + return InstrumentedExpression(f"TO_BASE64({_render(string)})") + + +def to_boolean(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value to a boolean value. A string value of `true` + will be case-insensitive converted to the Boolean `true`. For anything + else, including the empty string, the function will return `false`. 
The + numerical value of `0` will be converted to `false`, anything else will be + converted to `true`. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_BOOLEAN({_render(field)})") + + +def to_cartesianpoint(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value to a `cartesian_point` value. A string will only + be successfully converted if it respects the WKT Point format. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_CARTESIANPOINT({_render(field)})") + + +def to_cartesianshape(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value to a `cartesian_shape` value. A string will only + be successfully converted if it respects the WKT format. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_CARTESIANSHAPE({_render(field)})") + + +def to_dateperiod(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value into a `date_period` value. + + :param field: Input value. The input is a valid constant date period expression. + """ + return InstrumentedExpression(f"TO_DATEPERIOD({_render(field)})") + + +def to_datetime(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value to a date value. A string will only be + successfully converted if it’s respecting the format + `yyyy-MM-dd'T'HH:mm:ss.SSS'Z'`. To convert dates in other formats, use `DATE_PARSE`. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_DATETIME({_render(field)})") + + +def to_date_nanos(field: ExpressionType) -> InstrumentedExpression: + """Converts an input to a nanosecond-resolution date value (aka date_nanos). + + :param field: Input value. 
The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_DATE_NANOS({_render(field)})") + + +def to_degrees(number: ExpressionType) -> InstrumentedExpression: + """Converts a number in radians to degrees. + + :param number: Input value. The input can be a single- or multi-valued + column or an expression. + """ + return InstrumentedExpression(f"TO_DEGREES({_render(number)})") + + +def to_double(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value to a double value. If the input parameter is of + a date type, its value will be interpreted as milliseconds since the Unix + epoch, converted to double. Boolean `true` will be converted to double + `1.0`, `false` to `0.0`. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_DOUBLE({_render(field)})") + + +def to_geopoint(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value to a `geo_point` value. A string will only be + successfully converted if it respects the WKT Point format. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_GEOPOINT({_render(field)})") + + +def to_geoshape(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value to a `geo_shape` value. A string will only be + successfully converted if it respects the WKT format. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_GEOSHAPE({_render(field)})") + + +def to_integer(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value to an integer value. If the input parameter is + of a date type, its value will be interpreted as milliseconds since the + Unix epoch, converted to integer. 
Boolean `true` will be converted to + integer `1`, `false` to `0`. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_INTEGER({_render(field)})") + + +def to_ip( + field: ExpressionType, options: ExpressionType = None +) -> InstrumentedExpression: + """Converts an input string to an IP value. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + :param options: (Optional) Additional options. + """ + if options is not None: + return InstrumentedExpression(f"TO_IP({_render(field)}, {_render(options)})") + else: + return InstrumentedExpression(f"TO_IP({_render(field)})") + + +def to_long(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value to a long value. If the input parameter is of a + date type, its value will be interpreted as milliseconds since the Unix + epoch, converted to long. Boolean `true` will be converted to long `1`, + `false` to `0`. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_LONG({_render(field)})") + + +def to_lower(str: ExpressionType) -> InstrumentedExpression: + """Returns a new string representing the input string converted to lower case. + + :param str: String expression. If `null`, the function returns `null`. The + input can be a single-valued column or expression, or a multi-valued + column or expression. + """ + return InstrumentedExpression(f"TO_LOWER({_render(str)})") + + +def to_radians(number: ExpressionType) -> InstrumentedExpression: + """Converts a number in degrees to radians. + + :param number: Input value. The input can be a single- or multi-valued + column or an expression. + """ + return InstrumentedExpression(f"TO_RADIANS({_render(number)})") + + +def to_string(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value into a string. 
+ + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_STRING({_render(field)})") + + +def to_timeduration(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value into a `time_duration` value. + + :param field: Input value. The input is a valid constant time duration expression. + """ + return InstrumentedExpression(f"TO_TIMEDURATION({_render(field)})") + + +def to_unsigned_long(field: ExpressionType) -> InstrumentedExpression: + """Converts an input value to an unsigned long value. If the input + parameter is of a date type, its value will be interpreted as milliseconds + since the Unix epoch, converted to unsigned long. Boolean `true` will be + converted to unsigned long `1`, `false` to `0`. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_UNSIGNED_LONG({_render(field)})") + + +def to_upper(str: ExpressionType) -> InstrumentedExpression: + """Returns a new string representing the input string converted to upper case. + + :param str: String expression. If `null`, the function returns `null`. The + input can be a single-valued column or expression, or a multi-valued + column or expression. + """ + return InstrumentedExpression(f"TO_UPPER({_render(str)})") + + +def to_version(field: ExpressionType) -> InstrumentedExpression: + """Converts an input string to a version value. + + :param field: Input value. The input can be a single- or multi-valued column + or an expression. + """ + return InstrumentedExpression(f"TO_VERSION({_render(field)})") + + +def trim(string: ExpressionType) -> InstrumentedExpression: + """Removes leading and trailing whitespaces from a string. + + :param string: String expression. If `null`, the function returns `null`. 
+ """ + return InstrumentedExpression(f"TRIM({_render(string)})") + + +def values(field: ExpressionType) -> InstrumentedExpression: + """Returns unique values as a multivalued field. The order of the returned + values isn’t guaranteed. If you need the values returned in order use `MV_SORT`. + + :param field: + """ + return InstrumentedExpression(f"VALUES({_render(field)})") + + +def weighted_avg( + number: ExpressionType, weight: ExpressionType +) -> InstrumentedExpression: + """The weighted average of a numeric expression. + + :param number: A numeric value. + :param weight: A numeric weight. + """ + return InstrumentedExpression(f"WEIGHTED_AVG({_render(number)}, {_render(weight)})") diff --git a/test_elasticsearch/test_dsl/_async/test_esql.py b/test_elasticsearch/test_dsl/_async/test_esql.py new file mode 100644 index 000000000..7aacb833c --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_esql.py @@ -0,0 +1,93 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest + +from elasticsearch.dsl import AsyncDocument, M +from elasticsearch.esql import ESQL, functions + + +class Employee(AsyncDocument): + emp_no: M[int] + first_name: M[str] + last_name: M[str] + height: M[float] + still_hired: M[bool] + + class Index: + name = "employees" + + +async def load_db(): + data = [ + [10000, "Joseph", "Wall", 2.2, True], + [10001, "Stephanie", "Ward", 1.749, True], + [10002, "David", "Keller", 1.872, True], + [10003, "Roger", "Hinton", 1.694, False], + [10004, "Joshua", "Garcia", 1.661, False], + [10005, "Matthew", "Richards", 1.633, False], + [10006, "Maria", "Luna", 1.893, True], + [10007, "Angela", "Navarro", 1.604, False], + [10008, "Maria", "Cannon", 2.079, False], + [10009, "Joseph", "Sutton", 2.025, True], + ] + if await Employee._index.exists(): + await Employee._index.delete() + await Employee.init() + + for e in data: + employee = Employee( + emp_no=e[0], first_name=e[1], last_name=e[2], height=e[3], still_hired=e[4] + ) + await employee.save() + await Employee._index.refresh() + + +@pytest.mark.asyncio +async def test_esql(async_client): + await load_db() + + # get the full names of the employees + query = ( + ESQL.from_(Employee) + .eval(name=functions.concat(Employee.first_name, " ", Employee.last_name)) + .keep("name") + .sort("name") + .limit(10) + ) + r = await async_client.esql.query(query=str(query)) + assert r.body["values"] == [ + ["Angela Navarro"], + ["David Keller"], + ["Joseph Sutton"], + ["Joseph Wall"], + ["Joshua Garcia"], + ["Maria Cannon"], + ["Maria Luna"], + ["Matthew Richards"], + ["Roger Hinton"], + ["Stephanie Ward"], + ] + + # get the average height of all hired employees + query = ESQL.from_(Employee).stats( + avg_height=functions.round(functions.avg(Employee.height), 2).where( + Employee.still_hired == True # noqa: E712 + ) + ) + r = await async_client.esql.query(query=str(query)) + assert r.body["values"] == [[1.95]] diff --git a/test_elasticsearch/test_dsl/_sync/test_esql.py 
b/test_elasticsearch/test_dsl/_sync/test_esql.py new file mode 100644 index 000000000..1c4084fc7 --- /dev/null +++ b/test_elasticsearch/test_dsl/_sync/test_esql.py @@ -0,0 +1,93 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest + +from elasticsearch.dsl import Document, M +from elasticsearch.esql import ESQL, functions + + +class Employee(Document): + emp_no: M[int] + first_name: M[str] + last_name: M[str] + height: M[float] + still_hired: M[bool] + + class Index: + name = "employees" + + +def load_db(): + data = [ + [10000, "Joseph", "Wall", 2.2, True], + [10001, "Stephanie", "Ward", 1.749, True], + [10002, "David", "Keller", 1.872, True], + [10003, "Roger", "Hinton", 1.694, False], + [10004, "Joshua", "Garcia", 1.661, False], + [10005, "Matthew", "Richards", 1.633, False], + [10006, "Maria", "Luna", 1.893, True], + [10007, "Angela", "Navarro", 1.604, False], + [10008, "Maria", "Cannon", 2.079, False], + [10009, "Joseph", "Sutton", 2.025, True], + ] + if Employee._index.exists(): + Employee._index.delete() + Employee.init() + + for e in data: + employee = Employee( + emp_no=e[0], first_name=e[1], last_name=e[2], height=e[3], still_hired=e[4] + ) + employee.save() + Employee._index.refresh() + + +@pytest.mark.sync +def 
test_esql(client): + load_db() + + # get the full names of the employees + query = ( + ESQL.from_(Employee) + .eval(name=functions.concat(Employee.first_name, " ", Employee.last_name)) + .keep("name") + .sort("name") + .limit(10) + ) + r = client.esql.query(query=str(query)) + assert r.body["values"] == [ + ["Angela Navarro"], + ["David Keller"], + ["Joseph Sutton"], + ["Joseph Wall"], + ["Joshua Garcia"], + ["Maria Cannon"], + ["Maria Luna"], + ["Matthew Richards"], + ["Roger Hinton"], + ["Stephanie Ward"], + ] + + # get the average height of all hired employees + query = ESQL.from_(Employee).stats( + avg_height=functions.round(functions.avg(Employee.height), 2).where( + Employee.still_hired == True # noqa: E712 + ) + ) + r = client.esql.query(query=str(query)) + assert r.body["values"] == [[1.95]] diff --git a/test_elasticsearch/test_esql.py b/test_elasticsearch/test_esql.py new file mode 100644 index 000000000..70c9ec679 --- /dev/null +++ b/test_elasticsearch/test_esql.py @@ -0,0 +1,715 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from elasticsearch.dsl import E +from elasticsearch.esql import ESQL, and_, functions, not_, or_ + + +def test_from(): + query = ESQL.from_("employees") + assert query.render() == "FROM employees" + + query = ESQL.from_("") + assert query.render() == "FROM " + + query = ESQL.from_("employees-00001", "other-employees-*") + assert query.render() == "FROM employees-00001, other-employees-*" + + query = ESQL.from_("cluster_one:employees-00001", "cluster_two:other-employees-*") + assert ( + query.render() + == "FROM cluster_one:employees-00001, cluster_two:other-employees-*" + ) + + query = ESQL.from_("employees").metadata("_id") + assert query.render() == "FROM employees METADATA _id" + + +def test_row(): + query = ESQL.row(a=1, b="two", c=None) + assert query.render() == 'ROW a = 1, b = "two", c = null' + + query = ESQL.row(a=[2, 1]) + assert query.render() == "ROW a = [2, 1]" + + query = ESQL.row(a=functions.round(1.23, 0)) + assert query.render() == "ROW a = ROUND(1.23, 0)" + + +def test_show(): + query = ESQL.show("INFO") + assert query.render() == "SHOW INFO" + + +def test_change_point(): + query = ( + ESQL.row(key=list(range(1, 26))) + .mv_expand("key") + .eval(value=functions.case(E("key") < 13, 0, 42)) + .change_point("value") + .on("key") + .where("type IS NOT NULL") + ) + assert ( + query.render() + == """ROW key = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] +| MV_EXPAND key +| EVAL value = CASE(key < 13, 0, 42) +| CHANGE_POINT value ON key +| WHERE type IS NOT NULL""" + ) + + +def test_completion(): + query = ( + ESQL.row(question="What is Elasticsearch?") + .completion("question") + .with_("test_completion_model") + .keep("question", "completion") + ) + assert ( + query.render() + == """ROW question = "What is Elasticsearch?" 
+| COMPLETION question WITH test_completion_model +| KEEP question, completion""" + ) + + query = ( + ESQL.row(question="What is Elasticsearch?") + .completion(answer=E("question")) + .with_("test_completion_model") + .keep("question", "answer") + ) + assert ( + query.render() + == """ROW question = "What is Elasticsearch?" +| COMPLETION answer = question WITH test_completion_model +| KEEP question, answer""" + ) + + query = ( + ESQL.from_("movies") + .sort("rating DESC") + .limit(10) + .eval( + prompt="""CONCAT( + "Summarize this movie using the following information: \\n", + "Title: ", title, "\\n", + "Synopsis: ", synopsis, "\\n", + "Actors: ", MV_CONCAT(actors, ", "), "\\n", + )""" + ) + .completion(summary="prompt") + .with_("test_completion_model") + .keep("title", "summary", "rating") + ) + assert ( + query.render() + == """FROM movies +| SORT rating DESC +| LIMIT 10 +| EVAL prompt = CONCAT( + "Summarize this movie using the following information: \\n", + "Title: ", title, "\\n", + "Synopsis: ", synopsis, "\\n", + "Actors: ", MV_CONCAT(actors, ", "), "\\n", + ) +| COMPLETION summary = prompt WITH test_completion_model +| KEEP title, summary, rating""" + ) + + query = ( + ESQL.from_("movies") + .sort("rating DESC") + .limit(10) + .eval( + prompt=functions.concat( + "Summarize this movie using the following information: \n", + "Title: ", + E("title"), + "\n", + "Synopsis: ", + E("synopsis"), + "\n", + "Actors: ", + functions.mv_concat(E("actors"), ", "), + "\n", + ) + ) + .completion(summary="prompt") + .with_("test_completion_model") + .keep("title", "summary", "rating") + ) + assert ( + query.render() + == """FROM movies +| SORT rating DESC +| LIMIT 10 +| EVAL prompt = CONCAT("Summarize this movie using the following information: \\n", "Title: ", title, "\\n", "Synopsis: ", synopsis, "\\n", "Actors: ", MV_CONCAT(actors, ", "), "\\n") +| COMPLETION summary = prompt WITH test_completion_model +| KEEP title, summary, rating""" + ) + + +def test_dissect(): + 
query = ( + ESQL.row(a="2023-01-23T12:15:00.000Z - some text - 127.0.0.1") + .dissect("a", "%{date} - %{msg} - %{ip}") + .keep("date", "msg", "ip") + ) + assert ( + query.render() + == """ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" +| DISSECT a "%{date} - %{msg} - %{ip}" +| KEEP date, msg, ip""" + ) + + +def test_drop(): + query = ESQL.from_("employees").drop("height") + assert query.render() == "FROM employees\n| DROP height" + query = ESQL.from_("employees").drop("height*") + assert query.render() == "FROM employees\n| DROP height*" + + +def test_enrich(): + query = ESQL.row(language_code="1").enrich("languages_policy") + assert ( + query.render() + == """ROW language_code = "1" +| ENRICH languages_policy""" + ) + + query = ESQL.row(language_code="1").enrich("languages_policy").on("a") + assert ( + query.render() + == """ROW language_code = "1" +| ENRICH languages_policy ON a""" + ) + + query = ( + ESQL.row(language_code="1") + .enrich("languages_policy") + .on("a") + .with_(name="language_name") + ) + assert ( + query.render() + == """ROW language_code = "1" +| ENRICH languages_policy ON a WITH name = language_name""" + ) + + +def test_eval(): + query = ( + ESQL.from_("employees") + .sort("emp_no") + .keep("first_name", "last_name", "height") + .eval(height_feet=E("height") * 3.281, height_cm=E("height") * 100) + ) + assert ( + query.render() + == """FROM employees +| SORT emp_no +| KEEP first_name, last_name, height +| EVAL height_feet = height * 3.281, height_cm = height * 100""" + ) + + query = ( + ESQL.from_("employees") + .sort("emp_no") + .keep("first_name", "last_name", "height") + .eval(E("height") * 3.281) + ) + assert ( + query.render() + == """FROM employees +| SORT emp_no +| KEEP first_name, last_name, height +| EVAL height * 3.281""" + ) + + query = ( + ESQL.from_("employees") + .eval("height * 3.281") + .stats(avg_height_feet=functions.avg(E("`height * 3.281`"))) + ) + assert ( + query.render() + == """FROM employees +| EVAL height * 
3.281 +| STATS avg_height_feet = AVG(`height * 3.281`)""" + ) + + +def test_fork(): + query = ( + ESQL.from_("employees") + .fork( + ESQL.branch().where(E("emp_no") == 10001), + ESQL.branch().where("emp_no == 10002"), + ) + .keep("emp_no", "_fork") + .sort("emp_no") + ) + assert ( + query.render() + == """FROM employees +| FORK ( WHERE emp_no == 10001 ) + ( WHERE emp_no == 10002 ) +| KEEP emp_no, _fork +| SORT emp_no""" + ) + + +def test_grok(): + query = ( + ESQL.row(a="2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42") + .grok( + "a", + "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}", + ) + .keep("date", "ip", "email", "num") + ) + assert ( + query.render() + == """ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" +| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}" +| KEEP date, ip, email, num""" + ) + + query = ( + ESQL.row(a="2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42") + .grok( + "a", + "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}", + ) + .keep("date", "ip", "email", "num") + .eval(date=functions.to_datetime(E("date"))) + ) + assert ( + query.render() + == """ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" +| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" +| KEEP date, ip, email, num +| EVAL date = TO_DATETIME(date)""" + ) + + query = ( + ESQL.from_("addresses") + .keep("city.name", "zip_code") + .grok("zip_code", "%{WORD:zip_parts} %{WORD:zip_parts}") + ) + assert ( + query.render() + == """FROM addresses +| KEEP city.name, zip_code +| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}\"""" + ) + + +def test_keep(): + query = ESQL.from_("employees").keep("emp_no", "first_name", "last_name", "height") + assert ( + query.render() == "FROM employees\n| KEEP emp_no, first_name, last_name, height" + ) + + query = ESQL.from_("employees").keep("h*") + assert query.render() == "FROM 
employees\n| KEEP h*" + + query = ESQL.from_("employees").keep("*", "first_name") + assert query.render() == "FROM employees\n| KEEP *, first_name" + + +def test_limit(): + query = ESQL.from_("index").where(E("field") == "value").limit(1000) + assert query.render() == 'FROM index\n| WHERE field == "value"\n| LIMIT 1000' + + query = ( + ESQL.from_("index").stats(functions.avg(E("field1"))).by("field2").limit(20000) + ) + assert ( + query.render() + == "FROM index\n| STATS AVG(field1)\n BY field2\n| LIMIT 20000" + ) + + +def test_lookup_join(): + query = ( + ESQL.from_("firewall_logs") + .lookup_join("threat_list") + .on("source.IP") + .where("threat_level IS NOT NULL") + ) + assert ( + query.render() + == """FROM firewall_logs +| LOOKUP JOIN threat_list ON source.IP +| WHERE threat_level IS NOT NULL""" + ) + + query = ( + ESQL.from_("system_metrics") + .lookup_join("host_inventory") + .on("host.name") + .lookup_join("ownerships") + .on("host.name") + ) + assert ( + query.render() + == """FROM system_metrics +| LOOKUP JOIN host_inventory ON host.name +| LOOKUP JOIN ownerships ON host.name""" + ) + + query = ESQL.from_("app_logs").lookup_join("service_owners").on("service_id") + assert ( + query.render() + == """FROM app_logs +| LOOKUP JOIN service_owners ON service_id""" + ) + + query = ( + ESQL.from_("employees") + .eval(language_code="languages") + .where(E("emp_no") >= 10091, E("emp_no") < 10094) + .lookup_join("languages_lookup") + .on("language_code") + ) + assert ( + query.render() + == """FROM employees +| EVAL language_code = languages +| WHERE emp_no >= 10091 AND emp_no < 10094 +| LOOKUP JOIN languages_lookup ON language_code""" + ) + + +def test_mv_expand(): + query = ESQL.row(a=[1, 2, 3], b="b", j=["a", "b"]).mv_expand("a") + assert ( + query.render() + == """ROW a = [1, 2, 3], b = "b", j = ["a", "b"] +| MV_EXPAND a""" + ) + + +def test_rename(): + query = ( + ESQL.from_("employees") + .keep("first_name", "last_name", "still_hired") + 
.rename(still_hired="employed") + ) + assert ( + query.render() + == """FROM employees +| KEEP first_name, last_name, still_hired +| RENAME still_hired AS employed""" + ) + + +def test_sample(): + query = ESQL.from_("employees").keep("emp_no").sample(0.05) + assert ( + query.render() + == """FROM employees +| KEEP emp_no +| SAMPLE 0.05""" + ) + + +def test_sort(): + query = ( + ESQL.from_("employees").keep("first_name", "last_name", "height").sort("height") + ) + assert ( + query.render() + == """FROM employees +| KEEP first_name, last_name, height +| SORT height""" + ) + + query = ( + ESQL.from_("employees") + .keep("first_name", "last_name", "height") + .sort("height DESC") + ) + assert ( + query.render() + == """FROM employees +| KEEP first_name, last_name, height +| SORT height DESC""" + ) + + query = ( + ESQL.from_("employees") + .keep("first_name", "last_name", "height") + .sort("height DESC", "first_name ASC") + ) + assert ( + query.render() + == """FROM employees +| KEEP first_name, last_name, height +| SORT height DESC, first_name ASC""" + ) + + query = ( + ESQL.from_("employees") + .keep("first_name", "last_name", "height") + .sort("first_name ASC NULLS FIRST") + ) + assert ( + query.render() + == """FROM employees +| KEEP first_name, last_name, height +| SORT first_name ASC NULLS FIRST""" + ) + + +def test_stats(): + query = ( + ESQL.from_("employees") + .stats(count=functions.count(E("emp_no"))) + .by("languages") + .sort("languages") + ) + assert ( + query.render() + == """FROM employees +| STATS count = COUNT(emp_no) + BY languages +| SORT languages""" + ) + + query = ESQL.from_("employees").stats(avg_lang=functions.avg(E("languages"))) + assert ( + query.render() + == """FROM employees +| STATS avg_lang = AVG(languages)""" + ) + + query = ESQL.from_("employees").stats( + avg_lang=functions.avg(E("languages")), max_lang=functions.max(E("languages")) + ) + assert ( + query.render() + == """FROM employees +| STATS avg_lang = AVG(languages), + max_lang = 
MAX(languages)""" + ) + + query = ( + ESQL.from_("employees") + .stats( + avg50s=functions.avg(E("salary")).where('birth_date < "1960-01-01"'), + avg60s=functions.avg(E("salary")).where('birth_date >= "1960-01-01"'), + ) + .by("gender") + .sort("gender") + ) + assert ( + query.render() + == """FROM employees +| STATS avg50s = AVG(salary) WHERE birth_date < "1960-01-01", + avg60s = AVG(salary) WHERE birth_date >= "1960-01-01" + BY gender +| SORT gender""" + ) + + query = ( + ESQL.from_("employees") + .eval(Ks="salary / 1000") + .stats( + under_40K=functions.count(E("*")).where("Ks < 40"), + inbetween=functions.count(E("*")).where("40 <= Ks", "Ks < 60"), + over_60K=functions.count(E("*")).where("60 <= Ks"), + total=functions.count(E("*")), + ) + ) + assert ( + query.render() + == """FROM employees +| EVAL Ks = salary / 1000 +| STATS under_40K = COUNT(*) WHERE Ks < 40, + inbetween = COUNT(*) WHERE (40 <= Ks) AND (Ks < 60), + over_60K = COUNT(*) WHERE 60 <= Ks, + total = COUNT(*)""" + ) + + query = ( + ESQL.row(i=1, a=["a", "b"]).stats(functions.min(E("i"))).by("a").sort("a ASC") + ) + assert ( + query.render() + == 'ROW i = 1, a = ["a", "b"]\n| STATS MIN(i)\n BY a\n| SORT a ASC' + ) + + query = ( + ESQL.from_("employees") + .eval(hired=functions.date_format(E("hire_date"), "yyyy")) + .stats(avg_salary=functions.avg(E("salary"))) + .by("hired", "languages.long") + .eval(avg_salary=functions.round(E("avg_salary"))) + .sort("hired", "languages.long") + ) + assert ( + query.render() + == """FROM employees +| EVAL hired = DATE_FORMAT("yyyy", hire_date) +| STATS avg_salary = AVG(salary) + BY hired, languages.long +| EVAL avg_salary = ROUND(avg_salary) +| SORT hired, languages.long""" + ) + + +def test_where(): + query = ( + ESQL.from_("employees") + .keep("first_name", "last_name", "still_hired") + .where("still_hired == true") + ) + assert ( + query.render() + == """FROM employees +| KEEP first_name, last_name, still_hired +| WHERE still_hired == true""" + ) + + query = 
ESQL.from_("sample_data").where("@timestamp > NOW() - 1 hour") + assert ( + query.render() + == """FROM sample_data +| WHERE @timestamp > NOW() - 1 hour""" + ) + + query = ( + ESQL.from_("employees") + .keep("first_name", "last_name", "height") + .where("LENGTH(first_name) < 4") + ) + assert ( + query.render() + == """FROM employees +| KEEP first_name, last_name, height +| WHERE LENGTH(first_name) < 4""" + ) + + +def test_and_operator(): + query = ESQL.from_("index").where( + and_(E("age") > 30, E("age") < 40, E("name").is_not_null()) + ) + assert ( + query.render() + == """FROM index +| WHERE (age > 30) AND (age < 40) AND (name IS NOT NULL)""" + ) + + +def test_or_operator(): + query = ESQL.from_("index").where( + or_(E("age") < 30, E("age") > 40, E("name").is_null()) + ) + assert ( + query.render() + == """FROM index +| WHERE (age < 30) OR (age > 40) OR (name IS NULL)""" + ) + + +def test_not_operator(): + query = ESQL.from_("index").where(not_(E("age") > 40)) + assert ( + query.render() + == """FROM index +| WHERE NOT (age > 40)""" + ) + + +def test_in_operator(): + query = ESQL.row(a=1, b=4, c=3).where((E("c") - E("a")).in_(3, E("b") / 2, "a")) + assert ( + query.render() + == """ROW a = 1, b = 4, c = 3 +| WHERE c - a IN (3, b / 2, a)""" + ) + + +def test_like_operator(): + query = ( + ESQL.from_("employees") + .where(E("first_name").like("?b*")) + .keep("first_name", "last_name") + ) + assert ( + query.render() + == """FROM employees +| WHERE first_name LIKE "?b*" +| KEEP first_name, last_name""" + ) + + query = ESQL.row(message="foo * bar").where(E("message").like("foo \\* bar")) + assert ( + query.render() + == """ROW message = "foo * bar" +| WHERE message LIKE "foo \\\\* bar\"""" + ) + + query = ESQL.row(message="foobar").where(E("message").like("foo*", "bar?")) + assert ( + query.render() + == """ROW message = "foobar" +| WHERE message LIKE ("foo*", "bar?")""" + ) + + +def test_rlike_operator(): + query = ( + ESQL.from_("employees") + 
.where(E("first_name").rlike(".leja*")) + .keep("first_name", "last_name") + ) + assert ( + query.render() + == """FROM employees +| WHERE first_name RLIKE ".leja*" +| KEEP first_name, last_name""" + ) + + query = ESQL.row(message="foo ( bar").where(E("message").rlike("foo \\( bar")) + assert ( + query.render() + == """ROW message = "foo ( bar" +| WHERE message RLIKE "foo \\\\( bar\"""" + ) + + query = ESQL.row(message="foobar").where(E("message").rlike("foo.*", "bar.")) + assert ( + query.render() + == """ROW message = "foobar" +| WHERE message RLIKE ("foo.*", "bar.")""" + ) + + +def test_match_operator(): + query = ESQL.from_("books").where(E("author").match("Faulkner")) + assert ( + query.render() + == """FROM books +| WHERE author:"Faulkner\"""" + )