From 6546116d4ef3edd16db7d4777c2c429b4729788c Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Thu, 3 Jul 2025 17:27:19 +0100 Subject: [PATCH 1/3] Update opentelemetry conventions --- elasticsearch/_otel.py | 10 +++++----- test_elasticsearch/test_otel.py | 12 ++++++------ test_elasticsearch/test_server/test_otel.py | 16 ++++++++-------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/elasticsearch/_otel.py b/elasticsearch/_otel.py index f37ca24cd..4a17bd635 100644 --- a/elasticsearch/_otel.py +++ b/elasticsearch/_otel.py @@ -75,11 +75,11 @@ def span( span_name = endpoint_id or method with self.tracer.start_as_current_span(span_name) as otel_span: otel_span.set_attribute("http.request.method", method) - otel_span.set_attribute("db.system", "elasticsearch") + otel_span.set_attribute("db.system.name", "elasticsearch") if endpoint_id is not None: - otel_span.set_attribute("db.operation", endpoint_id) + otel_span.set_attribute("db.operation.name", endpoint_id) for key, value in path_parts.items(): - otel_span.set_attribute(f"db.elasticsearch.path_parts.{key}", value) + otel_span.set_attribute(f"db.operation.parameter.{key}", value) yield OpenTelemetrySpan( otel_span, @@ -94,8 +94,8 @@ def helpers_span(self, span_name: str) -> Generator[OpenTelemetrySpan, None, Non return with self.tracer.start_as_current_span(span_name) as otel_span: - otel_span.set_attribute("db.system", "elasticsearch") - otel_span.set_attribute("db.operation", span_name) + otel_span.set_attribute("db.system.name", "elasticsearch") + otel_span.set_attribute("db.operation.name", span_name) # Without a request method, Elastic APM does not display the traces otel_span.set_attribute("http.request.method", "null") yield OpenTelemetrySpan(otel_span) diff --git a/test_elasticsearch/test_otel.py b/test_elasticsearch/test_otel.py index 48eb9ea58..39edf5869 100644 --- a/test_elasticsearch/test_otel.py +++ b/test_elasticsearch/test_otel.py @@ -68,7 +68,7 @@ def test_minimal_span(): assert spans[0].name == "GET" assert spans[0].attributes == { "http.request.method": "GET", - "db.system": "elasticsearch", + "db.system.name": "elasticsearch", } @@ -92,11 +92,11 @@ def test_detailed_span(): assert spans[0].name == "ml.open_job" assert spans[0].attributes == { "http.request.method": "GET", - "db.system": "elasticsearch", - "db.operation": "ml.open_job", - "db.elasticsearch.path_parts.job_id": "my-job", - "db.elasticsearch.cluster.name": "e9106fc68e3044f0b1475b04bf4ffd5f", - "db.elasticsearch.node.name": "instance-0000000001", + "db.system.name": "elasticsearch", + "db.operation.name": "ml.open_job", + "db.operation.parameter.job_id": "my-job", + "db.namespace": "e9106fc68e3044f0b1475b04bf4ffd5f", + "elasticsearch.node.name": "instance-0000000001", } diff --git a/test_elasticsearch/test_server/test_otel.py b/test_elasticsearch/test_server/test_otel.py index 3f8033d7b..e0b0cc776 100644 --- a/test_elasticsearch/test_server/test_otel.py +++ b/test_elasticsearch/test_server/test_otel.py @@ -44,9 +44,9 @@ def test_otel_end_to_end(sync_client): assert spans[0].name == "search" expected_attributes = { "http.request.method": "POST", - "db.system": "elasticsearch", - "db.operation": "search", - "db.elasticsearch.path_parts.index": "logs-*", + "db.system.name": "elasticsearch", + "db.operation.name": "search", + "db.operation.parameter.index": "logs-*", } # Assert expected atttributes are here, but allow other attributes too # to make this test robust to elastic-transport changes @@ -89,8 +89,8 @@ def test_otel_bulk(sync_client, 
elasticsearch_url, bulk_helper_name): parent_span = spans.pop() assert parent_span.name == f"helpers.{bulk_helper_name}" assert parent_span.attributes == { - "db.system": "elasticsearch", - "db.operation": f"helpers.{bulk_helper_name}", + "db.system.name": "elasticsearch", + "db.operation.name": f"helpers.{bulk_helper_name}", "http.request.method": "null", } @@ -99,9 +99,9 @@ def test_otel_bulk(sync_client, elasticsearch_url, bulk_helper_name): assert span.name == "bulk" expected_attributes = { "http.request.method": "PUT", - "db.system": "elasticsearch", - "db.operation": "bulk", - "db.elasticsearch.path_parts.index": "test-index", + "db.system.name": "elasticsearch", + "db.operation.name": "bulk", + "db.operation.parameter.index": "test-index", } # Assert expected atttributes are here, but allow other attributes too # to make this test robust to elastic-transport changes From 2983d50deb9e27bc6f48fbe20ac28cc6e9186eca Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Mon, 14 Jul 2025 15:52:25 +0100 Subject: [PATCH 2/3] upgrade transport required versions --- elasticsearch/__init__.py | 2 +- elasticsearch/dsl/field.py | 8 +++++ elasticsearch/dsl/types.py | 64 ++++++++++++++++++++++++++++++++++++-- pyproject.toml | 2 +- 4 files changed, 72 insertions(+), 4 deletions(-) diff --git a/elasticsearch/__init__.py b/elasticsearch/__init__.py index c2277228a..363b09182 100644 --- a/elasticsearch/__init__.py +++ b/elasticsearch/__init__.py @@ -28,7 +28,7 @@ # Ensure that a compatible version of elastic-transport is installed. _version_groups = tuple(int(x) for x in re.search(r"^(\d+)\.(\d+)\.(\d+)", _elastic_transport_version).groups()) # type: ignore[union-attr] -if _version_groups < (8, 0, 0) or _version_groups > (9, 0, 0): +if _version_groups < (9, 1, 0) or _version_groups > (10, 0, 0): raise ImportError( "An incompatible version of elastic-transport is installed. Must be between " "v8.0.0 and v9.0.0. Install the correct version with the following command: " diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py index 73108bf3f..c33261458 100644 --- a/elasticsearch/dsl/field.py +++ b/elasticsearch/dsl/field.py @@ -4081,6 +4081,9 @@ def __init__( class SparseVector(Field): """ :arg store: + :arg index_options: Additional index options for the sparse vector + field that controls the token pruning behavior of the sparse + vector field. :arg meta: Metadata about the field. :arg properties: :arg ignore_above: @@ -4099,6 +4102,9 @@ def __init__( self, *args: Any, store: Union[bool, "DefaultType"] = DEFAULT, + index_options: Union[ + "types.SparseVectorIndexOptions", Dict[str, Any], "DefaultType" + ] = DEFAULT, meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT, properties: Union[Mapping[str, Field], "DefaultType"] = DEFAULT, ignore_above: Union[int, "DefaultType"] = DEFAULT, @@ -4113,6 +4119,8 @@ def __init__( ): if store is not DEFAULT: kwargs["store"] = store + if index_options is not DEFAULT: + kwargs["index_options"] = index_options if meta is not DEFAULT: kwargs["meta"] = meta if properties is not DEFAULT: diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py index 7aaf52da6..383a69d83 100644 --- a/elasticsearch/dsl/types.py +++ b/elasticsearch/dsl/types.py @@ -144,8 +144,26 @@ def __init__( class ChunkingSettings(AttrDict[Any]): """ - :arg strategy: (required) The chunking strategy: `sentence` or `word`. - Defaults to `sentence` if omitted. + :arg strategy: (required) The chunking strategy: `sentence`, `word`, + `none` or `recursive`. 
* If `strategy` is set to `recursive`, + you must also specify: - `max_chunk_size` - either `separators` + or`separator_group` Learn more about different chunking + strategies in the linked documentation. Defaults to `sentence` if + omitted. + :arg separator_group: (required) This parameter is only applicable + when using the `recursive` chunking strategy. Sets a predefined + list of separators in the saved chunking settings based on the + selected text type. Values can be `markdown` or `plaintext`. + Using this parameter is an alternative to manually specifying a + custom `separators` list. + :arg separators: (required) A list of strings used as possible split + points when chunking text with the `recursive` strategy. Each + string can be a plain string or a regular expression (regex) + pattern. The system tries each separator in order to split the + text, starting from the first item in the list. After splitting, + it attempts to recombine smaller pieces into larger chunks that + stay within the `max_chunk_size` limit, to reduce the total number + of chunks generated. :arg max_chunk_size: (required) The maximum size of a chunk in words. This value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy). Defaults to @@ -160,6 +178,8 @@ class ChunkingSettings(AttrDict[Any]): """ strategy: Union[str, DefaultType] + separator_group: Union[str, DefaultType] + separators: Union[Sequence[str], DefaultType] max_chunk_size: Union[int, DefaultType] overlap: Union[int, DefaultType] sentence_overlap: Union[int, DefaultType] @@ -168,6 +188,8 @@ def __init__( self, *, strategy: Union[str, DefaultType] = DEFAULT, + separator_group: Union[str, DefaultType] = DEFAULT, + separators: Union[Sequence[str], DefaultType] = DEFAULT, max_chunk_size: Union[int, DefaultType] = DEFAULT, overlap: Union[int, DefaultType] = DEFAULT, sentence_overlap: Union[int, DefaultType] = DEFAULT, @@ -175,6 +197,10 @@ def __init__( ): if strategy is not DEFAULT: kwargs["strategy"] = strategy + if separator_group is not DEFAULT: + kwargs["separator_group"] = separator_group + if separators is not DEFAULT: + kwargs["separators"] = separators if max_chunk_size is not DEFAULT: kwargs["max_chunk_size"] = max_chunk_size if overlap is not DEFAULT: @@ -3723,6 +3749,38 @@ def __init__( super().__init__(kwargs) +class SparseVectorIndexOptions(AttrDict[Any]): + """ + :arg prune: Whether to perform pruning, omitting the non-significant + tokens from the query to improve query performance. If prune is + true but the pruning_config is not specified, pruning will occur + but default values will be used. Default: false + :arg pruning_config: Optional pruning configuration. If enabled, this + will omit non-significant tokens from the query in order to + improve query performance. This is only used if prune is set to + true. If prune is set to true but pruning_config is not specified, + default values will be used. 
+ """ + + prune: Union[bool, DefaultType] + pruning_config: Union["TokenPruningConfig", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + prune: Union[bool, DefaultType] = DEFAULT, + pruning_config: Union[ + "TokenPruningConfig", Dict[str, Any], DefaultType + ] = DEFAULT, + **kwargs: Any, + ): + if prune is not DEFAULT: + kwargs["prune"] = prune + if pruning_config is not DEFAULT: + kwargs["pruning_config"] = pruning_config + super().__init__(kwargs) + + class SuggestContext(AttrDict[Any]): """ :arg name: (required) @@ -5166,9 +5224,11 @@ def buckets_as_dict(self) -> Mapping[str, "FiltersBucket"]: class FiltersBucket(AttrDict[Any]): """ :arg doc_count: (required) + :arg key: """ doc_count: int + key: str class FrequentItemSetsAggregate(AttrDict[Any]): diff --git a/pyproject.toml b/pyproject.toml index 0c66e2f50..2e45a0813 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ keywords = [ ] dynamic = ["version"] dependencies = [ - "elastic-transport>=8.15.1,<9", + "elastic-transport>=9.1.0,<10", "python-dateutil", "typing-extensions", ] From 9db5c1a650c12a8d7d8d3078ca6d9efb8989b190 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Fri, 25 Jul 2025 12:59:42 +0100 Subject: [PATCH 3/3] feedback --- elasticsearch/__init__.py | 4 +-- elasticsearch/dsl/field.py | 8 ----- elasticsearch/dsl/types.py | 64 ++------------------------------------ 3 files changed, 4 insertions(+), 72 deletions(-) diff --git a/elasticsearch/__init__.py b/elasticsearch/__init__.py index 363b09182..c696bd91b 100644 --- a/elasticsearch/__init__.py +++ b/elasticsearch/__init__.py @@ -31,8 +31,8 @@ if _version_groups < (9, 1, 0) or _version_groups > (10, 0, 0): raise ImportError( "An incompatible version of elastic-transport is installed. Must be between " - "v8.0.0 and v9.0.0. Install the correct version with the following command: " - "$ python -m pip install 'elastic-transport>=8, <9'" + "v9.1.0 and v10.0.0. Install the correct version with the following command: " + "$ python -m pip install 'elastic-transport>=9.1, <10'" ) _version_groups = re.search(r"^(\d+)\.(\d+)\.(\d+)", __versionstr__).groups() # type: ignore[assignment, union-attr] diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py index c33261458..73108bf3f 100644 --- a/elasticsearch/dsl/field.py +++ b/elasticsearch/dsl/field.py @@ -4081,9 +4081,6 @@ def __init__( class SparseVector(Field): """ :arg store: - :arg index_options: Additional index options for the sparse vector - field that controls the token pruning behavior of the sparse - vector field. :arg meta: Metadata about the field. 
:arg properties: :arg ignore_above: @@ -4102,9 +4099,6 @@ def __init__( self, *args: Any, store: Union[bool, "DefaultType"] = DEFAULT, - index_options: Union[ - "types.SparseVectorIndexOptions", Dict[str, Any], "DefaultType" - ] = DEFAULT, meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT, properties: Union[Mapping[str, Field], "DefaultType"] = DEFAULT, ignore_above: Union[int, "DefaultType"] = DEFAULT, @@ -4119,8 +4113,6 @@ def __init__( ): if store is not DEFAULT: kwargs["store"] = store - if index_options is not DEFAULT: - kwargs["index_options"] = index_options if meta is not DEFAULT: kwargs["meta"] = meta if properties is not DEFAULT: diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py index 383a69d83..7aaf52da6 100644 --- a/elasticsearch/dsl/types.py +++ b/elasticsearch/dsl/types.py @@ -144,26 +144,8 @@ def __init__( class ChunkingSettings(AttrDict[Any]): """ - :arg strategy: (required) The chunking strategy: `sentence`, `word`, - `none` or `recursive`. * If `strategy` is set to `recursive`, - you must also specify: - `max_chunk_size` - either `separators` - or`separator_group` Learn more about different chunking - strategies in the linked documentation. Defaults to `sentence` if - omitted. - :arg separator_group: (required) This parameter is only applicable - when using the `recursive` chunking strategy. Sets a predefined - list of separators in the saved chunking settings based on the - selected text type. Values can be `markdown` or `plaintext`. - Using this parameter is an alternative to manually specifying a - custom `separators` list. - :arg separators: (required) A list of strings used as possible split - points when chunking text with the `recursive` strategy. Each - string can be a plain string or a regular expression (regex) - pattern. The system tries each separator in order to split the - text, starting from the first item in the list. After splitting, - it attempts to recombine smaller pieces into larger chunks that - stay within the `max_chunk_size` limit, to reduce the total number - of chunks generated. + :arg strategy: (required) The chunking strategy: `sentence` or `word`. + Defaults to `sentence` if omitted. :arg max_chunk_size: (required) The maximum size of a chunk in words. This value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy). 
Defaults to @@ -178,8 +160,6 @@ class ChunkingSettings(AttrDict[Any]): """ strategy: Union[str, DefaultType] - separator_group: Union[str, DefaultType] - separators: Union[Sequence[str], DefaultType] max_chunk_size: Union[int, DefaultType] overlap: Union[int, DefaultType] sentence_overlap: Union[int, DefaultType] @@ -188,8 +168,6 @@ def __init__( self, *, strategy: Union[str, DefaultType] = DEFAULT, - separator_group: Union[str, DefaultType] = DEFAULT, - separators: Union[Sequence[str], DefaultType] = DEFAULT, max_chunk_size: Union[int, DefaultType] = DEFAULT, overlap: Union[int, DefaultType] = DEFAULT, sentence_overlap: Union[int, DefaultType] = DEFAULT, @@ -197,10 +175,6 @@ def __init__( ): if strategy is not DEFAULT: kwargs["strategy"] = strategy - if separator_group is not DEFAULT: - kwargs["separator_group"] = separator_group - if separators is not DEFAULT: - kwargs["separators"] = separators if max_chunk_size is not DEFAULT: kwargs["max_chunk_size"] = max_chunk_size if overlap is not DEFAULT: @@ -3749,38 +3723,6 @@ def __init__( super().__init__(kwargs) -class SparseVectorIndexOptions(AttrDict[Any]): - """ - :arg prune: Whether to perform pruning, omitting the non-significant - tokens from the query to improve query performance. If prune is - true but the pruning_config is not specified, pruning will occur - but default values will be used. Default: false - :arg pruning_config: Optional pruning configuration. If enabled, this - will omit non-significant tokens from the query in order to - improve query performance. This is only used if prune is set to - true. If prune is set to true but pruning_config is not specified, - default values will be used. - """ - - prune: Union[bool, DefaultType] - pruning_config: Union["TokenPruningConfig", Dict[str, Any], DefaultType] - - def __init__( - self, - *, - prune: Union[bool, DefaultType] = DEFAULT, - pruning_config: Union[ - "TokenPruningConfig", Dict[str, Any], DefaultType - ] = DEFAULT, - **kwargs: Any, - ): - if prune is not DEFAULT: - kwargs["prune"] = prune - if pruning_config is not DEFAULT: - kwargs["pruning_config"] = pruning_config - super().__init__(kwargs) - - class SuggestContext(AttrDict[Any]): """ :arg name: (required) @@ -5224,11 +5166,9 @@ def buckets_as_dict(self) -> Mapping[str, "FiltersBucket"]: class FiltersBucket(AttrDict[Any]): """ :arg doc_count: (required) - :arg key: """ doc_count: int - key: str class FrequentItemSetsAggregate(AttrDict[Any]):
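
For reference, a minimal standalone sketch of the span-attribute renames applied in PATCH 1/3, assuming opentelemetry-sdk is installed; the tracer name and console exporter are illustrative only and are not part of the client code above.

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

    # Illustrative setup: export spans to the console so the attributes can be inspected.
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
    trace.set_tracer_provider(provider)

    tracer = trace.get_tracer("example-elasticsearch-client")
    with tracer.start_as_current_span("search") as span:
        span.set_attribute("http.request.method", "POST")
        span.set_attribute("db.system.name", "elasticsearch")          # previously "db.system"
        span.set_attribute("db.operation.name", "search")              # previously "db.operation"
        span.set_attribute("db.operation.parameter.index", "logs-*")   # previously "db.elasticsearch.path_parts.index"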