diff --git a/lib/charms/tempo_k8s/v1/charm_tracing.py b/lib/charms/tempo_k8s/v1/charm_tracing.py index 7c118856..5932c8d8 100644 --- a/lib/charms/tempo_k8s/v1/charm_tracing.py +++ b/lib/charms/tempo_k8s/v1/charm_tracing.py @@ -126,15 +126,14 @@ def my_tracing_endpoint(self) -> Optional[str]: from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import Span, TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.trace import INVALID_SPAN, Tracer +from opentelemetry.trace import get_current_span as otlp_get_current_span from opentelemetry.trace import ( - INVALID_SPAN, - Tracer, get_tracer, get_tracer_provider, set_span_in_context, set_tracer_provider, ) -from opentelemetry.trace import get_current_span as otlp_get_current_span from ops.charm import CharmBase from ops.framework import Framework @@ -147,7 +146,7 @@ def my_tracing_endpoint(self) -> Optional[str]: # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 6 +LIBPATCH = 9 PYDEPS = ["opentelemetry-exporter-otlp-proto-http==1.21.0"] @@ -295,8 +294,8 @@ def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): # self.handle = Handle(None, self.handle_kind, None) original_event_context = framework._event_context - - _service_name = service_name or self.app.name + # default service name isn't just app name because it could conflict with the workload service name + _service_name = service_name or f"{self.app.name}-charm" resource = Resource.create( attributes={ @@ -509,18 +508,20 @@ def trace_type(cls: _T) -> _T: logger.info(f"skipping {method} (dunder)") continue - isstatic = isinstance(inspect.getattr_static(cls, method.__name__), staticmethod) - setattr(cls, name, trace_method(method, static=isstatic)) + new_method = trace_method(method) + if isinstance(inspect.getattr_static(cls, method.__name__), staticmethod): + new_method = staticmethod(new_method) + setattr(cls, name, new_method) return cls -def trace_method(method: _F, static: bool = False) -> _F: +def trace_method(method: _F) -> _F: """Trace this method. A span will be opened when this method is called and closed when it returns. """ - return _trace_callable(method, "method", static=static) + return _trace_callable(method, "method") def trace_function(function: _F) -> _F: @@ -531,20 +532,14 @@ def trace_function(function: _F) -> _F: return _trace_callable(function, "function") -def _trace_callable(callable: _F, qualifier: str, static: bool = False) -> _F: +def _trace_callable(callable: _F, qualifier: str) -> _F: logger.info(f"instrumenting {callable}") # sig = inspect.signature(callable) @functools.wraps(callable) def wrapped_function(*args, **kwargs): # type: ignore name = getattr(callable, "__qualname__", getattr(callable, "__name__", str(callable))) - with _span(f"{'(static) ' if static else ''}{qualifier} call: {name}"): # type: ignore - if static: - # fixme: do we or don't we need [1:]? - # The _trace_callable decorator doesn't always play nice with @staticmethods. - # Sometimes it will receive 'self', sometimes it won't. - # return callable(*args, **kwargs) # type: ignore - return callable(*args[1:], **kwargs) # type: ignore + with _span(f"{qualifier} call: {name}"): # type: ignore return callable(*args, **kwargs) # type: ignore # wrapped_function.__signature__ = sig diff --git a/lib/charms/tempo_k8s/v2/tracing.py b/lib/charms/tempo_k8s/v2/tracing.py index 9e5defc8..8b9fb4f3 100644 --- a/lib/charms/tempo_k8s/v2/tracing.py +++ b/lib/charms/tempo_k8s/v2/tracing.py @@ -69,8 +69,10 @@ def __init__(self, *args): """ # noqa: W505 +import enum import json import logging +from pathlib import Path from typing import ( TYPE_CHECKING, Any, @@ -81,6 +83,7 @@ def __init__(self, *args): Optional, Sequence, Tuple, + Union, cast, ) @@ -94,7 +97,7 @@ def __init__(self, *args): ) from ops.framework import EventSource, Object from ops.model import ModelError, Relation -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict, Field # The unique Charmhub library identifier, never change it LIBID = "12977e9aa0b34367903d8afeb8c3d85d" @@ -104,7 +107,7 @@ def __init__(self, *args): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 5 +LIBPATCH = 7 PYDEPS = ["pydantic"] @@ -121,16 +124,36 @@ def __init__(self, *args): "tempo_grpc", "otlp_grpc", "otlp_http", - # "jaeger_grpc", - "jaeger_thrift_compact", - "jaeger_thrift_http", - "jaeger_thrift_binary", ] -RawReceiver = Tuple[ReceiverProtocol, int] +RawReceiver = Tuple[ReceiverProtocol, str] +"""Helper type. A raw receiver is defined as a tuple consisting of the protocol name, and the (external, if available), +(secured, if available) resolvable server url. +""" + BUILTIN_JUJU_KEYS = {"ingress-address", "private-address", "egress-subnets"} +class TransportProtocolType(str, enum.Enum): + """Receiver Type.""" + + http = "http" + grpc = "grpc" + + +receiver_protocol_to_transport_protocol = { + "zipkin": TransportProtocolType.http, + "kafka": TransportProtocolType.http, + "opencensus": TransportProtocolType.http, + "tempo_http": TransportProtocolType.http, + "tempo_grpc": TransportProtocolType.grpc, + "otlp_grpc": TransportProtocolType.grpc, + "otlp_http": TransportProtocolType.http, +} +"""A mapping between telemetry protocols and their corresponding transport protocol. +""" + + class TracingError(Exception): """Base class for custom errors raised by this library.""" @@ -289,27 +312,81 @@ def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): # todo use models from charm-relation-interfaces -class Receiver(BaseModel): # noqa: D101 - """Receiver data structure.""" +if int(pydantic.version.VERSION.split(".")[0]) < 2: + + class ProtocolType(BaseModel): # type: ignore + """Protocol Type.""" - protocol: ReceiverProtocol - port: int + class Config: + """Pydantic config.""" + use_enum_values = True + """Allow serializing enum values.""" -class TracingProviderAppData(DatabagModel): # noqa: D101 - """Application databag model for the tracing provider.""" + name: str = Field( + ..., + description="Receiver protocol name. What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) + +else: + + class ProtocolType(BaseModel): + """Protocol Type.""" - host: str - """Server hostname (local fqdn).""" + model_config = ConfigDict( + # Allow serializing enum values. + use_enum_values=True + ) + """Pydantic config.""" - receivers: List[Receiver] - """Enabled receivers and ports at which they are listening.""" + name: str = Field( + ..., + description="Receiver protocol name. What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) - external_url: Optional[str] = None - """Server url. If an ingress is present, it will be the ingress address.""" - internal_scheme: Optional[str] = None - """Scheme for internal communication. If it is present, it will be protocol accepted by the provider.""" +class Receiver(BaseModel): + """Specification of an active receiver.""" + + protocol: ProtocolType = Field(..., description="Receiver protocol name and type.") + url: str = Field( + ..., + description="""URL at which the receiver is reachable. If there's an ingress, it would be the external URL. + Otherwise, it would be the service's fqdn or internal IP. + If the protocol type is grpc, the url will not contain a scheme.""", + examples=[ + "http://traefik_address:2331", + "https://traefik_address:2331", + "http://tempo_public_ip:2331", + "https://tempo_public_ip:2331", + "tempo_public_ip:2331", + ], + ) + + +class TracingProviderAppData(DatabagModel): # noqa: D101 + """Application databag model for the tracing provider.""" + + receivers: List[Receiver] = Field( + ..., + description="List of all receivers enabled on the tracing provider.", + ) class TracingRequirerAppData(DatabagModel): # noqa: D101 @@ -481,10 +558,15 @@ def requested_receivers(self) -> List[ReceiverProtocol]: return TracingRequirerAppData.load(relation.data[app]).receivers +class BrokenEvent(RelationBrokenEvent): + """Event emitted when a relation on tracing is broken.""" + + class TracingEndpointProviderEvents(CharmEvents): """TracingEndpointProvider events.""" request = EventSource(RequestEvent) + broken = EventSource(BrokenEvent) class TracingEndpointProvider(Object): @@ -495,21 +577,17 @@ class TracingEndpointProvider(Object): def __init__( self, charm: CharmBase, - host: str, external_url: Optional[str] = None, relation_name: str = DEFAULT_RELATION_NAME, - internal_scheme: Optional[Literal["http", "https"]] = "http", ): """Initialize. Args: charm: a `CharmBase` instance that manages this instance of the Tempo service. - host: address of the node hosting the tempo server. external_url: external address of the node hosting the tempo server, if an ingress is present. relation_name: an optional string name of the relation between `charm` and the Tempo charmed service. The default is "tracing". - internal_scheme: scheme to use with internal urls. Raises: RelationNotFoundError: If there is no relation in the charm's metadata.yaml @@ -525,12 +603,10 @@ def __init__( charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides ) - super().__init__(charm, relation_name + "tracing-provider-v2") + super().__init__(charm, relation_name + "tracing-provider") self._charm = charm - self._host = host self._external_url = external_url self._relation_name = relation_name - self._internal_scheme = internal_scheme self.framework.observe( self._charm.on[relation_name].relation_joined, self._on_relation_event ) @@ -540,18 +616,21 @@ def __init__( self.framework.observe( self._charm.on[relation_name].relation_changed, self._on_relation_event ) + self.framework.observe( + self._charm.on[relation_name].relation_broken, self._on_relation_broken_event + ) + + def _on_relation_broken_event(self, e: RelationBrokenEvent): + """Handle relation broken events.""" + self.on.broken.emit(e.relation) def _on_relation_event(self, e: RelationEvent): """Handle relation created/joined/changed events.""" - if self.is_v2(e.relation): + if self.is_requirer_ready(e.relation): self.on.request.emit(e.relation) - def is_v2(self, relation: Relation): - """Attempt to determine if this relation is a tracing v2 relation. - - Assumes that the V2 requirer will, as soon as possible (relation-created), - publish the list of requested ingestion receivers (can be empty too). - """ + def is_requirer_ready(self, relation: Relation): + """Attempt to determine if requirer has already populated app data.""" try: self._get_requested_protocols(relation) except NotReadyError: @@ -567,7 +646,7 @@ def _get_requested_protocols(relation: Relation): try: databag = TracingRequirerAppData.load(relation.data[app]) except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): - logger.info(f"relation {relation} is not ready to talk tracing v2") + logger.info(f"relation {relation} is not ready to talk tracing") raise NotReadyError() return databag.receivers @@ -584,8 +663,8 @@ def requested_protocols(self): @property def relations(self) -> List[Relation]: - """All v2 relations active on this endpoint.""" - return [r for r in self._charm.model.relations[self._relation_name] if self.is_v2(r)] + """All relations active on this endpoint.""" + return self._charm.model.relations[self._relation_name] def publish_receivers(self, receivers: Sequence[RawReceiver]): """Let all requirers know that these receivers are active and listening.""" @@ -595,12 +674,16 @@ def publish_receivers(self, receivers: Sequence[RawReceiver]): for relation in self.relations: try: TracingProviderAppData( - host=self._host, - external_url=self._external_url or None, receivers=[ - Receiver(port=port, protocol=protocol) for protocol, port in receivers + Receiver( + url=url, + protocol=ProtocolType( + name=protocol, + type=receiver_protocol_to_transport_protocol[protocol], + ), + ) + for protocol, url in receivers ], - internal_scheme=self._internal_scheme, ).dump(relation.data[self._charm.app]) except ModelError as e: @@ -625,11 +708,9 @@ class EndpointRemovedEvent(RelationBrokenEvent): class EndpointChangedEvent(_AutoSnapshotEvent): """Event representing a change in one of the receiver endpoints.""" - __args__ = ("host", "external_url", "_receivers") + __args__ = ("_receivers",) if TYPE_CHECKING: - host = "" # type: str - external_url = "" # type: str _receivers = [] # type: List[dict] @property @@ -769,12 +850,6 @@ def is_ready(self, relation: Optional[Relation] = None): return False try: databag = dict(relation.data[relation.app]) - # "ingesters" Might be populated if the provider sees a v1 relation before a v2 requirer has had time to - # publish the 'receivers' list. This will make Tempo incorrectly assume that this is a v1 - # relation, and act accordingly. Later, when the requirer publishes the requested receivers, - # tempo will be able to course-correct. - if "ingesters" in databag: - del databag["ingesters"] TracingProviderAppData.load(databag) except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): @@ -790,9 +865,7 @@ def _on_tracing_relation_changed(self, event): return data = TracingProviderAppData.load(relation.data[relation.app]) - self.on.endpoint_changed.emit( # type: ignore - relation, data.host, data.external_url, [i.dict() for i in data.receivers] - ) + self.on.endpoint_changed.emit(relation, [i.dict() for i in data.receivers]) # type: ignore def _on_tracing_relation_broken(self, event: RelationBrokenEvent): """Notify the providers that the endpoint is broken.""" @@ -815,7 +888,7 @@ def _get_endpoint( if not app_data: return None receivers: List[Receiver] = list( - filter(lambda i: i.protocol == protocol, app_data.receivers) + filter(lambda i: i.protocol.name == protocol, app_data.receivers) ) if not receivers: logger.error(f"no receiver found with protocol={protocol!r}") @@ -827,18 +900,7 @@ def _get_endpoint( return receiver = receivers[0] - # if there's an external_url argument (v2.5+), use that. Otherwise, we use the tempo local fqdn - if app_data.external_url: - url = f"{app_data.external_url}:{receiver.port}" - else: - # if we didn't receive a scheme (old provider), we assume HTTP is used - url = f"{app_data.internal_scheme or 'http'}://{app_data.host}:{receiver.port}" - - if receiver.protocol.endswith("grpc"): - # TCP protocols don't want an http/https scheme prefix - url = url.split("://")[1] - - return url + return receiver.url def get_endpoint( self, protocol: ReceiverProtocol, relation: Optional[Relation] = None @@ -862,19 +924,67 @@ def get_endpoint( return None return endpoint - # for backwards compatibility with earlier revisions: - def otlp_grpc_endpoint(self): - """Use TracingEndpointRequirer.get_endpoint('otlp_grpc') instead.""" - logger.warning( - "`TracingEndpointRequirer.otlp_grpc_endpoint` is deprecated. " - "Use `TracingEndpointRequirer.get_endpoint('otlp_grpc') instead.`" - ) - return self.get_endpoint("otlp_grpc") - def otlp_http_endpoint(self): - """Use TracingEndpointRequirer.get_endpoint('otlp_http') instead.""" - logger.warning( - "`TracingEndpointRequirer.otlp_http_endpoint` is deprecated. " - "Use `TracingEndpointRequirer.get_endpoint('otlp_http') instead.`" - ) - return self.get_endpoint("otlp_http") +def charm_tracing_config( + endpoint_requirer: TracingEndpointRequirer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[str], Optional[str]]: + """Utility function to determine the charm_tracing config you will likely want. + + If no endpoint is provided: + disable charm tracing. + If https endpoint is provided but cert_path is not found on disk: + disable charm tracing. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm tracing (with or without tls, as appropriate) + + Usage: + If you are using charm_tracing >= v1.9: + >>> from lib.charms.tempo_k8s.v1.charm_tracing import trace_charm + >>> from lib.charms.tempo_k8s.v2.tracing import charm_tracing_config + >>> @trace_charm(tracing_endpoint="my_endpoint", cert_path="cert_path") + >>> class MyCharm(...): + >>> _cert_path = "/path/to/cert/on/charm/container.crt" + >>> def __init__(self, ...): + >>> self.tracing = TracingEndpointRequirer(...) + >>> self.my_endpoint, self.cert_path = charm_tracing_config( + ... self.tracing, self._cert_path) + + If you are using charm_tracing < v1.9: + >>> from lib.charms.tempo_k8s.v1.charm_tracing import trace_charm + >>> from lib.charms.tempo_k8s.v2.tracing import charm_tracing_config + >>> @trace_charm(tracing_endpoint="my_endpoint", cert_path="cert_path") + >>> class MyCharm(...): + >>> _cert_path = "/path/to/cert/on/charm/container.crt" + >>> def __init__(self, ...): + >>> self.tracing = TracingEndpointRequirer(...) + >>> self._my_endpoint, self._cert_path = charm_tracing_config( + ... self.tracing, self._cert_path) + >>> @property + >>> def my_endpoint(self): + >>> return self._my_endpoint + >>> @property + >>> def cert_path(self): + >>> return self._cert_path + + """ + if not endpoint_requirer.is_ready(): + return None, None + + endpoint = endpoint_requirer.get_endpoint("otlp_http") + if not endpoint: + return None, None + + is_https = endpoint.startswith("https://") + + if is_https: + if cert_path is None: + raise TracingError("Cannot send traces to an https endpoint without a certificate.") + elif not Path(cert_path).exists(): + # if endpoint is https BUT we don't have a server_cert yet: + # disable charm tracing until we do to prevent tls errors + return None, None + return endpoint, str(cert_path) + else: + return endpoint, None