Skip to content

Commit

Permalink
Split endpoints into charm-tracing and workload-tracing (#358)
Browse files Browse the repository at this point in the history
* Split endpoints into charm-tracing and workload-tracing; integration tests

* formatting

* Adjust method import to better naming in lib

* Add juju topology to workload traces

* Don't use shared helpers yet

* Review remarks: duplicate method, upper-case constants
  • Loading branch information
mmkay authored Nov 26, 2024
1 parent fc56db9 commit 5b6e32c
Show file tree
Hide file tree
Showing 6 changed files with 216 additions and 19 deletions.
9 changes: 8 additions & 1 deletion charmcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,16 @@ requires:
description: |
Receive oauth server's info and a set of client credentials.
This relation can be used to integrate grafana with an oAuth2/OIDC Provider.
tracing:
charm-tracing:
description: |
Enables sending charm traces to a distributed tracing backend such as Tempo.
limit: 1
interface: tracing
workload-tracing:
description: |
Enables sending workload traces to a distributed tracing backend such as Tempo.
limit: 1
interface: tracing

provides:
metrics-endpoint:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ cryptography
# lib/charms/tempo_k8s/v1/charm_tracing.py
opentelemetry-exporter-otlp-proto-http
pydantic
cosl
60 changes: 42 additions & 18 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@
import socket
import string
import time
from cosl import JujuTopology
from io import StringIO
from pathlib import Path
from typing import Any, Callable, Dict, cast, Optional
from typing import Any, Callable, Dict, cast
from urllib.parse import urlparse
import subprocess

Expand Down Expand Up @@ -72,7 +73,7 @@
UpgradeCharmEvent,
)
from charms.tempo_coordinator_k8s.v0.charm_tracing import trace_charm
from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointRequirer
from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointRequirer, charm_tracing_config
from ops.framework import StoredState
from ops.main import main
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, OpenedPort
Expand Down Expand Up @@ -124,8 +125,8 @@


@trace_charm(
tracing_endpoint="tracing_endpoint",
server_cert="server_ca_cert_path",
tracing_endpoint="charm_tracing_endpoint",
server_cert="server_cert",
extra_types=[
AuthRequirer,
CertHandler,
Expand Down Expand Up @@ -159,6 +160,7 @@ def __init__(self, *args):
self._grafana_config_ini_hash = None
self._grafana_datasources_hash = None
self._stored.set_default(admin_password="")
self._topology = JujuTopology.from_charm(self)

# -- cert_handler
self.cert_handler = CertHandler(
Expand Down Expand Up @@ -193,7 +195,15 @@ def __init__(self, *args):
self.cert_handler.on.cert_changed, # pyright: ignore
],
)
self.tracing = TracingEndpointRequirer(self, protocols=["otlp_http", "otlp_grpc"])
self.charm_tracing = TracingEndpointRequirer(
self, relation_name="charm-tracing", protocols=["otlp_http"]
)
self.workload_tracing = TracingEndpointRequirer(
self, relation_name="workload-tracing", protocols=["otlp_grpc"]
)
self.charm_tracing_endpoint, self.server_cert = charm_tracing_config(
self.charm_tracing, CA_CERT_PATH
)

# -- standard events
self.framework.observe(self.on.install, self._on_install)
Expand Down Expand Up @@ -265,6 +275,15 @@ def __init__(self, *args):
self.grafana_auth_requirer.on.auth_conf_available, # pyright: ignore
self._on_grafana_auth_conf_available,
)
# -- workload tracing observations
self.framework.observe(
self.workload_tracing.on.endpoint_changed, # type: ignore
self._on_workload_tracing_endpoint_changed,
)
self.framework.observe(
self.workload_tracing.on.endpoint_removed, # type: ignore
self._on_workload_tracing_endpoint_removed,
)

# -- cert_handler observations
self.framework.observe(
Expand Down Expand Up @@ -745,6 +764,14 @@ def _on_database_broken(self, event: RelationBrokenEvent) -> None:
# Cleanup the config file
self._configure()

def _on_workload_tracing_endpoint_changed(self, event: RelationChangedEvent) -> None:
"""Adds workload tracing information to grafana's config."""
self._configure()

def _on_workload_tracing_endpoint_removed(self, event: RelationBrokenEvent) -> None:
"""Removes workload tracing information from grafana's config."""
self._configure()

def _generate_grafana_config(self) -> str:
"""Generate a database configuration for Grafana.
Expand Down Expand Up @@ -775,7 +802,7 @@ def _generate_tracing_config(self) -> str:
A string containing the required tracing information to be stubbed into the config
file.
"""
tracing = self.tracing
tracing = self.workload_tracing
if not tracing.is_ready():
return ""
endpoint = tracing.get_endpoint("otlp_grpc")
Expand Down Expand Up @@ -1054,6 +1081,15 @@ def _build_layer(self) -> Layer:
}
)

if self.workload_tracing.is_ready():
topology = self._topology
extra_info.update(
{
"OTEL_RESOURCE_ATTRIBUTES": f"juju_application={topology.application},juju_model={topology.model}"
+ f",juju_model_uuid={topology.model_uuid},juju_unit={topology.unit},juju_charm={topology.charm_name}",
}
)

layer = Layer(
{
"summary": "grafana-k8s layer",
Expand Down Expand Up @@ -1546,18 +1582,6 @@ def _on_oauth_info_changed(self, event: OAuthInfoChangedEvent) -> None:
"""Event handler for the oauth_info_changed event."""
self._configure()

@property
def tracing_endpoint(self) -> Optional[str]:
"""Tempo endpoint for charm tracing."""
if self.tracing.is_ready():
return self.tracing.get_endpoint("otlp_http")
return None

@property
def server_ca_cert_path(self) -> Optional[str]:
"""Server CA certificate path for TLS tracing."""
return str(CA_CERT_PATH) if self.cert_handler.enabled else None


if __name__ == "__main__":
main(GrafanaCharm, use_juju_for_storage=True)
105 changes: 105 additions & 0 deletions tests/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,22 @@
# See LICENSE file for licensing details.
import asyncio
import grp
import json
import logging
import subprocess
from pathlib import Path
from typing import Tuple

import requests
import yaml
from asyncstdlib import functools
from pytest_operator.plugin import OpsTest
from urllib.parse import urlparse
from workload import Grafana
from juju.application import Application
from juju.unit import Unit
from minio import Minio
from tenacity import retry, stop_after_attempt, wait_exponential

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -377,6 +383,105 @@ async def curl(ops_test: OpsTest, *, cert_dir: str, cert_path: str, ip_addr: str
return stdout


async def deploy_and_configure_minio(ops_test: OpsTest) -> None:
"""Deploy and set up minio and s3-integrator needed for s3-like storage backend in the HA charms."""
config = {
"access-key": "accesskey",
"secret-key": "secretkey",
}
await ops_test.model.deploy("minio", channel="edge", trust=True, config=config)
await ops_test.model.wait_for_idle(apps=["minio"], status="active", timeout=2000)
minio_addr = await unit_address(ops_test, "minio", 0)

mc_client = Minio(
f"{minio_addr}:9000",
access_key="accesskey",
secret_key="secretkey",
secure=False,
)

# create tempo bucket
found = mc_client.bucket_exists("tempo")
if not found:
mc_client.make_bucket("tempo")

# configure s3-integrator
s3_integrator_app: Application = ops_test.model.applications["s3-integrator"]
s3_integrator_leader: Unit = s3_integrator_app.units[0]

await s3_integrator_app.set_config(
{
"endpoint": f"minio-0.minio-endpoints.{ops_test.model.name}.svc.cluster.local:9000",
"bucket": "tempo",
}
)

action = await s3_integrator_leader.run_action("sync-s3-credentials", **config)
action_result = await action.wait()
assert action_result.status == "completed"


async def deploy_tempo_cluster(ops_test: OpsTest):
"""Deploys tempo in its HA version together with minio and s3-integrator."""
tempo_app = "tempo"
worker_app = "tempo-worker"
tempo_worker_charm_url, worker_channel = "tempo-worker-k8s", "edge"
tempo_coordinator_charm_url, coordinator_channel = "tempo-coordinator-k8s", "edge"
await ops_test.model.deploy(
tempo_worker_charm_url, application_name=worker_app, channel=worker_channel, trust=True
)
await ops_test.model.deploy(
tempo_coordinator_charm_url,
application_name=tempo_app,
channel=coordinator_channel,
trust=True,
)
await ops_test.model.deploy("s3-integrator", channel="edge")

await ops_test.model.integrate(tempo_app + ":s3", "s3-integrator" + ":s3-credentials")
await ops_test.model.integrate(tempo_app + ":tempo-cluster", worker_app + ":tempo-cluster")

await deploy_and_configure_minio(ops_test)
async with ops_test.fast_forward():
await ops_test.model.wait_for_idle(
apps=[tempo_app, worker_app, "s3-integrator"],
status="active",
timeout=2000,
idle_period=30,
)


def get_traces(tempo_host: str, service_name="tracegen-otlp_http", tls=True):
"""Get traces directly from Tempo REST API."""
url = f"{'https' if tls else 'http'}://{tempo_host}:3200/api/search?tags=service.name={service_name}"
req = requests.get(
url,
verify=False,
)
assert req.status_code == 200
traces = json.loads(req.text)["traces"]
return traces


@retry(stop=stop_after_attempt(15), wait=wait_exponential(multiplier=1, min=4, max=10))
async def get_traces_patiently(tempo_host, service_name="tracegen-otlp_http", tls=True):
"""Get traces directly from Tempo REST API, but also try multiple times.
Useful for cases when Tempo might not return the traces immediately (its API is known for returning data in
random order).
"""
traces = get_traces(tempo_host, service_name=service_name, tls=tls)
assert len(traces) > 0
return traces


async def get_application_ip(ops_test: OpsTest, app_name: str) -> str:
"""Get the application IP address."""
status = await ops_test.model.get_status()
app = status["applications"][app_name]
return app.public_address


class ModelConfigChange:
"""Context manager for temporarily changing a model config option."""

Expand Down
58 changes: 58 additions & 0 deletions tests/integration/test_workload_tracing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env python3
# Copyright 2021 Canonical Ltd.
# See LICENSE file for licensing details.

import logging
from pathlib import Path

import pytest
import yaml
from helpers import (
oci_image,
check_grafana_is_ready,
deploy_tempo_cluster,
get_traces_patiently,
get_application_ip,
)

logger = logging.getLogger(__name__)

METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
APP_NAME = "grafana"
TEMPO_APP_NAME = "tempo"
RESOURCES = {
"grafana-image": oci_image("./charmcraft.yaml", "grafana-image"),
"litestream-image": oci_image("./charmcraft.yaml", "litestream-image"),
}


async def test_setup_env(ops_test):
await ops_test.model.set_config({"logging-config": "<root>=WARNING; unit=DEBUG"})


@pytest.mark.abort_on_fail
async def test_workload_tracing_is_present(ops_test, grafana_charm):
logger.info("deploying tempo cluster")
await deploy_tempo_cluster(ops_test)

logger.info("deploying local charm")
await ops_test.model.deploy(
grafana_charm, resources=RESOURCES, application_name=APP_NAME, trust=True
)
await ops_test.model.wait_for_idle(
apps=[APP_NAME], status="active", timeout=300, wait_for_exact_units=1
)

await check_grafana_is_ready(ops_test, APP_NAME, 0)
# we relate _only_ workload tracing not to confuse with charm traces
await ops_test.model.add_relation(
"{}:workload-tracing".format(APP_NAME), "{}:tracing".format(TEMPO_APP_NAME)
)
await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active")

# Verify workload traces from grafana are ingested into Tempo
assert await get_traces_patiently(
await get_application_ip(ops_test, TEMPO_APP_NAME),
service_name=f"{APP_NAME}",
tls=False,
)
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ deps =
pytest-playwright
lightkube
git+https://github.com/canonical/iam-bundle@671a33419869fab1fe63d81873b41f6b181498e3#egg=oauth_tools
cosl
minio
commands =
playwright install
pytest -vv --tb native --log-cli-level=INFO --color=yes -s {posargs} {toxinidir}/tests/integration

0 comments on commit 5b6e32c

Please sign in to comment.