diff --git a/lib/charms/observability_libs/v0/cert_handler.py b/lib/charms/observability_libs/v1/cert_handler.py similarity index 54% rename from lib/charms/observability_libs/v0/cert_handler.py rename to lib/charms/observability_libs/v1/cert_handler.py index 9dcfc8f1..79458e09 100644 --- a/lib/charms/observability_libs/v0/cert_handler.py +++ b/lib/charms/observability_libs/v1/cert_handler.py @@ -17,7 +17,6 @@ self.cert_handler = CertHandler( charm=self, key="my-app-cert-manager", - peer_relation_name="replicas", cert_subject="unit_name", # Optional ) ``` @@ -26,18 +25,16 @@ ```python self.framework.observe(self.cert_handler.on.cert_changed, self._on_server_cert_changed) -container.push(keypath, self.cert_handler.key) -container.push(certpath, self.cert_handler.cert) +container.push(keypath, self.cert_handler.private_key) +container.push(certpath, self.cert_handler.servert_cert) ``` -This library requires a peer relation to be declared in the requirer's metadata. Peer relation data -is used for "persistent storage" of the private key and certs. +Since this library uses [Juju Secrets](https://juju.is/docs/juju/secret) it requires Juju >= 3.0.3. """ import ipaddress -import json import socket from itertools import filterfalse -from typing import List, Optional, Union, cast +from typing import List, Optional, Union try: from charms.tls_certificates_interface.v3.tls_certificates import ( # type: ignore @@ -51,7 +48,7 @@ ) except ImportError as e: raise ImportError( - "failed to import charms.tls_certificates_interface.v3.tls_certificates; " + "failed to import charms.tls_certificates_interface.v2.tls_certificates; " "Either the library itself is missing (please get it through charmcraft fetch-lib) " "or one of its dependencies is unmet." ) from e @@ -60,14 +57,15 @@ from ops.charm import CharmBase, RelationBrokenEvent from ops.framework import EventBase, EventSource, Object, ObjectEvents -from ops.model import Relation +from ops.jujuversion import JujuVersion +from ops.model import SecretNotFoundError logger = logging.getLogger(__name__) LIBID = "b5cd5cd580f3428fa5f59a8876dcbe6a" -LIBAPI = 0 -LIBPATCH = 11 +LIBAPI = 1 +LIBPATCH = 5 def is_ip_address(value: str) -> bool: @@ -99,10 +97,9 @@ def __init__( charm: CharmBase, *, key: str, - peer_relation_name: str, certificates_relation_name: str = "certificates", cert_subject: Optional[str] = None, - extra_sans_dns: Optional[List[str]] = None, # TODO: in v1, rename arg to `sans` + sans: Optional[List[str]] = None, ): """CertHandler is used to wrap TLS Certificates management operations for charms. @@ -112,12 +109,12 @@ def __init__( charm: The owning charm. key: A manually-crafted, static, unique identifier used by ops to identify events. It shouldn't change between one event to another. - peer_relation_name: Must match metadata.yaml. certificates_relation_name: Must match metadata.yaml. cert_subject: Custom subject. Name collisions are under the caller's responsibility. - extra_sans_dns: DNS names. If none are given, use FQDN. + sans: DNS names. If none are given, use FQDN. """ super().__init__(charm, key) + self._check_juju_supports_secrets() self.charm = charm # We need to sanitize the unit name, otherwise route53 complains: @@ -125,13 +122,11 @@ def __init__( self.cert_subject = charm.unit.name.replace("/", "-") if not cert_subject else cert_subject # Use fqdn only if no SANs were given, and drop empty/duplicate SANs - sans = list(set(filter(None, (extra_sans_dns or [socket.getfqdn()])))) + sans = list(set(filter(None, (sans or [socket.getfqdn()])))) self.sans_ip = list(filter(is_ip_address, sans)) self.sans_dns = list(filterfalse(is_ip_address, sans)) - self.peer_relation_name = peer_relation_name self.certificates_relation_name = certificates_relation_name - self.certificates = TLSCertificatesRequiresV3(self.charm, self.certificates_relation_name) self.framework.observe( @@ -139,7 +134,7 @@ def __init__( self._on_config_changed, ) self.framework.observe( - self.charm.on.certificates_relation_joined, # pyright: ignore + self.charm.on[self.certificates_relation_name].relation_joined, # pyright: ignore self._on_certificates_relation_joined, ) self.framework.observe( @@ -163,71 +158,57 @@ def __init__( self._on_certificates_relation_broken, ) - # Peer relation events - self.framework.observe( - self.charm.on[self.peer_relation_name].relation_created, self._on_peer_relation_created - ) - @property def enabled(self) -> bool: """Boolean indicating whether the charm has a tls_certificates relation.""" # We need to check for units as a temporary workaround because of https://bugs.launchpad.net/juju/+bug/2024583 # This could in theory not work correctly on scale down to 0 but it is necessary for the moment. - return ( - len(self.charm.model.relations[self.certificates_relation_name]) > 0 - and len(self.charm.model.get_relation(self.certificates_relation_name).units) > 0 # type: ignore - ) - @property - def _peer_relation(self) -> Optional[Relation]: - """Return the peer relation.""" - return self.charm.model.get_relation(self.peer_relation_name, None) + if not self.charm.model.get_relation(self.certificates_relation_name): + return False - def _on_peer_relation_created(self, _): - """Generate the CSR if the certificates relation is ready.""" - self._generate_privkey() + if not self.charm.model.get_relation( + self.certificates_relation_name + ).units: # pyright: ignore + return False - # check cert relation is ready - if not (self.charm.model.get_relation(self.certificates_relation_name)): - # peer relation event happened to fire before tls-certificates events. - # Abort, and let the "certificates joined" observer create the CSR. - logger.info("certhandler waiting on certificates relation") - return + if not self.charm.model.get_relation( + self.certificates_relation_name + ).app: # pyright: ignore + return False - logger.debug("certhandler has peer and certs relation: proceeding to generate csr") - self._generate_csr() + if not self.charm.model.get_relation( + self.certificates_relation_name + ).data: # pyright: ignore + return False + + return True def _on_certificates_relation_joined(self, _) -> None: - """Generate the CSR if the peer relation is ready.""" self._generate_privkey() - - # check peer relation is there - if not self._peer_relation: - # tls-certificates relation event happened to fire before peer events. - # Abort, and let the "peer joined" relation create the CSR. - logger.info("certhandler waiting on peer relation") - return - - logger.debug("certhandler has peer and certs relation: proceeding to generate csr") self._generate_csr() def _generate_privkey(self): # Generate priv key unless done already # TODO figure out how to go about key rotation. - if not self._private_key: + + if not (relation := self.charm.model.get_relation(self.certificates_relation_name)): + return + + if not self.private_key: private_key = generate_private_key() - self._private_key = private_key.decode() + secret = self.charm.unit.add_secret({"private-key": private_key.decode()}) + secret.grant(relation) + relation.data[self.charm.unit]["private-key-secret-id"] = secret.id # pyright: ignore def _on_config_changed(self, _): - # FIXME on config changed, the web_external_url may or may not change. But because every - # call to `generate_csr` appends a uuid, CSRs cannot be easily compared to one another. - # so for now, will be overwriting the CSR (and cert) every config change. This is not - # great. We could avoid this problem if: - # - we extract the external_url from the existing cert and compare to current; or - # - we drop the web_external_url from the list of SANs. - # Generate a CSR only if the necessary relations are already in place. - if self._peer_relation and self.charm.model.get_relation(self.certificates_relation_name): - self._generate_csr(renew=True) + relation = self.charm.model.get_relation(self.certificates_relation_name) + + if not relation: + return + + self._generate_privkey() + self._generate_csr(renew=True) def _generate_csr( self, overwrite: bool = False, renew: bool = False, clear_cert: bool = False @@ -240,13 +221,16 @@ def _generate_csr( This method intentionally does not emit any events, leave it for caller's responsibility. """ - # At this point, assuming "peer joined" and "certificates joined" have already fired - # (caller must guard) so we must have a private_key entry in relation data at our disposal. - # Otherwise, traceback -> debug. + # if we are in a relation-broken hook, we might not have a relation to publish the csr to. + if not self.charm.model.get_relation(self.certificates_relation_name): + logger.warning( + f"No {self.certificates_relation_name!r} relation found. " f"Cannot generate csr." + ) + return # In case we already have a csr, do not overwrite it by default. if overwrite or renew or not self._csr: - private_key = self._private_key + private_key = self.private_key if private_key is None: # FIXME: raise this in a less nested scope by # generating privkey and csr in the same method. @@ -280,124 +264,111 @@ def _generate_csr( self._csr = csr.decode().strip() if clear_cert: - self._ca_cert = "" - self._server_cert = "" - self._chain = "" + try: + secret = self.model.get_secret(label="ca-certificate-chain") + secret.remove_all_revisions() + except SecretNotFoundError: + logger.debug("Secret with label: 'ca-certificate-chain' not found") def _on_certificate_available(self, event: CertificateAvailableEvent) -> None: """Get the certificate from the event and store it in a peer relation. Note: assuming "limit: 1" in metadata """ - # We need to store the ca cert and server cert somewhere it would persist across upgrades. - # While we support Juju 2.9, the only option is peer data. When we drop 2.9, then secrets. - - # I think juju guarantees that a peer-created always fires before any regular - # relation-changed. If that is not the case, we would need more guards and more paths. - - # Process the cert only if it belongs to the unit that requested it (this unit) event_csr = ( event.certificate_signing_request.strip() if event.certificate_signing_request else None ) if event_csr == self._csr: - self._ca_cert = event.ca - self._server_cert = event.certificate - self._chain = event.chain_as_pem() - self.on.cert_changed.emit() # pyright: ignore + content = { + "ca-cert": event.ca, + "server-cert": event.certificate, + "chain": event.chain_as_pem(), + "csr": event_csr, + } + try: + secret = self.model.get_secret(label="ca-certificate-chain") + except SecretNotFoundError: + if not ( + relation := self.charm.model.get_relation(self.certificates_relation_name) + ): + logger.error("Relation %s not found", self.certificates_relation_name) + return + + secret = self.charm.unit.add_secret(content, label="ca-certificate-chain") + secret.grant(relation) + relation.data[self.charm.unit]["secret-id"] = secret.id # pyright: ignore + self.on.cert_changed.emit() # pyright: ignore + + def _retrieve_secret_id(self, secret_id_name: str) -> Optional[str]: + if not (relation := self.charm.model.get_relation(self.certificates_relation_name)): + return None + + if not (secret_id := relation.data[self.charm.unit].get(secret_id_name)): + return None + + return secret_id + + def _retrieve_from_secret(self, value: str, secret_id_name: str) -> Optional[str]: + if not (secret_id := self._retrieve_secret_id(secret_id_name)): + return None + + if not (secret := self.model.get_secret(id=secret_id)): + return None + + content = secret.get_content() + return content.get(value) @property - def key(self): - """Return the private key.""" - return self._private_key + def private_key(self) -> Optional[str]: + """Private key.""" + return self._retrieve_from_secret("private-key", "private-key-secret-id") @property - def _private_key(self) -> Optional[str]: - if self._peer_relation: - return self._peer_relation.data[self.charm.unit].get("private_key", None) - return None - - @_private_key.setter - def _private_key(self, value: str): - # Caller must guard. We want the setter to fail loudly. Failure must have a side effect. - rel = self._peer_relation - assert rel is not None # For type checker - rel.data[self.charm.unit].update({"private_key": value}) + def private_key_secret_id(self) -> Optional[str]: + """ID of the Juju Secret for the Private key.""" + return self._retrieve_secret_id("private-key-secret-id") @property def _csr(self) -> Optional[str]: - if self._peer_relation: - return self._peer_relation.data[self.charm.unit].get("csr", None) - return None + return self._retrieve_from_secret("csr", "csr-secret-id") @_csr.setter def _csr(self, value: str): - # Caller must guard. We want the setter to fail loudly. Failure must have a side effect. - rel = self._peer_relation - assert rel is not None # For type checker - rel.data[self.charm.unit].update({"csr": value}) + if not (relation := self.charm.model.get_relation(self.certificates_relation_name)): + return - @property - def _ca_cert(self) -> Optional[str]: - if self._peer_relation: - return self._peer_relation.data[self.charm.unit].get("ca", None) - return None - - @_ca_cert.setter - def _ca_cert(self, value: str): - # Caller must guard. We want the setter to fail loudly. Failure must have a side effect. - rel = self._peer_relation - assert rel is not None # For type checker - rel.data[self.charm.unit].update({"ca": value}) + if not (secret_id := relation.data[self.charm.unit].get("csr-secret-id", None)): + secret = self.charm.unit.add_secret({"csr": value}) + secret.grant(relation) + relation.data[self.charm.unit]["csr-secret-id"] = secret.id # pyright: ignore + return + + secret = self.model.get_secret(id=secret_id) + secret.set_content({"csr": value}) @property - def cert(self): - """Return the server cert.""" - return self._server_cert + def ca_cert(self) -> Optional[str]: + """CA Certificate.""" + return self._retrieve_from_secret("ca-cert", "secret-id") @property - def ca(self): - """Return the CA cert.""" - return self._ca_cert + def ca_server_cert_secret_id(self) -> Optional[str]: + """CA server cert secret id.""" + return self._retrieve_secret_id("secret-id") @property - def _server_cert(self) -> Optional[str]: - if self._peer_relation: - return self._peer_relation.data[self.charm.unit].get("certificate", None) - return None - - @_server_cert.setter - def _server_cert(self, value: str): - # Caller must guard. We want the setter to fail loudly. Failure must have a side effect. - rel = self._peer_relation - assert rel is not None # For type checker - rel.data[self.charm.unit].update({"certificate": value}) + def server_cert(self) -> Optional[str]: + """Server Certificate.""" + return self._retrieve_from_secret("server-cert", "secret-id") @property - def _chain(self) -> str: - if self._peer_relation: - if chain := self._peer_relation.data[self.charm.unit].get("chain", ""): - chain = json.loads(chain) - - # In a previous version of this lib, chain used to be a list. - # Convert the List[str] to str, per - # https://github.com/canonical/tls-certificates-interface/pull/141 - if isinstance(chain, list): - chain = "\n\n".join(reversed(chain)) - - return cast(str, chain) - return "" - - @_chain.setter - def _chain(self, value: str): - # Caller must guard. We want the setter to fail loudly. Failure must have a side effect. - rel = self._peer_relation - assert rel is not None # For type checker - rel.data[self.charm.unit].update({"chain": json.dumps(value)}) + def _chain(self) -> Optional[str]: + return self._retrieve_from_secret("chain", "secret-id") @property - def chain(self) -> str: + def chain(self) -> Optional[str]: """Return the ca chain.""" return self._chain @@ -405,19 +376,19 @@ def _on_certificate_expiring( self, event: Union[CertificateExpiringEvent, CertificateInvalidatedEvent] ) -> None: """Generate a new CSR and request certificate renewal.""" - if event.certificate == self._server_cert: + if event.certificate == self.server_cert: self._generate_csr(renew=True) def _certificate_revoked(self, event) -> None: - """Remove the certificate from the peer relation and generate a new CSR.""" + """Remove the certificate and generate a new CSR.""" # Note: assuming "limit: 1" in metadata - if event.certificate == self._server_cert: + if event.certificate == self.server_cert: self._generate_csr(overwrite=True, clear_cert=True) self.on.cert_changed.emit() # pyright: ignore def _on_certificate_invalidated(self, event: CertificateInvalidatedEvent) -> None: """Deal with certificate revocation and expiration.""" - if event.certificate != self._server_cert: + if event.certificate != self.server_cert: return # if event.reason in ("revoked", "expired"): @@ -425,19 +396,25 @@ def _on_certificate_invalidated(self, event: CertificateInvalidatedEvent) -> Non self._generate_csr(overwrite=True, clear_cert=True) self.on.cert_changed.emit() # pyright: ignore - def _on_all_certificates_invalidated(self, event: AllCertificatesInvalidatedEvent) -> None: + def _on_all_certificates_invalidated(self, _: AllCertificatesInvalidatedEvent) -> None: # Do what you want with this information, probably remove all certificates # Note: assuming "limit: 1" in metadata self._generate_csr(overwrite=True, clear_cert=True) self.on.cert_changed.emit() # pyright: ignore - def _on_certificates_relation_broken(self, event: RelationBrokenEvent) -> None: + def _on_certificates_relation_broken(self, _: RelationBrokenEvent) -> None: """Clear the certificates data when removing the relation.""" - if self._peer_relation: - private_key = self._private_key - # This is a workaround for https://bugs.launchpad.net/juju/+bug/2024583 - self._peer_relation.data[self.charm.unit].clear() - if private_key: - self._peer_relation.data[self.charm.unit].update({"private_key": private_key}) - + try: + secret = self.model.get_secret(label="csr-secret-id") + secret.remove_all_revisions() + except SecretNotFoundError: + logger.debug("Secret 'csr-scret-id' not found") self.on.cert_changed.emit() # pyright: ignore + + def _check_juju_supports_secrets(self) -> None: + version = JujuVersion.from_environ() + + if not JujuVersion(version=str(version)).has_secrets: + msg = f"Juju version {version} does not supports Secrets. Juju >= 3.0.3 is needed" + logger.error(msg) + raise RuntimeError(msg) diff --git a/src/grafana_agent.py b/src/grafana_agent.py index fe567302..0f8e084e 100644 --- a/src/grafana_agent.py +++ b/src/grafana_agent.py @@ -27,7 +27,7 @@ ) from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardProvider from charms.loki_k8s.v1.loki_push_api import LokiPushApiConsumer -from charms.observability_libs.v0.cert_handler import CertHandler +from charms.observability_libs.v1.cert_handler import CertHandler from charms.prometheus_k8s.v1.prometheus_remote_write import ( PrometheusRemoteWriteConsumer, ) @@ -142,8 +142,8 @@ def __init__(self, *args): self.cert = CertHandler( self, key="grafana-agent-cert", - peer_relation_name="peers", ) + self.framework.observe(self.cert.on.cert_changed, self._on_cert_changed) # pyright: ignore self._cloud = GrafanaCloudConfigRequirer(self) @@ -488,13 +488,13 @@ def _update_config(self) -> None: # Write TLS files if self.cert.enabled: - if not (self.cert.cert and self.cert.key and self.cert.ca): + if not (self.cert.server_cert and self.cert.private_key and self.cert.ca_cert): self.status.update_config = WaitingStatus("Waiting for TLS certificate.") self.stop() return - self.write_file(self._cert_path, self.cert.cert) - self.write_file(self._key_path, self.cert.key) - self.write_file(self._ca_path, self.cert.ca) + self.write_file(self._cert_path, self.cert.server_cert) + self.write_file(self._key_path, self.cert.private_key) + self.write_file(self._ca_path, self.cert.ca_cert) else: # Delete TLS related files if they exist try: @@ -812,7 +812,7 @@ def _agent_version(self) -> Optional[str]: def _update_ca(self) -> None: """Updates the CA cert on disk from cert_manager.""" - if (not self.cert.enabled) or (not self.cert.ca): + if (not self.cert.enabled) or (not self.cert.ca_cert): try: self.read_file(self._ca_path) except (FileNotFoundError, PathError): @@ -820,5 +820,5 @@ def _update_ca(self) -> None: else: self.delete_file(self._ca_path) else: - self.write_file(self._ca_path, self.cert.ca) + self.write_file(self._ca_path, self.cert.ca_cert) self.run(["update-ca-certificates", "--fresh"]) diff --git a/tox.ini b/tox.ini index 8054845a..4144204c 100644 --- a/tox.ini +++ b/tox.ini @@ -70,6 +70,9 @@ deps = toml responses cosl +setenv = + {[testenv]setenv} + JUJU_VERSION = 3.0.3 commands = coverage run \ --source={[vars]src_path} \