Skip to content

Commit

Permalink
Merge branch 'main' into split-metrics-integration-test
Browse files Browse the repository at this point in the history
  • Loading branch information
cbartz authored Jan 18, 2024
2 parents c06aa19 + 1f11e2e commit 7ed2c83
Show file tree
Hide file tree
Showing 16 changed files with 279 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
EOF
- name: Cache github-runner Charm
uses: actions/cache@v3
uses: actions/cache@v4
id: cache-charm
with:
path: github-runner_ubuntu-22.04-amd64.charm
Expand Down
17 changes: 17 additions & 0 deletions .github/workflows/workflow_dispatch_ssh_debug.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: Workflow Dispatch Tests (ssh-debug/tmate)

on:
# Manually dispatched workflow action
workflow_dispatch:
inputs:
runner:
description: 'Self hosted gh runner'
required: true

jobs:
workflow-dispatch-tests:
runs-on: [self-hosted, linux, x64, "${{ inputs.runner }}"]
steps:
- name: Setup tmate session
uses: canonical/action-tmate@chore/env_var_change
timeout-minutes: 5
4 changes: 4 additions & 0 deletions metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ provides:
cos-agent:
interface: cos_agent

requires:
debug-ssh:
interface: debug-ssh

storage:
runner:
description: Storage for the root disk of LXD instances hosting the runner application.
Expand Down
6 changes: 6 additions & 0 deletions scripts/build-image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ retry '/snap/bin/lxc exec builder -- /usr/bin/nslookup github.com' 'Wait for net
/snap/bin/lxc exec builder -- /usr/bin/apt-get update
/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/apt-get upgrade -yq
/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/apt-get install linux-generic-hwe-22.04 -yq
# Remove older kernel versions, which are no longer needed now that the HWE kernel is installed.
/snap/bin/lxc exec builder -- /usr/bin/apt-get autoremove --purge

/snap/bin/lxc restart builder
retry '/snap/bin/lxc exec builder -- /usr/bin/who' 'Wait for lxd agent to be ready' 30
Expand All @@ -107,6 +109,10 @@ fi
/snap/bin/lxc exec builder -- /usr/sbin/usermod -aG docker ubuntu
/snap/bin/lxc exec builder -- /usr/sbin/iptables -I DOCKER-USER -j ACCEPT

# Reduce image size
/snap/bin/lxc exec builder -- /usr/bin/npm cache clean --force
/snap/bin/lxc exec builder -- /usr/bin/apt-get clean

# Download and verify checksum of yq
if [[ $(uname -m) == 'aarch64' ]]; then
YQ_ARCH="arm64"
Expand Down
3 changes: 2 additions & 1 deletion src-docs/event_timer.py.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ Construct the timer manager.

---

<a href="../src/event_timer.py#L99"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/event_timer.py#L105"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `disable_event_timer`

Expand Down Expand Up @@ -96,6 +96,7 @@ The timeout is the number of seconds before an event is timed out. If not set or

- <b>`event_name`</b>: Name of the juju event to schedule.
- <b>`interval`</b>: Number of minutes between emitting each event.
- <b>`timeout`</b>: Timeout, in minutes, for handling each event.



Expand Down
16 changes: 13 additions & 3 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus

import metrics
from charm_state import CharmConfigInvalidError, RunnerStorage, State
from charm_state import DEBUG_SSH_INTEGRATION_NAME, CharmConfigInvalidError, RunnerStorage, State
from errors import (
ConfigurationError,
LogrotateSetupError,
Expand Down Expand Up @@ -163,7 +163,6 @@ def __init__(self, *args, **kargs) -> None:
path=self.config["path"], # for detecting changes
token=self.config["token"], # for detecting changes
runner_bin_url=None,
runner_image_url=None,
)

self.proxies: ProxySetting = {}
Expand All @@ -188,6 +187,10 @@ def __init__(self, *args, **kargs) -> None:
self.framework.observe(self.on.config_changed, self._on_config_changed)
self.framework.observe(self.on.start, self._on_start)
self.framework.observe(self.on.stop, self._on_stop)
self.framework.observe(
self.on[DEBUG_SSH_INTEGRATION_NAME].relation_changed,
self._on_debug_ssh_relation_changed,
)

self.framework.observe(self.on.reconcile_runners, self._on_reconcile_runners)

Expand Down Expand Up @@ -499,7 +502,9 @@ def _on_config_changed(self, _event: ConfigChangedEvent) -> None:
self._refresh_firewall()
try:
self._event_timer.ensure_event_timer(
"reconcile-runners", self.config["reconcile-interval"]
event_name="reconcile-runners",
interval=int(self.config["reconcile-interval"]),
timeout=int(self.config["reconcile-interval"]) - 1,
)
except TimerEnableError as ex:
logger.exception("Failed to start the event timer")
Expand Down Expand Up @@ -923,6 +928,11 @@ def _apt_install(self, packages: Sequence[str]) -> None:
execute_command(["dpkg", "--configure", "-a"])
execute_command(["/usr/bin/apt-get", "install", "-qy"] + list(packages))

def _on_debug_ssh_relation_changed(self, _: ops.RelationChangedEvent) -> None:
    """React to a change of the debug-ssh relation data.

    Obtains the runner manager and flushes runners with ``flush_busy=False``.

    Args:
        _: The relation changed event; unused.
    """
    self._get_runner_manager().flush(flush_busy=False)


if __name__ == "__main__":
main(GithubRunnerCharm)
56 changes: 55 additions & 1 deletion src/charm_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
from typing import Optional

from ops import CharmBase
from pydantic import AnyHttpUrl, BaseModel, ValidationError, root_validator
from pydantic import AnyHttpUrl, BaseModel, Field, ValidationError, root_validator
from pydantic.networks import IPvAnyAddress

from utilities import get_env_var

Expand All @@ -33,6 +34,7 @@ class ARCH(str, Enum):


COS_AGENT_INTEGRATION_NAME = "cos-agent"
DEBUG_SSH_INTEGRATION_NAME = "debug-ssh"


class RunnerStorage(str, Enum):
Expand Down Expand Up @@ -182,6 +184,49 @@ def _get_supported_arch() -> ARCH:
raise UnsupportedArchitectureError(arch=arch)


class SSHDebugInfo(BaseModel):
    """SSH connection information for debug workflow.

    Attributes:
        host: The SSH relay server host IP address inside the VPN.
        port: The SSH relay server port.
        rsa_fingerprint: The host SSH server public RSA key fingerprint.
        ed25519_fingerprint: The host SSH server public ed25519 key fingerprint.
    """

    host: IPvAnyAddress
    port: int = Field(0, gt=0, le=65535)
    rsa_fingerprint: str = Field(pattern="^SHA256:.*")
    ed25519_fingerprint: str = Field(pattern="^SHA256:.*")

    @classmethod
    def from_charm(cls, charm: CharmBase) -> Optional["SSHDebugInfo"]:
        """Initialize the SSHDebugInfo from charm relation data.

        Only the first debug-ssh relation is inspected, and within it the
        first unit yielded by iterating over the relation's units.

        Args:
            charm: The charm instance.

        Returns:
            The SSH debug information, or None when there is no debug-ssh
            relation, the relation has no units, or any of the required
            relation data keys is missing or empty.

        Raises:
            ValidationError: If the relation data does not satisfy the
                model's field constraints.
        """
        relations = charm.model.relations[DEBUG_SSH_INTEGRATION_NAME]
        if not relations or not (relation := relations[0]).units:
            return None
        target_unit = next(iter(relation.units))
        relation_data = relation.data[target_unit]

        # All four keys must be present and non-empty before the model is built.
        required_keys = ("host", "port", "rsa_fingerprint", "ed25519_fingerprint")
        values = {key: relation_data.get(key) for key in required_keys}
        if not all(values.values()):
            logger.warning("%s relation data not yet ready.", DEBUG_SSH_INTEGRATION_NAME)
            return None

        # Build through ``cls`` so that subclasses get an instance of their own type.
        return cls(**values)


@dataclasses.dataclass(frozen=True)
class State:
"""The charm state.
Expand All @@ -191,12 +236,14 @@ class State:
proxy_config: Proxy-related configuration.
charm_config: Configuration of the juju charm.
arch: The underlying compute architecture, i.e. x86_64, amd64, arm64/aarch64.
ssh_debug_info: The SSH debug connection configuration information.
"""

is_metrics_logging_available: bool
proxy_config: ProxyConfig
charm_config: CharmConfig
arch: ARCH
ssh_debug_info: Optional[SSHDebugInfo]

@classmethod
def from_charm(cls, charm: CharmBase) -> "State":
Expand Down Expand Up @@ -244,11 +291,18 @@ def from_charm(cls, charm: CharmBase) -> "State":
logger.error("Unsupported architecture: %s", exc.arch)
raise CharmConfigInvalidError(f"Unsupported architecture {exc.arch}") from exc

try:
ssh_debug_info = SSHDebugInfo.from_charm(charm)
except ValidationError as exc:
logger.error("Invalid SSH debug info: %s.", exc)
raise CharmConfigInvalidError("Invalid SSH Debug info") from exc

state = cls(
is_metrics_logging_available=bool(charm.model.relations[COS_AGENT_INTEGRATION_NAME]),
proxy_config=proxy_config,
charm_config=charm_config,
arch=arch,
ssh_debug_info=ssh_debug_info,
)

state_dict = dataclasses.asdict(state)
Expand Down
8 changes: 7 additions & 1 deletion src/event_timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,21 @@ def ensure_event_timer(self, event_name: str, interval: int, timeout: Optional[i
Args:
event_name: Name of the juju event to schedule.
interval: Number of minutes between emitting each event.
timeout: Timeout for each event handle in minutes.
Raises:
TimerEnableError: Timer cannot be started. Events will not be emitted.
"""
if timeout is not None:
timeout_in_secs = timeout * 60
else:
timeout_in_secs = interval * 30

context: EventConfig = {
"event": event_name,
"interval": interval,
"random_delay": interval // 4,
"timeout": timeout or (interval * 30),
"timeout": timeout_in_secs,
"unit": self.unit_name,
}
self._render_event_template("service", event_name, context)
Expand Down
Loading

0 comments on commit 7ed2c83

Please sign in to comment.