diff --git a/pyproject.toml b/pyproject.toml
index d16bac3a9..f4a49bd2a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@ omit = [
]
[tool.coverage.report]
-fail_under = 85
+fail_under = 83
show_missing = true
[tool.pytest.ini_options]
diff --git a/requirements.txt b/requirements.txt
index 541c0d4c9..4d219d184 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,3 +14,4 @@ PyYAML ==6.0.*
pyOpenSSL==24.2.1
kombu==5.4.0
pymongo==4.8.0
+github_runner_manager @ git+https://github.com/canonical/github-runner-manager.git@1f310b22b99a94bd5429184191558426b014ee82
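The new dependency pins `github_runner_manager` to an exact commit via a PEP 508 direct reference. As a minimal sketch (package name and commit hash taken from the requirement line above; everything else is standard library), such a pin can be checked at runtime through the PEP 610 `direct_url.json` metadata that pip records for VCS installs:

```python
# Sketch: verify the git-pinned dependency resolved to the expected commit.
import json
from importlib.metadata import PackageNotFoundError, distribution

EXPECTED_COMMIT = "1f310b22b99a94bd5429184191558426b014ee82"  # from requirements.txt

try:
    dist = distribution("github_runner_manager")
except PackageNotFoundError:
    raise SystemExit("github_runner_manager is not installed")

raw = dist.read_text("direct_url.json")  # None unless pip recorded VCS metadata
if raw:
    vcs_info = json.loads(raw).get("vcs_info", {})
    print("pinned commit ok:", vcs_info.get("commit_id") == EXPECTED_COMMIT)
```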
diff --git a/scripts/reactive_runner.py b/scripts/reactive_runner.py
deleted file mode 100644
index e9b996ff6..000000000
--- a/scripts/reactive_runner.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Script to spawn a reactive runner process."""
-import logging
-import os
-import sys
-
-from reactive.consumer import consume
-from reactive.runner_manager import MQ_URI_ENV_VAR, QUEUE_NAME_ENV_VAR
-
-
-def setup_root_logging() -> None:
- """Set up logging for the reactive runner process."""
- # setup root logger to log in a file which will be picked up by grafana agent and sent to Loki
- logging.basicConfig(
- stream=sys.stdout,
- level=logging.DEBUG,
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
- )
-
-
-def main() -> None:
- """Spawn a process that consumes a message from the queue to create a runner.
-
- Raises:
- ValueError: If the required environment variables are not set
- """
- mq_uri = os.environ.get(MQ_URI_ENV_VAR)
- queue_name = os.environ.get(QUEUE_NAME_ENV_VAR)
-
- if not mq_uri:
- raise ValueError(
- f"Missing {MQ_URI_ENV_VAR} environment variable. "
- "Please set it to the message queue URI."
- )
-
- if not queue_name:
- raise ValueError(
- f"Missing {QUEUE_NAME_ENV_VAR} environment variable. "
- "Please set it to the name of the queue."
- )
-
- setup_root_logging()
- consume(mq_uri, queue_name)
-
-
-if __name__ == "__main__":
- main()
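The standalone reactive-runner script is deleted because reactive spawning now lives in the `github_runner_manager` package added to requirements.txt. A hypothetical sketch of the equivalent entrypoint follows; only `github_runner_manager.reactive.runner_manager` is confirmed elsewhere in this diff (see the src/logrotate.py hunk), while the consumer module path and the environment-variable names are assumptions carried over from the deleted in-tree layout:

```python
# Hypothetical: the module paths below mirror the removed script and are assumed,
# not confirmed by this diff (except github_runner_manager.reactive.runner_manager).
import logging
import os
import sys

from github_runner_manager.reactive.consumer import consume  # assumed path
from github_runner_manager.reactive.runner_manager import (  # assumed names
    MQ_URI_ENV_VAR,
    QUEUE_NAME_ENV_VAR,
)

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
consume(os.environ[MQ_URI_ENV_VAR], os.environ[QUEUE_NAME_ENV_VAR])
```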
diff --git a/src-docs/charm.md b/src-docs/charm.md
index 9fd2aac04..fa38de542 100644
--- a/src-docs/charm.md
+++ b/src-docs/charm.md
@@ -20,7 +20,7 @@ Charm for creating and managing GitHub self-hosted runner instances.
---
-
+
## function `catch_charm_errors`
@@ -46,7 +46,7 @@ Catch common errors in charm.
---
-
+
## function `catch_action_errors`
@@ -72,7 +72,7 @@ Catch common errors in actions.
---
-
+
## class `ReconcileRunnersEvent`
Event representing a periodic check to ensure runners are ok.
@@ -83,7 +83,7 @@ Event representing a periodic check to ensure runners are ok.
---
-
+
## class `GithubRunnerCharm`
Charm for managing GitHub self-hosted runners.
@@ -100,7 +100,7 @@ Charm for managing GitHub self-hosted runners.
- `ram_pool_path`: The path to memdisk storage.
- `kernel_module_path`: The path to kernel modules.
-
+
### method `__init__`
diff --git a/src-docs/charm_state.py.md b/src-docs/charm_state.md
similarity index 71%
rename from src-docs/charm_state.py.md
rename to src-docs/charm_state.md
index 5783f6821..9b4889d5e 100644
--- a/src-docs/charm_state.py.md
+++ b/src-docs/charm_state.md
@@ -2,7 +2,7 @@
-# module `charm_state.py`
+# module `charm_state`
State of the Charm.
**Global Variables**
@@ -33,49 +33,100 @@ State of the Charm.
- **COS_AGENT_INTEGRATION_NAME**
- **DEBUG_SSH_INTEGRATION_NAME**
- **IMAGE_INTEGRATION_NAME**
+- **MONGO_DB_INTEGRATION_NAME**
- **LTS_IMAGE_VERSION_TAG_MAP**
+
+---
+
+
+
+## class `AnyHttpsUrl`
+Represents an HTTPS URL.
+
+
+
+**Attributes:**
+
+ - `allowed_schemes`: Allowed schemes for the URL.
+
+
+
+
+
---
-
+
+
+## class `GithubConfig`
+Charm configuration related to GitHub.
-## function `parse_github_path`
+
+
+**Attributes:**
+
+ - `token`: The Github API access token (PAT).
+ - `path`: The Github org/repo path.
+
+
+
+### method `__init__`
```python
-parse_github_path(path_str: str, runner_group: str) → GithubOrg | GithubRepo
+__init__(token: str, path: GitHubOrg | GitHubRepo) → None
```
-Parse GitHub path.
+
+
+
+
+
+
+
+---
+
+
+
+### classmethod `from_charm`
+
+```python
+from_charm(charm: CharmBase) → GithubConfig
+```
+
+Get github related charm configuration values from charm.
**Args:**
- - `path_str`: GitHub path in string format.
- - `runner_group`: Runner group name for GitHub organization. If the path is a repository this argument is ignored.
+ - `charm`: The charm instance.
**Raises:**
- - `CharmConfigInvalidError`: if an invalid path string was given.
+ - `CharmConfigInvalidError`: If an invalid configuration value was set.
**Returns:**
- GithubPath object representing the GitHub repository, or the GitHub organization with runner group information.
+ The parsed GitHub configuration values.
---
-## class `AnyHttpsUrl`
-Represents an HTTPS URL.
+
+
+## class `VirtualMachineResources`
+Virtual machine resource configuration.
**Attributes:**
- - `allowed_schemes`: Allowed schemes for the URL.
+ - `cpu`: Number of vCPU for the virtual machine.
+ - `memory`: Amount of memory for the virtual machine.
+ - `disk`: Amount of disk for the virtual machine.
@@ -83,6 +134,8 @@ Represents an HTTPS URL.
---
+
+
## class `Arch`
Supported system architectures.
@@ -99,15 +152,17 @@ Supported system architectures.
---
-## class `BaseImage`
-The ubuntu OS base image to build and deploy runners on.
+
+
+## class `RunnerStorage`
+Supported storage as runner disk.
**Attributes:**
- - `JAMMY`: The jammy ubuntu LTS image.
- - `NOBLE`: The noble ubuntu LTS image.
+ - `JUJU_STORAGE`: Represents runner storage from Juju storage.
+ - `MEMORY`: Represents tmpfs storage (ramdisk).
@@ -115,64 +170,80 @@ The ubuntu OS base image to build and deploy runners on.
---
-## class `CharmConfig`
-General charm configuration.
+
-Some charm configurations are grouped into other configuration models.
+## class `InstanceType`
+Type of instance for runner.
**Attributes:**
- - `denylist`: List of IPv4 to block the runners from accessing.
- - `dockerhub_mirror`: Private docker registry as dockerhub mirror for the runners to use.
- - `labels`: Additional runner labels to append to default (i.e. os, flavor, architecture).
- - `openstack_clouds_yaml`: The openstack clouds.yaml configuration.
- - `path`: GitHub repository path in the format '/', or the GitHub organization name.
- - `reconcile_interval`: Time between each reconciliation of runners in minutes.
- - `repo_policy_compliance`: Configuration for the repo policy compliance service.
- - `token`: GitHub personal access token for GitHub API.
+ - `LOCAL_LXD`: LXD instance on the local juju machine.
+ - `OPENSTACK`: OpenStack instance on a cloud.
+
---
-
+
+
+## class `CharmConfigInvalidError`
+Raised when charm config is invalid.
+
-### classmethod `check_reconcile_interval`
+
+**Attributes:**
+
+ - `msg`: Explanation of the error.
+
+
+
+### method `__init__`
```python
-check_reconcile_interval(reconcile_interval: int) → int
+__init__(msg: str)
```
-Validate the general charm configuration.
+Initialize a new instance of the CharmConfigInvalidError exception.
**Args:**
- - `reconcile_interval`: The value of reconcile_interval passed to class instantiation.
+ - `msg`: Explanation of the error.
-**Raises:**
+
+
+---
+
+
+
+## class `RepoPolicyComplianceConfig`
+Configuration for the repo policy compliance service.
+
+
+
+**Attributes:**
- - `ValueError`: if an invalid reconcile_interval value of less than 2 has been passed.
+ - `token`: Token for the repo policy compliance service.
+ - `url`: URL of the repo policy compliance service.
-**Returns:**
- The validated reconcile_interval value.
---
-
+
### classmethod `from_charm`
```python
-from_charm(charm: CharmBase) → CharmConfig
+from_charm(charm: CharmBase) → RepoPolicyComplianceConfig
```
Initialize the config from charm.
@@ -187,121 +258,96 @@ Initialize the config from charm.
**Raises:**
- - `CharmConfigInvalidError`: If any invalid configuration has been set on the charm.
+ - `CharmConfigInvalidError`: If an invalid configuration was set.
**Returns:**
- Current config of the charm.
+ Current repo-policy-compliance config.
---
-## class `CharmConfigInvalidError`
-Raised when charm config is invalid.
+
+
+## class `OpenStackCloudsYAML`
+The OpenStack clouds YAML dict mapping.
**Attributes:**
- - `msg`: Explanation of the error.
-
-
-
-### function `__init__`
-
-```python
-__init__(msg: str)
-```
-
-Initialize a new instance of the CharmConfigInvalidError exception.
-
+ - `clouds`: The map of cloud name to cloud connection info.
-**Args:**
-
- - `msg`: Explanation of the error.
+---
+
----
+## class `CharmConfig`
+General charm configuration.
-## class `CharmState`
-The charm state.
+Some charm configurations are grouped into other configuration models.
**Attributes:**
- - `arch`: The underlying compute architecture, i.e. x86_64, amd64, arm64/aarch64.
- - `charm_config`: Configuration of the juju charm.
- - `is_metrics_logging_available`: Whether the charm is able to issue metrics.
- - `proxy_config`: Proxy-related configuration.
- - `instance_type`: The type of instances, e.g., local lxd, openstack.
- - `runner_config`: The charm configuration related to runner VM configuration.
- - `ssh_debug_connections`: SSH debug connections configuration information.
+ - `denylist`: List of IPv4 to block the runners from accessing.
+ - `dockerhub_mirror`: Private docker registry as dockerhub mirror for the runners to use.
+ - `labels`: Additional runner labels to append to default (i.e. os, flavor, architecture).
+ - `openstack_clouds_yaml`: The openstack clouds.yaml configuration.
+ - `path`: GitHub repository path in the format '/', or the GitHub organization name.
+ - `reconcile_interval`: Time between each reconciliation of runners in minutes.
+ - `repo_policy_compliance`: Configuration for the repo policy compliance service.
+ - `token`: GitHub personal access token for GitHub API.
---
-
+
-### classmethod `from_charm`
+### classmethod `check_reconcile_interval`
```python
-from_charm(charm: CharmBase) → CharmState
+check_reconcile_interval(reconcile_interval: int) → int
```
-Initialize the state from charm.
+Validate the general charm configuration.
**Args:**
- - `charm`: The charm instance.
+ - `reconcile_interval`: The value of reconcile_interval passed to class instantiation.
**Raises:**
- - `CharmConfigInvalidError`: If an invalid configuration was set.
+ - `ValueError`: if an invalid reconcile_interval value of less than 2 has been passed.
**Returns:**
- Current state of the charm.
-
-
----
-
-## class `GithubConfig`
-Charm configuration related to GitHub.
-
-
-
-**Attributes:**
-
- - `token`: The Github API access token (PAT).
- - `path`: The Github org/repo path.
-
-
-
+ The validated reconcile_interval value.
---
-
+
### classmethod `from_charm`
```python
-from_charm(charm: CharmBase) → GithubConfig
+from_charm(charm: CharmBase) → CharmConfig
```
-Get github related charm configuration values from charm.
+Initialize the config from charm.
@@ -313,123 +359,128 @@ Get github related charm configuration values from charm.
**Raises:**
- - `CharmConfigInvalidError`: If an invalid configuration value was set.
+ - `CharmConfigInvalidError`: If any invalid configuration has been set on the charm.
**Returns:**
- The parsed GitHub configuration values.
+ Current config of the charm.
---
-## class `GithubOrg`
-Represent GitHub organization.
+
+
+## class `BaseImage`
+The ubuntu OS base image to build and deploy runners on.
**Attributes:**
- - `org`: Name of the GitHub organization.
- - `group`: Runner group to spawn the runners in.
+ - `JAMMY`: The jammy ubuntu LTS image.
+ - `NOBLE`: The noble ubuntu LTS image.
+
---
-
+
-### function `path`
+## class `OpenstackImage`
+OpenstackImage from image builder relation data.
-```python
-path() → str
-```
-Return a string representing the path.
+**Attributes:**
+
+ - `id`: The OpenStack image ID.
+ - `tags`: Image tags, e.g. jammy.
-**Returns:**
- Path to the GitHub entity.
---
-## class `GithubRepo`
-Represent GitHub repository.
+
+### classmethod `from_charm`
+```python
+from_charm(charm: CharmBase) → OpenstackImage | None
+```
-**Attributes:**
+Initialize the OpenstackImage info from relation data.
+
+None represents a relation that is not established. None values for id/tags mean the image is not yet ready although the relation exists.
+
+
+
+**Args:**
- - `owner`: Owner of the GitHub repository.
- - `repo`: Name of the GitHub repository.
+ - `charm`: The charm instance.
+**Returns:**
+ OpenstackImage metadata from charm relation data.
+
---
-
+
-### function `path`
+## class `OpenstackRunnerConfig`
+Runner configuration for OpenStack Instances.
-```python
-path() → str
-```
-Return a string representing the path.
+**Attributes:**
+
+ - `virtual_machines`: Number of virtual machine-based runners to spawn.
+ - `openstack_flavor`: Flavor on OpenStack to use for virtual machines.
+ - `openstack_network`: Network on OpenStack to use for virtual machines.
+ - `openstack_image`: OpenStack image to use for virtual machines.
-**Returns:**
- Path to the GitHub entity.
---
-## class `ImmutableConfigChangedError`
-Represents an error when changing immutable charm state.
+
-
-
-### function `__init__`
+### classmethod `from_charm`
```python
-__init__(msg: str)
+from_charm(charm: CharmBase) → OpenstackRunnerConfig
```
-Initialize a new instance of the ImmutableConfigChangedError exception.
+Initialize the config from charm.
**Args:**
- - `msg`: Explanation of the error.
-
-
-
-
-
----
-
-## class `InstanceType`
-Type of instance for runner.
+ - `charm`: The charm instance.
-**Attributes:**
+**Raises:**
- - `LOCAL_LXD`: LXD instance on the local juju machine.
- - `OPENSTACK`: OpenStack instance on a cloud.
+ - `CharmConfigInvalidError`: Raised when the charm configuration virtual-machines is not of int type.
+**Returns:**
+ Openstack runner config of the charm.
---
+
+
## class `LocalLxdRunnerConfig`
Runner configurations for local LXD instances.
@@ -447,7 +498,7 @@ Runner configurations for local LXD instances.
---
-
+
### classmethod `check_virtual_machine_resources`
@@ -478,7 +529,7 @@ Validate the virtual_machine_resources field values.
---
-
+
### classmethod `check_virtual_machines`
@@ -507,7 +558,7 @@ Validate the virtual machines configuration value.
---
-
+
### classmethod `from_charm`
@@ -537,73 +588,71 @@ Initialize the config from charm.
---
-## class `OpenstackImage`
-OpenstackImage from image builder relation data.
+
+
+## class `ProxyConfig`
+Proxy configuration.
**Attributes:**
- - `id`: The OpenStack image ID.
- - `tags`: Image tags, e.g. jammy
+ - `aproxy_address`: The address of the aproxy snap instance if use_aproxy is enabled.
+ - `http`: HTTP proxy address.
+ - `https`: HTTPS proxy address.
+ - `no_proxy`: Comma-separated list of hosts that should not be proxied.
+ - `use_aproxy`: Whether aproxy should be used for the runners.
+---
+
+#### property aproxy_address
+
+Return the aproxy address.
+
---
-
+
-### classmethod `from_charm`
+### classmethod `check_use_aproxy`
```python
-from_charm(charm: CharmBase) → OpenstackImage | None
+check_use_aproxy(use_aproxy: bool, values: dict) → bool
```
-Initialize the OpenstackImage info from relation data.
-
-None represents relation not established. None values for id/tags represent image not yet ready but the relation exists.
+Validate the proxy configuration.
**Args:**
- - `charm`: The charm instance.
-
-
-
-**Returns:**
- OpenstackImage metadata from charm relation data.
-
-
----
-
-## class `OpenstackRunnerConfig`
-Runner configuration for OpenStack Instances.
+ - `use_aproxy`: Value of use_aproxy variable.
+ - `values`: Values in the pydantic model.
-**Attributes:**
+**Raises:**
- - `virtual_machines`: Number of virtual machine-based runner to spawn.
- - `openstack_flavor`: flavor on openstack to use for virtual machines.
- - `openstack_network`: Network on openstack to use for virtual machines.
- - `openstack_image`: Openstack image to use for virtual machines.
+ - `ValueError`: if use_aproxy was set but no http/https was passed.
+**Returns:**
+ Validated use_aproxy value.
---
-
+
### classmethod `from_charm`
```python
-from_charm(charm: CharmBase) → OpenstackRunnerConfig
+from_charm(charm: CharmBase) → ProxyConfig
```
-Initialize the config from charm.
+Initialize the proxy config from charm.
@@ -613,81 +662,73 @@ Initialize the config from charm.
-**Raises:**
-
- - `CharmConfigInvalidError`: Error with charm configuration virtual-machines not of int type.
-
-
-
**Returns:**
- Openstack runner config of the charm.
+ Current proxy config of the charm.
---
-## class `ProxyConfig`
-Proxy configuration.
+
+
+## class `UnsupportedArchitectureError`
+Raised when given machine charm architecture is unsupported.
**Attributes:**
- - `aproxy_address`: The address of aproxy snap instance if use_aproxy is enabled.
- - `http`: HTTP proxy address.
- - `https`: HTTPS proxy address.
- - `no_proxy`: Comma-separated list of hosts that should not be proxied.
- - `use_aproxy`: Whether aproxy should be used for the runners.
+ - `arch`: The current machine architecture.
+
----
+### method `__init__`
-#### property aproxy_address
+```python
+__init__(arch: str) → None
+```
-Return the aproxy address.
+Initialize a new instance of the UnsupportedArchitectureError exception.
----
+**Args:**
+
+ - `arch`: The current machine architecture.
-
-### classmethod `check_use_aproxy`
-```python
-check_use_aproxy(use_aproxy: bool, values: dict) → bool
-```
-Validate the proxy configuration.
+---
+
-**Args:**
-
- - `use_aproxy`: Value of use_aproxy variable.
- - `values`: Values in the pydantic model.
+## class `SSHDebugConnection`
+SSH connection information for debug workflow.
-**Raises:**
+**Attributes:**
- - `ValueError`: if use_aproxy was set but no http/https was passed.
+ - `host`: The SSH relay server host IP address inside the VPN.
+ - `port`: The SSH relay server port.
+ - `rsa_fingerprint`: The host SSH server public RSA key fingerprint.
+ - `ed25519_fingerprint`: The host SSH server public ed25519 key fingerprint.
-**Returns:**
- Validated use_aproxy value.
---
-
+
### classmethod `from_charm`
```python
-from_charm(charm: CharmBase) → ProxyConfig
+from_charm(charm: CharmBase) → list['SSHDebugConnection']
```
-Initialize the proxy config from charm.
+Initialize the SSHDebugConnection from charm relation data.
@@ -698,65 +739,77 @@ Initialize the proxy config from charm.
**Returns:**
- Current proxy config of the charm.
+ List of connection information for ssh debug access.
---
-## class `RepoPolicyComplianceConfig`
-Configuration for the repo policy compliance service.
+
+
+## class `ReactiveConfig`
+Represents the configuration for reactive scheduling.
**Attributes:**
- - `token`: Token for the repo policy compliance service.
- - `url`: URL of the repo policy compliance service.
+ - `mq_uri`: The URI of the MQ to use to spawn runners reactively.
---
-
+
-### classmethod `from_charm`
+### classmethod `from_database`
```python
-from_charm(charm: CharmBase) → RepoPolicyComplianceConfig
+from_database(database: DatabaseRequires) → ReactiveConfig | None
```
-Initialize the config from charm.
+Initialize the ReactiveConfig from charm config and integration data.
**Args:**
- - `charm`: The charm instance.
+ - `database`: The database to fetch integration data from.
+
+
+
+**Returns:**
+ The connection information for the reactive MQ or None if not available.
**Raises:**
- - `CharmConfigInvalidError`: If an invalid configuration was set.
+ - `MissingMongoDBError`: If the information on how to access MongoDB is missing in the integration data.
+---
-**Returns:**
- Current repo-policy-compliance config.
+
+## class `ImmutableConfigChangedError`
+Represents an error when changing immutable charm state.
----
+
-## class `RunnerStorage`
-Supported storage as runner disk.
+### method `__init__`
+```python
+__init__(msg: str)
+```
+Initialize a new instance of the ImmutableConfigChangedError exception.
-**Attributes:**
+
+
+**Args:**
- - `JUJU_STORAGE`: Represents runner storage from Juju storage.
- - `MEMORY`: Represents tempfs storage (ramdisk).
+ - `msg`: Explanation of the error.
@@ -764,90 +817,76 @@ Supported storage as runner disk.
---
-## class `SSHDebugConnection`
-SSH connection information for debug workflow.
+
+
+## class `CharmState`
+The charm state.
**Attributes:**
- - `host`: The SSH relay server host IP address inside the VPN.
- - `port`: The SSH relay server port.
- - `rsa_fingerprint`: The host SSH server public RSA key fingerprint.
- - `ed25519_fingerprint`: The host SSH server public ed25519 key fingerprint.
-
-
-
-
----
+ - `arch`: The underlying compute architecture, i.e. x86_64, amd64, arm64/aarch64.
+ - `charm_config`: Configuration of the juju charm.
+ - `is_metrics_logging_available`: Whether the charm is able to issue metrics.
+ - `proxy_config`: Proxy-related configuration.
+ - `instance_type`: The type of instances, e.g., local lxd, openstack.
+ - `reactive_config`: The charm configuration related to reactive spawning mode.
+ - `runner_config`: The charm configuration related to runner VM configuration.
+ - `ssh_debug_connections`: SSH debug connections configuration information.
-
+
-### classmethod `from_charm`
+### method `__init__`
```python
-from_charm(charm: CharmBase) → list['SSHDebugConnection']
+__init__(
+ arch: Arch,
+ is_metrics_logging_available: bool,
+ proxy_config: ProxyConfig,
+ instance_type: InstanceType,
+ charm_config: CharmConfig,
+ runner_config: OpenstackRunnerConfig | LocalLxdRunnerConfig,
+ reactive_config: ReactiveConfig | None,
+ ssh_debug_connections: list[SSHDebugConnection]
+) → None
```
-Initialize the SSHDebugInfo from charm relation data.
-**Args:**
-
- - `charm`: The charm instance.
-
-**Returns:**
- List of connection information for ssh debug access.
---
-## class `UnsupportedArchitectureError`
-Raised when given machine charm architecture is unsupported.
-
+
-
-**Attributes:**
-
- - `arch`: The current machine architecture.
-
-
-
-### function `__init__`
+### classmethod `from_charm`
```python
-__init__(arch: str) → None
+from_charm(charm: CharmBase, database: DatabaseRequires) → CharmState
```
-Initialize a new instance of the CharmConfigInvalidError exception.
+Initialize the state from charm.
**Args:**
- - `arch`: The current machine architecture.
-
-
-
-
-
----
-
-## class `VirtualMachineResources`
-Virtual machine resource configuration.
+ - `charm`: The charm instance.
+ - `database`: The database instance.
-**Attributes:**
+**Raises:**
- - `cpu`: Number of vCPU for the virtual machine.
- - `memory`: Amount of memory for the virtual machine.
- - `disk`: Amount of disk for the virtual machine.
+ - `CharmConfigInvalidError`: If an invalid configuration was set.
+**Returns:**
+ Current state of the charm.
diff --git a/src-docs/errors.md b/src-docs/errors.md
index ee5db5a11..c61dd8410 100644
--- a/src-docs/errors.md
+++ b/src-docs/errors.md
@@ -7,39 +7,6 @@ Errors used by the charm.
----
-
-
-
-## class `RunnerError`
-Generic runner error as base exception.
-
-
-
-
-
----
-
-
-
-## class `RunnerExecutionError`
-Error for executing commands on runner.
-
-
-
-
-
----
-
-
-
-## class `RunnerFileLoadError`
-Error for loading file on runner.
-
-
-
-
-
---
@@ -55,8 +22,8 @@ Error for runner creation failure.
-## class `RunnerRemoveError`
-Error for runner removal failure.
+## class `RunnerFileLoadError`
+Error for loading file on runner.
@@ -66,8 +33,8 @@ Error for runner removal failure.
-## class `RunnerStartError`
-Error for runner start failure.
+## class `RunnerRemoveError`
+Error for runner removal failure.
@@ -220,17 +187,6 @@ Represents an error raised when logrotate cannot be setup.
-## class `MetricsStorageError`
-Base class for all metrics storage errors.
-
-
-
-
-
----
-
-
-
## class `SharedFilesystemError`
Base class for all shared filesystem errors.
@@ -240,51 +196,7 @@ Base class for all shared filesystem errors.
---
-
-
-## class `CreateMetricsStorageError`
-Represents an error when the metrics storage could not be created.
-
-
-
-
-
----
-
-
-
-## class `DeleteMetricsStorageError`
-Represents an error when the metrics storage could not be deleted.
-
-
-
-
-
----
-
-
-
-## class `GetMetricsStorageError`
-Represents an error when the metrics storage could not be retrieved.
-
-
-
-
-
----
-
-
-
-## class `QuarantineMetricsStorageError`
-Represents an error when the metrics storage could not be quarantined.
-
-
-
-
-
----
-
-
+
## class `SharedFilesystemMountError`
Represents an error related to the mounting of the shared filesystem.
@@ -295,84 +207,7 @@ Represents an error related to the mounting of the shared filesystem.
---
-
-
-## class `RunnerMetricsError`
-Base class for all runner metrics errors.
-
-
-
-
-
----
-
-
-
-## class `CorruptMetricDataError`
-Represents an error with the data being corrupt.
-
-
-
-
-
----
-
-
-
-## class `GithubMetricsError`
-Base class for all github metrics errors.
-
-
-
-
-
----
-
-
-
-## class `GithubClientError`
-Base class for all github client errors.
-
-
-
-
-
----
-
-
-
-## class `GithubApiError`
-Represents an error when the GitHub API returns an error.
-
-
-
-
-
----
-
-
-
-## class `TokenError`
-Represents an error when the token is invalid or has not enough permissions.
-
-
-
-
-
----
-
-
-
-## class `JobNotFoundError`
-Represents an error when the job could not be found on GitHub.
-
-
-
-
-
----
-
-
+
## class `RunnerLogsError`
Base class for all runner logs errors.
@@ -381,58 +216,3 @@ Base class for all runner logs errors.
----
-
-
-
-## class `OpenStackError`
-Base class for OpenStack errors.
-
-
-
-
-
----
-
-
-
-## class `OpenStackInvalidConfigError`
-Represents an invalid OpenStack configuration.
-
-
-
-
-
----
-
-
-
-## class `OpenStackUnauthorizedError`
-Represents an unauthorized connection to OpenStack.
-
-
-
-
-
----
-
-
-
-## class `SSHError`
-Represents an error while interacting with SSH.
-
-
-
-
-
----
-
-
-
-## class `KeyfileError`
-Represents missing keyfile for SSH.
-
-
-
-
-
diff --git a/src-docs/github_client.md b/src-docs/github_client.md
index fc0de8f7b..679c9f907 100644
--- a/src-docs/github_client.md
+++ b/src-docs/github_client.md
@@ -8,116 +8,20 @@ GitHub API client.
Migrate to PyGithub in the future. PyGithub is still lacking some API such as remove token for runner.
----
-
-
-
-## function `catch_http_errors`
-
-```python
-catch_http_errors(
- func: Callable[~ParamT, ~ReturnT]
-) → Callable[~ParamT, ~ReturnT]
-```
-
-Catch HTTP errors and raise custom exceptions.
-
-
-
-**Args:**
-
- - `func`: The target function to catch common errors for.
-
-
-
-**Returns:**
- The decorated function.
-
---
-
+
## class `GithubClient`
GitHub API client.
-
-
-### method `__init__`
-
-```python
-__init__(token: str)
-```
-
-Instantiate the GiHub API client.
-
-
-
-**Args:**
-
- - `token`: GitHub personal token for API requests.
-
---
-
-
-### method `delete_runner`
-
-```python
-delete_runner(path: GitHubOrg | GitHubRepo, runner_id: int) → None
-```
-
-Delete the self-hosted runner from GitHub.
-
-
-
-**Args:**
-
- - `path`: GitHub repository path in the format '/', or the GitHub organization name.
- - `runner_id`: Id of the runner.
-
----
-
-
-
-### method `get_job_info`
-
-```python
-get_job_info(
- path: GitHubRepo,
- workflow_run_id: str,
- runner_name: str
-) → JobStats
-```
-
-Get information about a job for a specific workflow run.
-
-
-
-**Args:**
-
- - `path`: GitHub repository path in the format '/'.
- - `workflow_run_id`: Id of the workflow run.
- - `runner_name`: Name of the runner.
-
-
-
-**Raises:**
-
- - `TokenError`: if there was an error with the Github token crdential provided.
- - `JobNotFoundError`: If no jobs were found.
-
-
-
-**Returns:**
- Job information.
-
----
-
-
+
### method `get_runner_application`
@@ -150,73 +54,4 @@ Get runner application available for download for given arch.
**Returns:**
The runner application.
----
-
-
-
-### method `get_runner_github_info`
-
-```python
-get_runner_github_info(path: GitHubOrg | GitHubRepo) → list[SelfHostedRunner]
-```
-
-Get runner information on GitHub under a repo or org.
-
-
-
-**Args:**
-
- - `path`: GitHub repository path in the format '/', or the GitHub organization name.
-
-
-
-**Returns:**
- List of runner information.
-
----
-
-
-
-### method `get_runner_registration_token`
-
-```python
-get_runner_registration_token(path: GitHubOrg | GitHubRepo) → str
-```
-
-Get token from GitHub used for registering runners.
-
-
-
-**Args:**
-
- - `path`: GitHub repository path in the format '/', or the GitHub organization name.
-
-
-
-**Returns:**
- The registration token.
-
----
-
-
-
-### method `get_runner_remove_token`
-
-```python
-get_runner_remove_token(path: GitHubOrg | GitHubRepo) → str
-```
-
-Get token from GitHub used for removing runners.
-
-
-
-**Args:**
-
- - `path`: The Github org/repo path.
-
-
-
-**Returns:**
- The removing token.
-
diff --git a/src-docs/runner.md b/src-docs/runner.md
index b513ad697..d7bfb93c1 100644
--- a/src-docs/runner.md
+++ b/src-docs/runner.md
@@ -17,7 +17,7 @@ The `RunnerManager` class from `runner_manager.py` creates and manages a collect
---
-
+
## class `Snap`
This class represents a snap installation.
@@ -36,7 +36,7 @@ This class represents a snap installation.
---
-
+
## class `WgetExecutable`
The executable to be installed through wget.
@@ -66,7 +66,7 @@ __init__(url: str, cmd: str) → None
---
-
+
## class `CreateRunnerConfig`
The configuration values for creating a single runner instance.
@@ -105,7 +105,7 @@ __init__(
---
-
+
## class `Runner`
Single instance of GitHub self-hosted runner.
@@ -120,7 +120,7 @@ Single instance of GitHub self-hosted runner.
- `runner_script`: The runner start script file path.
- `pre_job_script`: The runner pre_job script file path. This is referenced in the env_file in the ACTIONS_RUNNER_HOOK_JOB_STARTED environment variable.
-
+
### method `__init__`
@@ -149,7 +149,7 @@ Construct the runner instance.
---
-
+
### method `create`
@@ -173,7 +173,7 @@ Create the runner instance on LXD and register it on GitHub.
---
-
+
### method `pull_logs`
@@ -193,7 +193,7 @@ Expects the runner to have an instance.
---
-
+
### method `remove`
diff --git a/src-docs/runner_manager.md b/src-docs/runner_manager.md
index f52829efa..883745753 100644
--- a/src-docs/runner_manager.md
+++ b/src-docs/runner_manager.md
@@ -52,7 +52,7 @@ Construct RunnerManager object for creating and managing runners.
---
-
+
### method `build_runner_image`
@@ -135,7 +135,7 @@ Get information on the runners from GitHub.
---
-
+
### method `get_latest_runner_bin_url`
@@ -219,7 +219,7 @@ Install cron job for building runner image.
---
-
+
### method `update_runner_bin`
diff --git a/src-docs/runner_type.md b/src-docs/runner_type.md
index d5029f4f8..cde5b2a7e 100644
--- a/src-docs/runner_type.md
+++ b/src-docs/runner_type.md
@@ -9,7 +9,7 @@ Types used by Runner class.
---
-
+
## class `RunnerNameByHealth`
Set of runners instance by health state.
@@ -39,7 +39,7 @@ __init__(healthy: tuple[str, ], unhealthy: tuple[str, ]) → None
---
-
+
## class `ProxySetting`
Represent HTTP-related proxy settings.
@@ -76,7 +76,7 @@ __init__(
---
-
+
## class `RunnerConfig`
Configuration for runner.
@@ -123,7 +123,7 @@ __init__(
---
-
+
## class `RunnerStatus`
Status of runner.
@@ -160,7 +160,7 @@ __init__(
---
-
+
## class `RunnerGithubInfo`
GitHub info of a runner.
diff --git a/src-docs/shared_fs.md b/src-docs/shared_fs.md
index 004556f9f..5ae59a8ca 100644
--- a/src-docs/shared_fs.md
+++ b/src-docs/shared_fs.md
@@ -13,7 +13,7 @@ Classes and functions to operate on the shared filesystem between the charm and
---
-
+
## function `create`
@@ -45,7 +45,7 @@ The method is not idempotent and will raise an exception if the shared filesyste
---
-
+
## function `list_all`
@@ -63,7 +63,7 @@ List all the metric storages.
---
-
+
## function `get`
@@ -95,7 +95,7 @@ Mounts the filesystem if it is not currently mounted.
---
-
+
## function `delete`
diff --git a/src-docs/utilities.md b/src-docs/utilities.md
index 6c2aab4e1..b2c4cbf21 100644
--- a/src-docs/utilities.md
+++ b/src-docs/utilities.md
@@ -8,77 +8,7 @@ Utilities used by the charm.
---
-
-
-## function `retry`
-
-```python
-retry(
- exception: Type[Exception] = ,
- tries: int = 1,
- delay: float = 0,
- max_delay: Optional[float] = None,
- backoff: float = 1,
- local_logger: Logger =
-) → Callable[[Callable[~ParamT, ~ReturnT]], Callable[~ParamT, ~ReturnT]]
-```
-
-Parameterize the decorator for adding retry to functions.
-
-
-
-**Args:**
-
- - `exception`: Exception type to be retried.
- - `tries`: Number of attempts at retry.
- - `delay`: Time in seconds to wait between retry.
- - `max_delay`: Max time in seconds to wait between retry.
- - `backoff`: Factor to increase the delay by each retry.
- - `local_logger`: Logger for logging.
-
-
-
-**Returns:**
- The function decorator for retry.
-
-
----
-
-
-
-## function `secure_run_subprocess`
-
-```python
-secure_run_subprocess(
- cmd: Sequence[str],
- hide_cmd: bool = False,
- **kwargs: dict[str, Any]
-) → CompletedProcess[bytes]
-```
-
-Run command in subprocess according to security recommendations.
-
-CalledProcessError will not be raised on error of the command executed. Errors should be handled by the caller by checking the exit code.
-
-The command is executed with `subprocess.run`, additional arguments can be passed to it as keyword arguments. The following arguments to `subprocess.run` should not be set: `capture_output`, `shell`, `check`. As those arguments are used by this function.
-
-
-
-**Args:**
-
- - `cmd`: Command in a list.
- - `hide_cmd`: Hide logging of cmd.
- - `kwargs`: Additional keyword arguments for the `subprocess.run` call.
-
-
-
-**Returns:**
- Object representing the completed process. The outputs subprocess can accessed.
-
-
----
-
-
+
## function `execute_command`
@@ -118,7 +48,7 @@ The output is logged if the log level of the logger is set to debug.
---
-
+
## function `get_env_var`
@@ -144,29 +74,7 @@ Looks for all upper-case and all low-case of the `env_var`.
---
-
-
-## function `set_env_var`
-
-```python
-set_env_var(env_var: str, value: str) → None
-```
-
-Set the environment variable value.
-
-Set the all upper case and all low case of the `env_var`.
-
-
-
-**Args:**
-
- - `env_var`: Name of the environment variable.
- - `value`: Value to set environment variable to.
-
-
----
-
-
+
## function `bytes_with_unit_to_kib`
@@ -196,7 +104,7 @@ Convert a positive integer followed by a unit to number of kibibytes.
---
-
+
## function `remove_residual_venv_dirs`
diff --git a/src/charm.py b/src/charm.py
index c60c62bea..0b17dbf52 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -7,10 +7,23 @@
# pylint: disable=too-many-lines
"""Charm for creating and managing GitHub self-hosted runner instances."""
+from github_runner_manager.manager.cloud_runner_manager import (
+ GitHubRunnerConfig,
+ SupportServiceConfig,
+)
+from github_runner_manager.manager.runner_manager import (
+ FlushMode,
+ RunnerManager,
+ RunnerManagerConfig,
+)
+from github_runner_manager.manager.runner_scaler import RunnerScaler
+from github_runner_manager.openstack_cloud.openstack_runner_manager import (
+ OpenStackCloudConfig,
+ OpenStackRunnerManager,
+ OpenStackServerConfig,
+)
+from github_runner_manager.types_.github import GitHubPath, GitHubRunnerStatus, parse_github_path
-from manager.cloud_runner_manager import GitHubRunnerConfig, SupportServiceConfig
-from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
-from manager.runner_scaler import RunnerScaler
from utilities import bytes_with_unit_to_kib, execute_command, remove_residual_venv_dirs, retry
# This is a workaround for https://bugs.launchpad.net/juju/+bug/2058335
@@ -59,20 +72,17 @@
TOKEN_CONFIG_NAME,
CharmConfigInvalidError,
CharmState,
- GitHubPath,
InstanceType,
OpenstackImage,
ProxyConfig,
RunnerStorage,
VirtualMachineResources,
- parse_github_path,
)
from errors import (
ConfigurationError,
LogrotateSetupError,
MissingMongoDBError,
MissingRunnerBinaryError,
- OpenStackUnauthorizedError,
RunnerBinaryError,
RunnerError,
SubprocessError,
@@ -80,12 +90,6 @@
)
from event_timer import EventTimer, TimerStatusError
from firewall import Firewall, FirewallEntry
-from github_type import GitHubRunnerStatus
-from openstack_cloud.openstack_runner_manager import (
- OpenStackCloudConfig,
- OpenStackRunnerManager,
- OpenStackServerConfig,
-)
from runner import LXD_PROFILE_YAML
from runner_manager import LXDRunnerManager, LXDRunnerManagerConfig
from runner_manager_type import LXDFlushMode
@@ -140,11 +144,6 @@ def func_with_catch_errors(self: "GithubRunnerCharm", event: EventT) -> None:
"GitHub runner application not downloaded; the charm will retry download on "
"reconcile interval"
)
- except OpenStackUnauthorizedError:
- logger.exception("Unauthorized OpenStack connection")
- self.unit.status = BlockedStatus(
- "Unauthorized OpenStack connection. Check credentials."
- )
except MissingMongoDBError as err:
logger.exception("Missing integration data")
self.unit.status = WaitingStatus(str(err))
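Taken together, the charm.py hunks are an import migration plus the removal of the OpenStack-specific error handler: each in-tree module is swapped for its counterpart in the installed package. Summarizing the mapping shown above, with the old imports as comments over their replacements:

```python
# from manager.cloud_runner_manager import GitHubRunnerConfig, SupportServiceConfig
from github_runner_manager.manager.cloud_runner_manager import (
    GitHubRunnerConfig,
    SupportServiceConfig,
)
# from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
from github_runner_manager.manager.runner_manager import (
    FlushMode,
    RunnerManager,
    RunnerManagerConfig,
)
# from manager.runner_scaler import RunnerScaler
from github_runner_manager.manager.runner_scaler import RunnerScaler
# from openstack_cloud.openstack_runner_manager import (...)
from github_runner_manager.openstack_cloud.openstack_runner_manager import (
    OpenStackCloudConfig,
    OpenStackRunnerManager,
    OpenStackServerConfig,
)
# from github_type import GitHubRunnerStatus
# from charm_state import GitHubPath, parse_github_path (both removed there)
from github_runner_manager.types_.github import GitHubPath, GitHubRunnerStatus, parse_github_path
```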
diff --git a/src/charm_state.py b/src/charm_state.py
index dcd87d122..6ae46d386 100644
--- a/src/charm_state.py
+++ b/src/charm_state.py
@@ -19,6 +19,9 @@
import yaml
from charms.data_platform_libs.v0.data_interfaces import DatabaseRequires
+from github_runner_manager import openstack_cloud
+from github_runner_manager.errors import OpenStackInvalidConfigError
+from github_runner_manager.types_.github import GitHubPath, parse_github_path
from ops import CharmBase
from pydantic import (
AnyHttpUrl,
@@ -31,8 +34,7 @@
validator,
)
-import openstack_cloud
-from errors import MissingMongoDBError, OpenStackInvalidConfigError
+from errors import MissingMongoDBError
from firewall import FirewallEntry
from utilities import get_env_var
@@ -87,75 +89,6 @@ class AnyHttpsUrl(AnyHttpUrl):
allowed_schemes = {"https"}
-@dataclasses.dataclass
-class GitHubRepo:
- """Represent GitHub repository.
-
- Attributes:
- owner: Owner of the GitHub repository.
- repo: Name of the GitHub repository.
- """
-
- owner: str
- repo: str
-
- def path(self) -> str:
- """Return a string representing the path.
-
- Returns:
- Path to the GitHub entity.
- """
- return f"{self.owner}/{self.repo}"
-
-
-@dataclasses.dataclass
-class GitHubOrg:
- """Represent GitHub organization.
-
- Attributes:
- org: Name of the GitHub organization.
- group: Runner group to spawn the runners in.
- """
-
- org: str
- group: str
-
- def path(self) -> str:
- """Return a string representing the path.
-
- Returns:
- Path to the GitHub entity.
- """
- return self.org
-
-
-GitHubPath = GitHubOrg | GitHubRepo
-
-
-def parse_github_path(path_str: str, runner_group: str) -> GitHubPath:
- """Parse GitHub path.
-
- Args:
- path_str: GitHub path in string format.
- runner_group: Runner group name for GitHub organization. If the path is
- a repository this argument is ignored.
-
- Raises:
- CharmConfigInvalidError: if an invalid path string was given.
-
- Returns:
- GithubPath object representing the GitHub repository, or the GitHub
- organization with runner group information.
- """
- if "/" in path_str:
- paths = tuple(segment for segment in path_str.split("/") if segment)
- if len(paths) != 2:
- raise CharmConfigInvalidError(f"Invalid path configuration {path_str}")
- owner, repo = paths
- return GitHubRepo(owner=owner, repo=repo)
- return GitHubOrg(org=path_str, group=runner_group)
-
-
@dataclasses.dataclass
class GithubConfig:
"""Charm configuration related to GitHub.
@@ -186,7 +119,10 @@ def from_charm(cls, charm: CharmBase) -> "GithubConfig":
path_str = cast(str, charm.config.get(PATH_CONFIG_NAME, ""))
if not path_str:
raise CharmConfigInvalidError(f"Missing {PATH_CONFIG_NAME} configuration")
- path = parse_github_path(cast(str, path_str), cast(str, runner_group))
+ try:
+ path = parse_github_path(cast(str, path_str), cast(str, runner_group))
+ except ValueError as e:
+ raise CharmConfigInvalidError(str(e)) from e
token = cast(str, charm.config.get(TOKEN_CONFIG_NAME))
if not token:
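With `GitHubOrg`, `GitHubRepo`, and `parse_github_path` now imported from the library, invalid path strings surface as `ValueError` instead of the charm's `CharmConfigInvalidError`, so `GithubConfig.from_charm` wraps the call as shown above. A minimal sketch of that translation, using a stand-in exception class (the real one is defined in src/charm_state.py):

```python
from github_runner_manager.types_.github import parse_github_path


class CharmConfigInvalidError(Exception):
    """Stand-in for the charm's exception."""


def parse_or_raise(path_str: str, runner_group: str):
    """Translate the library's ValueError into the charm's config error."""
    try:
        return parse_github_path(path_str, runner_group)
    except ValueError as e:
        raise CharmConfigInvalidError(str(e)) from e
```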
diff --git a/src/errors.py b/src/errors.py
index 4285dc6e4..7212b4642 100644
--- a/src/errors.py
+++ b/src/errors.py
@@ -6,31 +6,31 @@
from typing import Union
+# We import these errors from the github_runner_manager package; they are used in the charm.
+from github_runner_manager.errors import ( # noqa: F401 pylint: disable=unused-import
+ CreateMetricsStorageError,
+ DeleteMetricsStorageError,
+ GetMetricsStorageError,
+ GithubClientError,
+ GithubMetricsError,
+ MetricsStorageError,
+ RunnerError,
+ TokenError,
+)
-class RunnerError(Exception):
- """Generic runner error as base exception."""
-
-class RunnerExecutionError(RunnerError):
- """Error for executing commands on runner."""
+class RunnerCreateError(RunnerError):
+ """Error for runner creation failure."""
class RunnerFileLoadError(RunnerError):
"""Error for loading file on runner."""
-class RunnerCreateError(RunnerError):
- """Error for runner creation failure."""
-
-
class RunnerRemoveError(RunnerError):
"""Error for runner removal failure."""
-class RunnerStartError(RunnerError):
- """Error for runner start failure."""
-
-
class RunnerBinaryError(RunnerError):
"""Error of getting runner binary."""
@@ -100,81 +100,13 @@ class LogrotateSetupError(Exception):
"""Represents an error raised when logrotate cannot be setup."""
-class MetricsStorageError(Exception):
- """Base class for all metrics storage errors."""
-
-
class SharedFilesystemError(MetricsStorageError):
"""Base class for all shared filesystem errors."""
-class CreateMetricsStorageError(MetricsStorageError):
- """Represents an error when the metrics storage could not be created."""
-
-
-class DeleteMetricsStorageError(MetricsStorageError):
- """Represents an error when the metrics storage could not be deleted."""
-
-
-class GetMetricsStorageError(MetricsStorageError):
- """Represents an error when the metrics storage could not be retrieved."""
-
-
-class QuarantineMetricsStorageError(MetricsStorageError):
- """Represents an error when the metrics storage could not be quarantined."""
-
-
class SharedFilesystemMountError(SharedFilesystemError):
"""Represents an error related to the mounting of the shared filesystem."""
-class RunnerMetricsError(Exception):
- """Base class for all runner metrics errors."""
-
-
-class CorruptMetricDataError(RunnerMetricsError):
- """Represents an error with the data being corrupt."""
-
-
-class GithubMetricsError(Exception):
- """Base class for all github metrics errors."""
-
-
-class GithubClientError(Exception):
- """Base class for all github client errors."""
-
-
-class GithubApiError(GithubClientError):
- """Represents an error when the GitHub API returns an error."""
-
-
-class TokenError(GithubClientError):
- """Represents an error when the token is invalid or has not enough permissions."""
-
-
-class JobNotFoundError(GithubClientError):
- """Represents an error when the job could not be found on GitHub."""
-
-
class RunnerLogsError(Exception):
"""Base class for all runner logs errors."""
-
-
-class OpenStackError(Exception):
- """Base class for OpenStack errors."""
-
-
-class OpenStackInvalidConfigError(OpenStackError):
- """Represents an invalid OpenStack configuration."""
-
-
-class OpenStackUnauthorizedError(OpenStackError):
- """Represents an unauthorized connection to OpenStack."""
-
-
-class SSHError(Exception):
- """Represents an error while interacting with SSH."""
-
-
-class KeyfileError(SSHError):
- """Represents missing keyfile for SSH."""
diff --git a/src/github_client.py b/src/github_client.py
index b724b5cdb..b14d3b799 100644
--- a/src/github_client.py
+++ b/src/github_client.py
@@ -6,27 +6,22 @@
Migrate to PyGithub in the future. PyGithub is still lacking some API such as
remove token for runner.
"""
-import functools
import logging
-from datetime import datetime
-from typing import Callable, ParamSpec, TypeVar
-from urllib.error import HTTPError
-
-from ghapi.all import GhApi, pages
-from ghapi.page import paged
-from typing_extensions import assert_never
-
-from charm_state import Arch, GitHubOrg, GitHubPath, GitHubRepo
-from errors import GithubApiError, JobNotFoundError, RunnerBinaryError, TokenError
-from github_type import (
- JobStats,
- RegistrationToken,
- RemoveToken,
+from typing import ParamSpec, TypeVar
+
+from github_runner_manager.github_client import GithubClient as GitHubRunnerManagerGitHubClient
+from github_runner_manager.github_client import catch_http_errors
+from github_runner_manager.types_.github import (
+ GitHubOrg,
+ GitHubPath,
+ GitHubRepo,
RunnerApplication,
RunnerApplicationList,
- SelfHostedRunner,
)
+from charm_state import Arch
+from errors import RunnerBinaryError
+
logger = logging.getLogger(__name__)
# Parameters of the function decorated with retry
@@ -35,57 +30,9 @@
ReturnT = TypeVar("ReturnT")
-def catch_http_errors(func: Callable[ParamT, ReturnT]) -> Callable[ParamT, ReturnT]:
- """Catch HTTP errors and raise custom exceptions.
-
- Args:
- func: The target function to catch common errors for.
-
- Returns:
- The decorated function.
- """
-
- @functools.wraps(func)
- def wrapper(*args: ParamT.args, **kwargs: ParamT.kwargs) -> ReturnT:
- """Catch common errors when using the GitHub API.
-
- Args:
- args: Placeholder for positional arguments.
- kwargs: Placeholder for keyword arguments.
-
- Raises:
- TokenError: If there was an error with the provided token.
- GithubApiError: If there was an unexpected error using the GitHub API.
-
- Returns:
- The decorated function.
- """
- try:
- return func(*args, **kwargs)
- except HTTPError as exc:
- if exc.code in (401, 403):
- if exc.code == 401:
- msg = "Invalid token."
- else:
- msg = "Provided token has not enough permissions or has reached rate-limit."
- raise TokenError(msg) from exc
- raise GithubApiError from exc
-
- return wrapper
-
-
-class GithubClient:
+class GithubClient(GitHubRunnerManagerGitHubClient):
"""GitHub API client."""
- def __init__(self, token: str):
- """Instantiate the GiHub API client.
-
- Args:
- token: GitHub personal token for API requests.
- """
- self._token = token
- self._client = GhApi(token=self._token)
-
@catch_http_errors
def get_runner_application(
self, path: GitHubPath, arch: Arch, os: str = "linux"
@@ -125,176 +72,3 @@ def get_runner_application(
raise RunnerBinaryError(
f"Unable query GitHub runner binary information for {os} {arch}"
) from err
-
- @catch_http_errors
- def get_runner_github_info(self, path: GitHubPath) -> list[SelfHostedRunner]:
- """Get runner information on GitHub under a repo or org.
-
- Args:
- path: GitHub repository path in the format '/', or the GitHub organization
- name.
-
- Returns:
- List of runner information.
- """
- remote_runners_list: list[SelfHostedRunner] = []
-
- if isinstance(path, GitHubRepo):
- # The documentation of ghapi for pagination is incorrect and examples will give errors.
- # This workaround is a temp solution. Will be moving to PyGitHub in the future.
- self._client.actions.list_self_hosted_runners_for_repo(
- owner=path.owner, repo=path.repo, per_page=100
- )
- num_of_pages = self._client.last_page()
- remote_runners_list = [
- item
- for page in pages(
- self._client.actions.list_self_hosted_runners_for_repo,
- num_of_pages + 1,
- owner=path.owner,
- repo=path.repo,
- per_page=100,
- )
- for item in page["runners"]
- ]
- if isinstance(path, GitHubOrg):
- # The documentation of ghapi for pagination is incorrect and examples will give errors.
- # This workaround is a temp solution. Will be moving to PyGitHub in the future.
- self._client.actions.list_self_hosted_runners_for_org(org=path.org, per_page=100)
- num_of_pages = self._client.last_page()
- remote_runners_list = [
- item
- for page in pages(
- self._client.actions.list_self_hosted_runners_for_org,
- num_of_pages + 1,
- org=path.org,
- per_page=100,
- )
- for item in page["runners"]
- ]
- return remote_runners_list
-
- @catch_http_errors
- def get_runner_remove_token(self, path: GitHubPath) -> str:
- """Get token from GitHub used for removing runners.
-
- Args:
- path: The Github org/repo path.
-
- Returns:
- The removing token.
- """
- token: RemoveToken
- if isinstance(path, GitHubRepo):
- token = self._client.actions.create_remove_token_for_repo(
- owner=path.owner, repo=path.repo
- )
- elif isinstance(path, GitHubOrg):
- token = self._client.actions.create_remove_token_for_org(org=path.org)
- else:
- assert_never(token)
-
- return token["token"]
-
- @catch_http_errors
- def get_runner_registration_token(self, path: GitHubPath) -> str:
- """Get token from GitHub used for registering runners.
-
- Args:
- path: GitHub repository path in the format '/', or the GitHub organization
- name.
-
- Returns:
- The registration token.
- """
- token: RegistrationToken
- if isinstance(path, GitHubRepo):
- token = self._client.actions.create_registration_token_for_repo(
- owner=path.owner, repo=path.repo
- )
- elif isinstance(path, GitHubOrg):
- token = self._client.actions.create_registration_token_for_org(org=path.org)
- else:
- assert_never(token)
-
- return token["token"]
-
- @catch_http_errors
- def delete_runner(self, path: GitHubPath, runner_id: int) -> None:
- """Delete the self-hosted runner from GitHub.
-
- Args:
- path: GitHub repository path in the format '/', or the GitHub organization
- name.
- runner_id: Id of the runner.
- """
- if isinstance(path, GitHubRepo):
- self._client.actions.delete_self_hosted_runner_from_repo(
- owner=path.owner,
- repo=path.repo,
- runner_id=runner_id,
- )
- if isinstance(path, GitHubOrg):
- self._client.actions.delete_self_hosted_runner_from_org(
- org=path.org,
- runner_id=runner_id,
- )
-
- def get_job_info(self, path: GitHubRepo, workflow_run_id: str, runner_name: str) -> JobStats:
- """Get information about a job for a specific workflow run.
-
- Args:
- path: GitHub repository path in the format '/'.
- workflow_run_id: Id of the workflow run.
- runner_name: Name of the runner.
-
- Raises:
- TokenError: if there was an error with the Github token crdential provided.
- JobNotFoundError: If no jobs were found.
-
- Returns:
- Job information.
- """
- paged_kwargs = {"owner": path.owner, "repo": path.repo, "run_id": workflow_run_id}
- try:
- for wf_run_page in paged(
- self._client.actions.list_jobs_for_workflow_run, **paged_kwargs
- ):
- jobs = wf_run_page["jobs"]
- # ghapi performs endless pagination,
- # so we have to break out of the loop if there are no more jobs
- if not jobs:
- break
- for job in jobs:
- if job["runner_name"] == runner_name:
- # datetime strings should be in ISO 8601 format,
- # but they can also use Z instead of
- # +00:00, which is not supported by datetime.fromisoformat
- created_at = datetime.fromisoformat(
- job["created_at"].replace("Z", "+00:00")
- )
- started_at = datetime.fromisoformat(
- job["started_at"].replace("Z", "+00:00")
- )
- # conclusion could be null per api schema, so we need to handle that
- # though we would assume that it should always be present,
- # as the job should be finished
- conclusion = job.get("conclusion", None)
-
- job_id = job["id"]
- return JobStats(
- job_id=job_id,
- created_at=created_at,
- started_at=started_at,
- conclusion=conclusion,
- )
-
- except HTTPError as exc:
- if exc.code in (401, 403):
- raise TokenError from exc
- raise JobNotFoundError(
- f"Could not find job for runner {runner_name}. "
- f"Could not list jobs for workflow run {workflow_run_id}"
- ) from exc
-
- raise JobNotFoundError(f"Could not find job for runner {runner_name}.")
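After this change the charm-side `GithubClient` keeps only the `get_runner_application` override; `delete_runner`, the token helpers, `get_job_info`, and the `catch_http_errors` decorator are deleted here and, given the new subclassing, are expected to come from the library base class instead. The resulting shape, condensed from the hunk above with the method body elided:

```python
from github_runner_manager.github_client import GithubClient as GitHubRunnerManagerGitHubClient
from github_runner_manager.github_client import catch_http_errors


class GithubClient(GitHubRunnerManagerGitHubClient):
    """GitHub API client."""

    @catch_http_errors
    def get_runner_application(self, path, arch, os="linux"):
        """Charm-specific lookup, still wrapped by the library's error mapping."""
        ...  # body unchanged, see the hunk above
```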
diff --git a/src/github_type.py b/src/github_type.py
deleted file mode 100644
index a26a0279a..000000000
--- a/src/github_type.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Return type for the GitHub web API."""
-
-
-from __future__ import annotations
-
-from datetime import datetime
-from enum import Enum
-from typing import List, Literal, Optional, TypedDict
-
-from pydantic import BaseModel
-from typing_extensions import NotRequired
-
-
-class GitHubRunnerStatus(str, Enum):
- """Status of runner on GitHub.
-
- Attributes:
- ONLINE: Represents an online runner status.
- OFFLINE: Represents an offline runner status.
- """
-
- ONLINE = "online"
- OFFLINE = "offline"
-
-
-# See response schema for
-# https://docs.github.com/en/rest/actions/self-hosted-runners?apiVersion=2022-11-28#list-runner-applications-for-an-organization
-class RunnerApplication(TypedDict, total=False):
- """Information on the runner application.
-
- Attributes:
- os: Operating system to run the runner application on.
- architecture: Computer Architecture to run the runner application on.
- download_url: URL to download the runner application.
- filename: Filename of the runner application.
- temp_download_token: A short lived bearer token used to download the
- runner, if needed.
- sha256_checksum: SHA256 Checksum of the runner application.
- """
-
- os: Literal["linux", "win", "osx"]
- architecture: Literal["arm", "arm64", "x64"]
- download_url: str
- filename: str
- temp_download_token: NotRequired[str]
- sha256_checksum: NotRequired[str]
-
-
-RunnerApplicationList = List[RunnerApplication]
-
-
-class SelfHostedRunnerLabel(TypedDict, total=False):
- """A single label of self-hosted runners.
-
- Attributes:
- id: Unique identifier of the label.
- name: Name of the label.
- type: Type of label. Read-only labels are applied automatically when
- the runner is configured.
- """
-
- id: NotRequired[int]
- name: str
- type: NotRequired[str]
-
-
-class SelfHostedRunner(TypedDict):
- """Information on a single self-hosted runner.
-
- Attributes:
- busy: Whether the runner is executing a job.
- id: Unique identifier of the runner.
- labels: Labels of the runner.
- os: Operation system of the runner.
- name: Name of the runner.
- status: The Github runner status.
- """
-
- busy: bool
- id: int
- labels: list[SelfHostedRunnerLabel]
- os: str
- name: str
- status: GitHubRunnerStatus
-
-
-class SelfHostedRunnerList(TypedDict):
- """Information on a collection of self-hosted runners.
-
- Attributes:
- total_count: Total number of runners.
- runners: List of runners.
- """
-
- total_count: int
- runners: list[SelfHostedRunner]
-
-
-class RegistrationToken(TypedDict):
- """Token used for registering GitHub runners.
-
- Attributes:
- token: Token for registering GitHub runners.
- expires_at: Time the token expires at.
- """
-
- token: str
- expires_at: str
-
-
-class RemoveToken(TypedDict):
- """Token used for removing GitHub runners.
-
- Attributes:
- token: Token for removing GitHub runners.
- expires_at: Time the token expires at.
- """
-
- token: str
- expires_at: str
-
-
-class JobConclusion(str, Enum):
- """Conclusion of a job on GitHub.
-
- See :https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28\
-#list-workflow-runs-for-a-repository
-
- Attributes:
- ACTION_REQUIRED: Represents additional action required on the job.
- CANCELLED: Represents a cancelled job status.
- FAILURE: Represents a failed job status.
- NEUTRAL: Represents a job status that can optionally succeed or fail.
- SKIPPED: Represents a skipped job status.
- SUCCESS: Represents a successful job status.
- TIMED_OUT: Represents a job that has timed out.
- """
-
- ACTION_REQUIRED = "action_required"
- CANCELLED = "cancelled"
- FAILURE = "failure"
- NEUTRAL = "neutral"
- SKIPPED = "skipped"
- SUCCESS = "success"
- TIMED_OUT = "timed_out"
-
-
-class JobStats(BaseModel):
- """Stats for a job on GitHub.
-
- Attributes:
- job_id: The ID of the job.
- created_at: The time the job was created.
- started_at: The time the job was started.
- conclusion: The end result of a job.
- """
-
- job_id: int
- created_at: datetime
- started_at: datetime
- conclusion: Optional[JobConclusion]
diff --git a/src/logrotate.py b/src/logrotate.py
index 0fd65d5af..294c651dd 100644
--- a/src/logrotate.py
+++ b/src/logrotate.py
@@ -6,11 +6,11 @@
from pathlib import Path
from charms.operator_libs_linux.v1 import systemd
+from github_runner_manager.metrics.events import METRICS_LOG_PATH
+from github_runner_manager.reactive.runner_manager import REACTIVE_RUNNER_LOG_DIR
from pydantic import BaseModel
from errors import LogrotateSetupError
-from metrics.events import METRICS_LOG_PATH
-from reactive.runner_manager import REACTIVE_RUNNER_LOG_DIR
LOG_ROTATE_TIMER_SYSTEMD_SERVICE = "logrotate.timer"
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
deleted file mode 100644
index aff75ed41..000000000
--- a/src/manager/cloud_runner_manager.py
+++ /dev/null
@@ -1,203 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Interface of manager of runner instance on clouds."""
-
-import abc
-import logging
-from dataclasses import dataclass
-from enum import Enum, auto
-from typing import Iterator, Sequence, Tuple
-
-from charm_state import GitHubPath, ProxyConfig, RepoPolicyComplianceConfig, SSHDebugConnection
-from metrics.runner import RunnerMetrics
-
-logger = logging.getLogger(__name__)
-
-InstanceId = str
-
-
-class HealthState(Enum):
- """Health state of the runners.
-
- Attributes:
- HEALTHY: The runner is healthy.
- UNHEALTHY: The runner is not healthy.
- UNKNOWN: Unable to get the health state.
- """
-
- HEALTHY = auto()
- UNHEALTHY = auto()
- UNKNOWN = auto()
-
-
-class CloudRunnerState(str, Enum):
- """Represent state of the instance hosting the runner.
-
- Attributes:
- CREATED: The instance is created.
- ACTIVE: The instance is active and running.
- DELETED: The instance is deleted.
- ERROR: The instance has encountered an error and is not running.
- STOPPED: The instance has stopped.
- UNKNOWN: The state of the instance is not known.
- UNEXPECTED: An unknown state not accounted for by the developer is encountered.
- """
-
- CREATED = auto()
- ACTIVE = auto()
- DELETED = auto()
- ERROR = auto()
- STOPPED = auto()
- UNKNOWN = auto()
- UNEXPECTED = auto()
-
- # Exclude from coverage as not much value for testing this object conversion.
- @staticmethod
- def from_openstack_server_status( # pragma: no cover
- openstack_server_status: str,
- ) -> "CloudRunnerState":
- """Create from openstack server status.
-
- The openstack server status are documented here:
- https://docs.openstack.org/api-guide/compute/server_concepts.html
-
- Args:
- openstack_server_status: Openstack server status.
-
- Returns:
- The state of the runner.
- """
- state = CloudRunnerState.UNEXPECTED
- match openstack_server_status:
- case "BUILD":
- state = CloudRunnerState.CREATED
- case "REBUILD":
- state = CloudRunnerState.CREATED
- case "ACTIVE":
- state = CloudRunnerState.ACTIVE
- case "ERROR":
- state = CloudRunnerState.ERROR
- case "STOPPED":
- state = CloudRunnerState.STOPPED
- case "DELETED":
- state = CloudRunnerState.DELETED
- case "UNKNOWN":
- state = CloudRunnerState.UNKNOWN
- case _:
- state = CloudRunnerState.UNEXPECTED
- return state
-
-
-@dataclass
-class GitHubRunnerConfig:
- """Configuration for GitHub runner spawned.
-
- Attributes:
- github_path: The GitHub organization or repository for runners to connect to.
- labels: The labels to add to runners.
- """
-
- github_path: GitHubPath
- labels: list[str]
-
-
-@dataclass
-class SupportServiceConfig:
- """Configuration for supporting services for runners.
-
- Attributes:
- proxy_config: The proxy configuration.
- dockerhub_mirror: The dockerhub mirror to use for runners.
- ssh_debug_connections: The information on the ssh debug services.
- repo_policy_compliance: The configuration of the repo policy compliance service.
- """
-
- proxy_config: ProxyConfig | None
- dockerhub_mirror: str | None
- ssh_debug_connections: list[SSHDebugConnection] | None
- repo_policy_compliance: RepoPolicyComplianceConfig | None
-
-
-@dataclass
-class CloudRunnerInstance:
- """Information on the runner on the cloud.
-
- Attributes:
- name: Name of the instance hosting the runner.
- instance_id: ID of the instance.
- health: Health state of the runner.
- state: State of the instance hosting the runner.
- """
-
- name: str
- instance_id: InstanceId
- health: HealthState
- state: CloudRunnerState
-
-
-class CloudRunnerManager(abc.ABC):
- """Manage runner instance on cloud.
-
- Attributes:
- name_prefix: The name prefix of the self-hosted runners.
- """
-
- @property
- @abc.abstractmethod
- def name_prefix(self) -> str:
- """Get the name prefix of the self-hosted runners."""
-
- @abc.abstractmethod
- def create_runner(self, registration_token: str) -> InstanceId:
- """Create a self-hosted runner.
-
- Args:
- registration_token: The GitHub registration token for registering runners.
- """
-
- @abc.abstractmethod
- def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
- """Get a self-hosted runner by instance id.
-
- Args:
- instance_id: The instance id.
- """
-
- @abc.abstractmethod
- def get_runners(self, states: Sequence[CloudRunnerState]) -> Tuple[CloudRunnerInstance, ...]:
- """Get self-hosted runners by state.
-
- Args:
- states: Filter for the runners with these cloud states. If None, all states will be
- included.
- """
-
- @abc.abstractmethod
- def delete_runner(self, instance_id: InstanceId, remove_token: str) -> RunnerMetrics | None:
- """Delete self-hosted runner.
-
- Args:
- instance_id: The instance id of the runner to delete.
- remove_token: The GitHub remove token.
- """
-
- @abc.abstractmethod
- def flush_runners(self, remove_token: str, busy: bool = False) -> Iterator[RunnerMetrics]:
- """Stop all runners.
-
- Args:
- remove_token: The GitHub remove token for removing runners.
- busy: If false, only idle runners are removed. If true, both idle and busy runners are
- removed.
- """
-
- @abc.abstractmethod
- def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
- """Cleanup runner and resource on the cloud.
-
- Perform health check on runner and delete the runner if it fails.
-
- Args:
- remove_token: The GitHub remove token for removing runners.
- """
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
deleted file mode 100644
index 949a1df38..000000000
--- a/src/manager/github_runner_manager.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Client for managing self-hosted runner on GitHub side."""
-
-from enum import Enum, auto
-from typing import Iterable
-
-from charm_state import GitHubPath
-from github_client import GithubClient
-from github_type import GitHubRunnerStatus, SelfHostedRunner
-
-
-class GitHubRunnerState(str, Enum):
- """State of the self-hosted runner on GitHub.
-
- Attributes:
- BUSY: Runner is working on a job assigned by GitHub.
- IDLE: Runner is waiting to take a job or is running pre-job tasks (e.g. the
- repo-policy-compliance check).
- OFFLINE: Runner is not connected to GitHub.
- """
-
- BUSY = auto()
- IDLE = auto()
- OFFLINE = auto()
-
- @staticmethod
- def from_runner(runner: SelfHostedRunner) -> "GitHubRunnerState":
- """Construct the object from GtiHub runner information.
-
- Args:
- runner: Information on the GitHub self-hosted runner.
-
- Returns:
- The state of the runner.
- """
- state = GitHubRunnerState.OFFLINE
- # A runner that is busy and offline is possible.
- if runner["busy"]:
- state = GitHubRunnerState.BUSY
- if runner["status"] == GitHubRunnerStatus.ONLINE:
- if not runner["busy"]:
- state = GitHubRunnerState.IDLE
- return state
-
-
-# Thin wrapper around the GitHub Client. Not much value in unit testing.
-class GitHubRunnerManager: # pragma: no cover
- """Manage self-hosted runner on GitHub side."""
-
- def __init__(self, prefix: str, token: str, path: GitHubPath):
- """Construct the object.
-
- Args:
- prefix: The prefix in the name to identify the runners managed by this instance.
- token: The GitHub personal access token to access the GitHub API.
- path: The GitHub repository or organization to register the runners under.
- """
- self._prefix = prefix
- self._path = path
- self.github = GithubClient(token)
-
- def get_runners(
- self, states: Iterable[GitHubRunnerState] | None = None
- ) -> tuple[SelfHostedRunner, ...]:
- """Get info on self-hosted runners of certain states.
-
- Args:
- states: Filter the runners for these states. If None, all runners are returned.
-
- Returns:
- Information on the runners.
- """
- runner_list = self.github.get_runner_github_info(self._path)
- runner_list = [runner for runner in runner_list if runner.name.startswith(self._prefix)]
-
- if states is None:
- return tuple(runner_list)
-
- state_set = set(states)
- return tuple(
- runner
- for runner in runner_list
- if GitHubRunnerManager._is_runner_in_state(runner, state_set)
- )
-
- def delete_runners(self, states: Iterable[GitHubRunnerState] | None = None) -> None:
- """Delete the self-hosted runners of certain states.
-
- Args:
- states: Filter the runners for these states. If None, all runners are deleted.
- """
- runner_list = self.get_runners(states)
- for runner in runner_list:
- self.github.delete_runner(self._path, runner.id)
-
- def get_registration_token(self) -> str:
- """Get registration token from GitHub.
-
- This token is used for registering self-hosted runners.
-
- Returns:
- The registration token.
- """
- return self.github.get_runner_registration_token(self._path)
-
- def get_removal_token(self) -> str:
- """Get removal token from GitHub.
-
- This token is used for removing self-hosted runners.
-
- Returns:
- The removal token.
- """
- return self.github.get_runner_remove_token(self._path)
-
- @staticmethod
- def _is_runner_in_state(
- runner: SelfHostedRunner, states: set[GitHubRunnerState] | None
- ) -> bool:
- """Check that the runner is in one of the states provided.
-
- Args:
- runner: Runner to filter.
- states: The states to check the runner against.
-
- Returns:
- True if the runner is in one of the states, else False.
- """
- if states is None:
- return True
- return GitHubRunnerState.from_runner(runner) in states
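
`GitHubRunnerState.from_runner` above encodes a small truth table: busy wins even when the runner is offline, and only an online, non-busy runner counts as idle. A hedged sketch with plain values standing in for the enum members:

    # Hedged sketch of the from_runner state derivation above.
    def derive_state(busy: bool, status: str) -> str:
        state = "offline"
        if busy:
            state = "busy"  # a busy-and-offline runner is possible
        if status == "online" and not busy:
            state = "idle"
        return state

    assert derive_state(busy=True, status="offline") == "busy"
    assert derive_state(busy=False, status="online") == "idle"
    assert derive_state(busy=False, status="offline") == "offline"
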
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
deleted file mode 100644
index 72ded77fb..000000000
--- a/src/manager/runner_manager.py
+++ /dev/null
@@ -1,364 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Class for managing the GitHub self-hosted runners hosted on cloud instances."""
-
-import logging
-from dataclasses import dataclass
-from enum import Enum, auto
-from multiprocessing import Pool
-from typing import Iterator, Sequence, Type, cast
-
-from charm_state import GitHubPath
-from errors import GithubMetricsError, RunnerCreateError
-from github_type import SelfHostedRunner
-from manager.cloud_runner_manager import (
- CloudRunnerInstance,
- CloudRunnerManager,
- CloudRunnerState,
- HealthState,
- InstanceId,
-)
-from manager.github_runner_manager import GitHubRunnerManager, GitHubRunnerState
-from metrics import events as metric_events
-from metrics import github as github_metrics
-from metrics import runner as runner_metrics
-from metrics.runner import RunnerMetrics
-
-logger = logging.getLogger(__name__)
-
-IssuedMetricEventsStats = dict[Type[metric_events.Event], int]
-
-
-class FlushMode(Enum):
- """Strategy for flushing runners.
-
- Attributes:
- FLUSH_IDLE: Flush idle runners.
- FLUSH_BUSY: Flush busy runners.
- """
-
- FLUSH_IDLE = auto()
- FLUSH_BUSY = auto()
-
-
-@dataclass
-class RunnerInstance:
- """Represents an instance of runner.
-
- Attributes:
- name: Full name of the runner. Managed by the cloud runner manager.
- instance_id: ID of the runner. Managed by the runner manager.
- health: The health state of the runner.
- github_state: State on GitHub.
- cloud_state: State on cloud.
- """
-
- name: str
- instance_id: InstanceId
- health: HealthState
- github_state: GitHubRunnerState | None
- cloud_state: CloudRunnerState
-
- def __init__(self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedRunner | None):
- """Construct an instance.
-
- Args:
- cloud_instance: Information on the cloud instance.
- github_info: Information on the runner from GitHub.
- """
- self.name = cloud_instance.name
- self.instance_id = cloud_instance.instance_id
- self.health = cloud_instance.health
- self.github_state = (
- GitHubRunnerState.from_runner(github_info) if github_info is not None else None
- )
- self.cloud_state = cloud_instance.state
-
-
-@dataclass
-class RunnerManagerConfig:
- """Configuration for the runner manager.
-
- Attributes:
- token: GitHub personal access token to query GitHub API.
- path: Path to GitHub repository or organization to register the runners.
- """
-
- token: str
- path: GitHubPath
-
-
-class RunnerManager:
- """Manage the runners.
-
- Attributes:
- manager_name: A name to identify this manager.
- name_prefix: The name prefix of the runners.
- """
-
- def __init__(
- self,
- manager_name: str,
- cloud_runner_manager: CloudRunnerManager,
- config: RunnerManagerConfig,
- ):
- """Construct the object.
-
- Args:
- manager_name: A name to identify this manager.
- cloud_runner_manager: For managing the cloud instance of the runner.
- config: Configuration of this class.
- """
- self.manager_name = manager_name
- self._config = config
- self._cloud = cloud_runner_manager
- self.name_prefix = self._cloud.name_prefix
- self._github = GitHubRunnerManager(
- prefix=self.name_prefix, token=self._config.token, path=self._config.path
- )
-
- def create_runners(self, num: int) -> tuple[InstanceId, ...]:
- """Create runners.
-
- Args:
- num: Number of runners to create.
-
- Returns:
- List of instance IDs of the runners.
- """
- logger.info("Creating %s runners", num)
- registration_token = self._github.get_registration_token()
-
- create_runner_args = [
- RunnerManager._CreateRunnerArgs(self._cloud, registration_token) for _ in range(num)
- ]
- return RunnerManager._spawn_runners(create_runner_args)
-
- def get_runners(
- self,
- github_states: Sequence[GitHubRunnerState] | None = None,
- cloud_states: Sequence[CloudRunnerState] | None = None,
- ) -> tuple[RunnerInstance, ...]:
- """Get information on runners filtered by state.
-
- Only runners that have a cloud instance are returned.
-
- Args:
- github_states: Filter for the runners with these GitHub states. If None, all
- states will be included.
- cloud_states: Filter for the runners with these cloud states. If None, all states
- will be included.
-
- Returns:
- Information on the runners.
- """
- logger.info("Getting runners...")
- github_infos = self._github.get_runners(github_states)
- cloud_infos = self._cloud.get_runners(cloud_states)
- github_infos_map = {info["name"]: info for info in github_infos}
- cloud_infos_map = {info.name: info for info in cloud_infos}
- logger.info(
- "Found following runners: %s", cloud_infos_map.keys() | github_infos_map.keys()
- )
-
- runner_names = cloud_infos_map.keys() & github_infos_map.keys()
- cloud_only = cloud_infos_map.keys() - runner_names
- github_only = github_infos_map.keys() - runner_names
- if cloud_only:
- logger.warning(
- "Found runner instance on cloud but not registered on GitHub: %s", cloud_only
- )
- if github_only:
- logger.warning(
- "Found self-hosted runner on GitHub but no matching runner instance on cloud: %s",
- github_only,
- )
-
- runner_instances: list[RunnerInstance] = [
- RunnerInstance(
- cloud_infos_map[name], github_infos_map[name] if name in github_infos_map else None
- )
- for name in cloud_infos_map.keys()
- ]
- if cloud_states is not None:
- runner_instances = [
- runner for runner in runner_instances if runner.cloud_state in cloud_states
- ]
- if github_states is not None:
- runner_instances = [
- runner
- for runner in runner_instances
- if runner.github_state is not None and runner.github_state in github_states
- ]
- return cast(tuple[RunnerInstance, ...], tuple(runner_instances))
-
- def delete_runners(self, num: int) -> IssuedMetricEventsStats:
- """Delete runners.
-
- Args:
- num: The number of runners to delete.
-
- Returns:
- Stats on metrics events issued during the deletion of runners.
- """
- logger.info("Deleting %s number of runners", num)
- runners_list = self.get_runners()[:num]
- runner_names = [runner.name for runner in runners_list]
- logger.info("Deleting runners: %s", runner_names)
- remove_token = self._github.get_removal_token()
- return self._delete_runners(runners=runners_list, remove_token=remove_token)
-
- def flush_runners(
- self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE
- ) -> IssuedMetricEventsStats:
- """Delete runners according to state.
-
- Args:
- flush_mode: The type of runners affected by the deletion.
-
- Returns:
- Stats on metrics events issued during the deletion of runners.
- """
- match flush_mode:
- case FlushMode.FLUSH_IDLE:
- logger.info("Flushing idle runners...")
- case FlushMode.FLUSH_BUSY:
- logger.info("Flushing idle and busy runners...")
- case _:
- logger.critical(
- "Unknown flush mode %s encountered, contact developers", flush_mode
- )
-
- busy = False
- if flush_mode == FlushMode.FLUSH_BUSY:
- busy = True
- remove_token = self._github.get_removal_token()
- stats = self._cloud.flush_runners(remove_token, busy)
- return self._issue_runner_metrics(metrics=stats)
-
- def cleanup(self) -> IssuedMetricEventsStats:
- """Run cleanup of the runners and other resources.
-
- Returns:
- Stats on metrics events issued during the cleanup of runners.
- """
- self._github.delete_runners([GitHubRunnerState.OFFLINE])
- remove_token = self._github.get_removal_token()
- deleted_runner_metrics = self._cloud.cleanup(remove_token)
- return self._issue_runner_metrics(metrics=deleted_runner_metrics)
-
- @staticmethod
- def _spawn_runners(
- create_runner_args: Sequence["RunnerManager._CreateRunnerArgs"],
- ) -> tuple[InstanceId, ...]:
- """Parallel spawn of runners.
-
- The length of create_runner_args is the number of _create_runner invocations, and
- therefore the number of runners spawned.
-
- Args:
- create_runner_args: List of args for invoking the _create_runner method.
-
- Returns:
- A list of instance IDs of the runners spawned.
- """
- num = len(create_runner_args)
-
- instance_id_list = []
- with Pool(processes=min(num, 10)) as pool:
- jobs = pool.imap_unordered(
- func=RunnerManager._create_runner, iterable=create_runner_args
- )
- for _ in range(num):
- try:
- instance_id = next(jobs)
- except RunnerCreateError:
- logger.exception("Failed to spawn a runner.")
- except StopIteration:
- break
- else:
- instance_id_list.append(instance_id)
- return tuple(instance_id_list)
-
- def _delete_runners(
- self, runners: Sequence[RunnerInstance], remove_token: str
- ) -> IssuedMetricEventsStats:
- """Delete list of runners.
-
- Args:
- runners: The runners to delete.
- remove_token: The token for removing self-hosted runners.
-
- Returns:
- Stats on metrics events issued during the deletion of runners.
- """
- runner_metrics_list = []
- for runner in runners:
- deleted_runner_metrics = self._cloud.delete_runner(
- instance_id=runner.instance_id, remove_token=remove_token
- )
- if deleted_runner_metrics is not None:
- runner_metrics_list.append(deleted_runner_metrics)
- return self._issue_runner_metrics(metrics=iter(runner_metrics_list))
-
- def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetricEventsStats:
- """Issue runner metrics.
-
- Args:
- metrics: Runner metrics to issue.
-
- Returns:
- Stats on runner metrics issued.
- """
- total_stats: IssuedMetricEventsStats = {}
-
- for extracted_metrics in metrics:
- try:
- job_metrics = github_metrics.job(
- github_client=self._github.github,
- pre_job_metrics=extracted_metrics.pre_job,
- runner_name=extracted_metrics.runner_name,
- )
- except GithubMetricsError:
- logger.exception(
- "Failed to calculate job metrics for %s", extracted_metrics.runner_name
- )
- job_metrics = None
-
- issued_events = runner_metrics.issue_events(
- runner_metrics=extracted_metrics,
- job_metrics=job_metrics,
- flavor=self.manager_name,
- )
-
- for event_type in issued_events:
- total_stats[event_type] = total_stats.get(event_type, 0) + 1
-
- return total_stats
-
- @dataclass
- class _CreateRunnerArgs:
- """Arguments for the _create_runner function.
-
- Attributes:
- cloud_runner_manager: For managing the cloud instance of the runner.
- registration_token: The GitHub-provided token for registering runners.
- """
-
- cloud_runner_manager: CloudRunnerManager
- registration_token: str
-
- @staticmethod
- def _create_runner(args: _CreateRunnerArgs) -> InstanceId:
- """Create a single runner.
-
- This is a staticmethod for usage with multiprocessing.Pool.
-
- Args:
- args: The arguments.
-
- Returns:
- The instance ID of the runner created.
- """
- return args.cloud_runner_manager.create_runner(registration_token=args.registration_token)
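
`_spawn_runners` above fans runner creation out over a multiprocessing pool and tolerates per-runner failures. A hedged, self-contained sketch of the same pattern; `create_one` and `CreateError` are hypothetical stand-ins for `CloudRunnerManager.create_runner` and `RunnerCreateError`:

    # Hedged sketch of the _spawn_runners pattern: collect successful IDs,
    # skip (and, in the real code, log) failed spawns.
    from multiprocessing import Pool

    class CreateError(Exception):
        """Stand-in for RunnerCreateError."""

    def create_one(n: int) -> str:
        if n == 2:  # simulate one failed spawn
            raise CreateError("boom")
        return f"instance-{n}"

    def spawn(num: int) -> tuple[str, ...]:
        ids = []
        with Pool(processes=min(num, 10)) as pool:
            jobs = pool.imap_unordered(create_one, range(num))
            for _ in range(num):
                try:
                    ids.append(next(jobs))
                except CreateError:
                    pass  # the real code logs the exception and continues
                except StopIteration:
                    break
        return tuple(ids)

    if __name__ == "__main__":
        print(sorted(spawn(4)))  # three IDs; the failed spawn is skipped
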
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
deleted file mode 100644
index 271b92e51..000000000
--- a/src/manager/runner_scaler.py
+++ /dev/null
@@ -1,215 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Module for scaling the runners amount."""
-
-import logging
-import time
-from dataclasses import dataclass
-
-from pydantic import MongoDsn
-
-import reactive.runner_manager as reactive_runner_manager
-from charm_state import ReactiveConfig
-from errors import IssueMetricEventError, MissingServerConfigError
-from manager.cloud_runner_manager import HealthState
-from manager.github_runner_manager import GitHubRunnerState
-from manager.runner_manager import FlushMode, RunnerManager
-from metrics import events as metric_events
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class RunnerInfo:
- """Information on the runners.
-
- Attributes:
- online: The number of runners in online state.
- busy: The number of runners in busy state.
- offline: The number of runners in offline state.
- unknown: The number of runners in unknown state.
- runners: The names of the online runners.
- busy_runners: The names of the busy runners.
- """
-
- online: int
- busy: int
- offline: int
- unknown: int
- runners: tuple[str, ...]
- busy_runners: tuple[str, ...]
-
-
-class RunnerScaler:
- """Manage the reconcile of runners."""
-
- def __init__(self, runner_manager: RunnerManager, reactive_config: ReactiveConfig | None):
- """Construct the object.
-
- Args:
- runner_manager: The RunnerManager to perform runner reconciliation with.
- reactive_config: Reactive runner configuration.
- """
- self._manager = runner_manager
- self._reactive_config = reactive_config
-
- def get_runner_info(self) -> RunnerInfo:
- """Get information on the runners.
-
- Returns:
- The information on the runners.
- """
- runner_list = self._manager.get_runners()
- online = 0
- busy = 0
- offline = 0
- unknown = 0
- online_runners = []
- busy_runners = []
- for runner in runner_list:
- match runner.github_state:
- case GitHubRunnerState.BUSY:
- online += 1
- online_runners.append(runner.name)
- busy += 1
- busy_runners.append(runner.name)
- case GitHubRunnerState.IDLE:
- online += 1
- online_runners.append(runner.name)
- case GitHubRunnerState.OFFLINE:
- offline += 1
- case _:
- unknown += 1
- return RunnerInfo(
- online=online,
- busy=busy,
- offline=offline,
- unknown=unknown,
- runners=tuple(online_runners),
- busy_runners=tuple(busy_runners),
- )
-
- def flush(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> int:
- """Flush the runners.
-
- Args:
- flush_mode: Determines the types of runners to be flushed.
-
- Returns:
- Number of runners flushed.
- """
- metric_stats = self._manager.cleanup()
- delete_metric_stats = self._manager.flush_runners(flush_mode=flush_mode)
- events = set(delete_metric_stats.keys()) | set(metric_stats.keys())
- metric_stats = {
- event_name: delete_metric_stats.get(event_name, 0) + metric_stats.get(event_name, 0)
- for event_name in events
- }
- return metric_stats.get(metric_events.RunnerStop, 0)
-
- def reconcile(self, quantity: int) -> int:
- """Reconcile the quantity of runners.
-
- Args:
- quantity: The number of intended runners.
-
- Returns:
- The change in the number of runners.
- """
- logger.info("Start reconcile to %s runner", quantity)
-
- if self._reactive_config is not None:
- logger.info("Reactive configuration detected, going into experimental reactive mode.")
- return self._reconcile_reactive(quantity, self._reactive_config.mq_uri)
-
- start_timestamp = time.time()
- delete_metric_stats = None
- metric_stats = self._manager.cleanup()
- runners = self._manager.get_runners()
- logger.info("Reconcile runners from %s to %s", len(runners), quantity)
- runner_diff = quantity - len(runners)
- if runner_diff > 0:
- try:
- self._manager.create_runners(runner_diff)
- except MissingServerConfigError:
- logger.exception(
- "Unable to spawn runner due to missing server configuration, such as the image."
- )
- elif runner_diff < 0:
- delete_metric_stats = self._manager.delete_runners(-runner_diff)
- else:
- logger.info("No changes to the number of runners.")
- end_timestamp = time.time()
-
- # Merge the two metric stats.
- if delete_metric_stats is not None:
- metric_stats = {
- event_name: delete_metric_stats.get(event_name, 0)
- + metric_stats.get(event_name, 0)
- for event_name in set(delete_metric_stats) | set(metric_stats)
- }
-
- runner_list = self._manager.get_runners()
- busy_runners = [
- runner for runner in runner_list if runner.github_state == GitHubRunnerState.BUSY
- ]
- idle_runners = [
- runner for runner in runner_list if runner.github_state == GitHubRunnerState.IDLE
- ]
- offline_healthy_runners = [
- runner
- for runner in runner_list
- if runner.github_state == GitHubRunnerState.OFFLINE
- and runner.health == HealthState.HEALTHY
- ]
- unhealthy_states = {HealthState.UNHEALTHY, HealthState.UNKNOWN}
- unhealthy_runners = [runner for runner in runner_list if runner.health in unhealthy_states]
- logger.info("Found %s busy runners: %s", len(busy_runners), busy_runners)
- logger.info("Found %s idle runners: %s", len(idle_runners), idle_runners)
- logger.info(
- "Found %s offline runners that are healthy: %s",
- len(offline_healthy_runners),
- offline_healthy_runners,
- )
- logger.info("Found %s unhealthy runners: %s", len(unhealthy_runners), unhealthy_runners)
-
- try:
- available_runners = set(runner.name for runner in idle_runners) | set(
- runner.name for runner in offline_healthy_runners
- )
- logger.info(
- "Current available runners (idle + healthy offline): %s", available_runners
- )
- metric_events.issue_event(
- metric_events.Reconciliation(
- timestamp=time.time(),
- flavor=self._manager.manager_name,
- crashed_runners=metric_stats.get(metric_events.RunnerStart, 0)
- - metric_stats.get(metric_events.RunnerStop, 0),
- idle_runners=len(available_runners),
- duration=end_timestamp - start_timestamp,
- )
- )
- except IssueMetricEventError:
- logger.exception("Failed to issue Reconciliation metric")
-
- return runner_diff
-
- def _reconcile_reactive(self, quantity: int, mq_uri: MongoDsn) -> int:
- """Reconcile runners reactively.
-
- Args:
- quantity: Number of intended runners.
- mq_uri: The URI of the MQ to use to spawn runners reactively.
-
- Returns:
- The difference between intended runners and actual runners. In reactive mode
- this number is never negative as additional processes should terminate after a timeout.
- """
- logger.info("Reactive mode is experimental and not yet fully implemented.")
- return reactive_runner_manager.reconcile(
- quantity=quantity,
- mq_uri=mq_uri,
- queue_name=self._manager.manager_name,
- )
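
Both `flush` and `reconcile` above merge two per-event stats dictionaries by summing counts key-wise. A hedged sketch with illustrative event names:

    # Hedged sketch of the key-wise merge of metric event counts.
    cleanup_stats = {"runner_start": 2, "runner_stop": 1}
    delete_stats = {"runner_stop": 3, "reconciliation": 1}
    merged = {
        event: cleanup_stats.get(event, 0) + delete_stats.get(event, 0)
        for event in cleanup_stats.keys() | delete_stats.keys()
    }
    assert merged == {"runner_start": 2, "runner_stop": 4, "reconciliation": 1}
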
diff --git a/src/metrics/__init__.py b/src/metrics/__init__.py
deleted file mode 100644
index d2a48eaed..000000000
--- a/src/metrics/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Package for common metrics-related code."""
diff --git a/src/metrics/events.py b/src/metrics/events.py
deleted file mode 100644
index 6f858166d..000000000
--- a/src/metrics/events.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Models and functions for the metric events."""
-import logging
-from pathlib import Path
-from typing import Any, Optional
-
-from pydantic import BaseModel, NonNegativeFloat
-
-from errors import IssueMetricEventError
-
-METRICS_LOG_PATH = Path("/var/log/github-runner-metrics.log")
-
-
-logger = logging.getLogger(__name__)
-
-
-class Event(BaseModel):
- """Base class for metric events.
-
- Attributes:
- timestamp: The UNIX time stamp of the time at which the event was originally issued.
- event: The name of the event. Will be set to the class name in snake case if not provided.
- """
-
- timestamp: NonNegativeFloat
- event: str
-
- @staticmethod
- def _camel_to_snake(camel_case_string: str) -> str:
- """Convert a camel case string to snake case.
-
- Args:
- camel_case_string: The string to convert.
-
- Returns:
- The converted string.
- """
- snake_case_string = camel_case_string[0].lower()
- for char in camel_case_string[1:]:
- if char.isupper():
- snake_case_string += "_" + char.lower()
- else:
- snake_case_string += char
- return snake_case_string
-
- def __init__(self, *args: Any, **kwargs: Any):
- """Initialize the event.
-
- Args:
- args: The positional arguments to pass to the base class.
- kwargs: The keyword arguments to pass to the base class. These are used to set the
- specific fields. E.g. timestamp=12345 will set the timestamp field to 12345.
- """
- if "event" not in kwargs:
- event = self._camel_to_snake(self.__class__.__name__)
- kwargs["event"] = event
- super().__init__(*args, **kwargs)
-
-
-class RunnerInstalled(Event):
- """Metric event for when a runner is installed.
-
- Attributes:
- flavor: Describes the characteristics of the runner.
- The flavor could be for example "small".
- duration: The duration of the installation in seconds.
- """
-
- flavor: str
- duration: NonNegativeFloat
-
-
-class RunnerStart(Event):
- """Metric event for when a runner is started.
-
- Attributes:
- flavor: Describes the characteristics of the runner.
- The flavor could be for example "small".
- workflow: The workflow name.
- repo: The repository name.
- github_event: The github event.
- idle: The idle time in seconds.
- queue_duration: The time in seconds it took before the runner picked up the job.
- This is optional as we rely on the Github API and there may be problems
- retrieving the data.
- """
-
- flavor: str
- workflow: str
- repo: str
- github_event: str
- idle: NonNegativeFloat
- queue_duration: Optional[NonNegativeFloat]
-
-
-class CodeInformation(BaseModel):
- """Information about a status code.
-
- This could be, for example, an exit code or an HTTP status code.
-
- Attributes:
- code: The status code.
- """
-
- code: int
-
-
-class RunnerStop(Event):
- """Metric event for when a runner is stopped.
-
- Attributes:
- flavor: Describes the characteristics of the runner.
- The flavor could be for example "small".
- workflow: The workflow name.
- repo: The repository name.
- github_event: The github event.
- status: A string describing the reason for stopping the runner.
- status_info: More information about the status.
- job_duration: The duration of the job in seconds.
- job_conclusion: The job conclusion, e.g. "success", "failure", ...
- """
-
- flavor: str
- workflow: str
- repo: str
- github_event: str
- status: str
- status_info: Optional[CodeInformation]
- job_duration: NonNegativeFloat
- job_conclusion: Optional[str]
-
-
-class Reconciliation(Event):
- """Metric event for when the charm has finished reconciliation.
-
- Attributes:
- flavor: Describes the characteristics of the runner.
- The flavor could be for example "small".
- crashed_runners: The number of crashed runners.
- idle_runners: The number of idle runners.
- duration: The duration of the reconciliation in seconds.
- """
-
- flavor: str
- crashed_runners: int
- idle_runners: int
- duration: NonNegativeFloat
-
-
-def issue_event(event: Event) -> None:
- """Issue a metric event.
-
- The metric event is logged to the metrics log.
-
- Args:
- event: The metric event to log.
-
- Raises:
- IssueMetricEventError: If the event cannot be logged.
- """
- try:
- with METRICS_LOG_PATH.open(mode="a", encoding="utf-8") as metrics_file:
- metrics_file.write(f"{event.json(exclude_none=True)}\n")
- except OSError as exc:
- raise IssueMetricEventError(f"Cannot write to {METRICS_LOG_PATH}") from exc
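
`Event` above derives its `event` field from the class name, and `issue_event` appends one JSON object per line to the metrics log. A hedged, file-free sketch of both behaviours:

    # Hedged sketch: name derivation as in _camel_to_snake, and the
    # one-JSON-object-per-line format written to the metrics log.
    import json

    def camel_to_snake(name: str) -> str:
        out = name[0].lower()
        for char in name[1:]:
            out += "_" + char.lower() if char.isupper() else char
        return out

    assert camel_to_snake("RunnerInstalled") == "runner_installed"
    line = json.dumps({"event": camel_to_snake("RunnerStop"), "timestamp": 0.0})
    assert line == '{"event": "runner_stop", "timestamp": 0.0}'
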
diff --git a/src/metrics/github.py b/src/metrics/github.py
deleted file mode 100644
index e40574eb7..000000000
--- a/src/metrics/github.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Functions to calculate metrics from data retrieved from GitHub."""
-import logging
-
-from charm_state import GitHubRepo
-from errors import GithubMetricsError, JobNotFoundError
-from github_client import GithubClient
-from metrics.runner import PreJobMetrics
-from metrics.type import GithubJobMetrics
-
-logger = logging.getLogger(__name__)
-
-
-def job(
- github_client: GithubClient, pre_job_metrics: PreJobMetrics, runner_name: str
-) -> GithubJobMetrics:
- """Calculate the job metrics for a runner.
-
- The Github API is accessed to retrieve the job data for the runner.
-
- Args:
- github_client: The GitHub API client.
- pre_job_metrics: The pre-job metrics.
- runner_name: The name of the runner.
-
- Raises:
- GithubMetricsError: If the job for given workflow run is not found.
-
- Returns:
- The job metrics.
- """
- owner, repo = pre_job_metrics.repository.split("/", maxsplit=1)
-
- try:
- job_info = github_client.get_job_info(
- path=GitHubRepo(owner=owner, repo=repo),
- workflow_run_id=pre_job_metrics.workflow_run_id,
- runner_name=runner_name,
- )
- except JobNotFoundError as exc:
- raise GithubMetricsError from exc
- logger.debug(
- "Job info for runner %s with workflow run id %s: %s",
- runner_name,
- pre_job_metrics.workflow_run_id,
- job_info,
- )
-
- queue_duration = (job_info.started_at - job_info.created_at).total_seconds()
-
- return GithubJobMetrics(queue_duration=queue_duration, conclusion=job_info.conclusion)
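
The queue duration above is simply the gap between job creation and pickup as reported by GitHub. A hedged sketch with illustrative timestamps:

    # Hedged sketch of the queue-duration calculation above.
    from datetime import datetime, timezone

    created_at = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
    started_at = datetime(2024, 1, 1, 12, 0, 42, tzinfo=timezone.utc)
    queue_duration = (started_at - created_at).total_seconds()
    assert queue_duration == 42.0
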
diff --git a/src/metrics/runner.py b/src/metrics/runner.py
deleted file mode 100644
index b0ccc191a..000000000
--- a/src/metrics/runner.py
+++ /dev/null
@@ -1,470 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Classes and function to extract the metrics from storage and issue runner metrics events."""
-
-import json
-import logging
-from enum import Enum
-from json import JSONDecodeError
-from pathlib import Path
-from typing import Iterator, Optional, Type
-
-from pydantic import BaseModel, Field, NonNegativeFloat, ValidationError
-
-from errors import (
- CorruptMetricDataError,
- DeleteMetricsStorageError,
- IssueMetricEventError,
- RunnerMetricsError,
-)
-from metrics import events as metric_events
-from metrics.storage import MetricsStorage
-from metrics.storage import StorageManager as MetricsStorageManager
-from metrics.storage import move_to_quarantine
-from metrics.type import GithubJobMetrics
-
-logger = logging.getLogger(__name__)
-
-FILE_SIZE_BYTES_LIMIT = 1024
-PRE_JOB_METRICS_FILE_NAME = "pre-job-metrics.json"
-POST_JOB_METRICS_FILE_NAME = "post-job-metrics.json"
-RUNNER_INSTALLED_TS_FILE_NAME = "runner-installed.timestamp"
-
-
-class PreJobMetrics(BaseModel):
- """Metrics for the pre-job phase of a runner.
-
- Attributes:
- timestamp: The UNIX time stamp of the time at which the event was originally issued.
- workflow: The workflow name.
- workflow_run_id: The workflow run id.
- repository: The repository path in the format 'owner/repo'.
- event: The github event.
- """
-
- timestamp: NonNegativeFloat
- workflow: str
- workflow_run_id: str
- repository: str = Field(None, regex=r"^.+/.+$")
- event: str
-
-
-class PostJobStatus(str, Enum):
- """The status of the post-job phase of a runner.
-
- Attributes:
- NORMAL: Represents a normal post-job.
- ABNORMAL: Represents an error with post-job.
- REPO_POLICY_CHECK_FAILURE: Represents an error with repo-policy-compliance check.
- """
-
- NORMAL = "normal"
- ABNORMAL = "abnormal"
- REPO_POLICY_CHECK_FAILURE = "repo-policy-check-failure"
-
-
-class CodeInformation(BaseModel):
- """Information about a status code.
-
- Attributes:
- code: The status code.
- """
-
- code: int
-
-
-class PostJobMetrics(BaseModel):
- """Metrics for the post-job phase of a runner.
-
- Attributes:
- timestamp: The UNIX time stamp of the time at which the event was originally issued.
- status: The status of the job.
- status_info: More information about the status.
- """
-
- timestamp: NonNegativeFloat
- status: PostJobStatus
- status_info: Optional[CodeInformation]
-
-
-class RunnerMetrics(BaseModel):
- """Metrics for a runner.
-
- Attributes:
- installed_timestamp: The UNIX time stamp of the time at which the runner was installed.
- pre_job: The metrics for the pre-job phase.
- post_job: The metrics for the post-job phase.
- runner_name: The name of the runner.
- """
-
- installed_timestamp: NonNegativeFloat
- pre_job: PreJobMetrics
- post_job: Optional[PostJobMetrics]
- runner_name: str
-
-
-def extract(
- metrics_storage_manager: MetricsStorageManager, runners: set[str], include: bool = False
-) -> Iterator[RunnerMetrics]:
- """Extract metrics from runners.
-
- The metrics are extracted from the metrics storage of the runners.
- Orphan storages are cleaned up.
-
- If corrupt data is found, the metrics are not processed further and the storage is moved
- to a special quarantine directory, as this may indicate that a malicious
- runner is trying to manipulate the files on the storage.
-
- In order to avoid DoS attacks, the file size is also checked.
-
- Args:
- metrics_storage_manager: The metrics storage manager.
- runners: The runners to include or exclude.
- include: If true, the provided runners are included for metric extraction, else the provided
- runners are excluded.
-
- Yields:
- Extracted runner metrics of a particular runner.
- """
- for ms in metrics_storage_manager.list_all():
- if (include and ms.runner_name in runners) or (
- not include and ms.runner_name not in runners
- ):
- runner_metrics = _extract_storage(
- metrics_storage_manager=metrics_storage_manager, metrics_storage=ms
- )
- if not runner_metrics:
- logger.warning("Not able to issue metrics for runner %s", ms.runner_name)
- else:
- yield runner_metrics
-
-
-def issue_events(
- runner_metrics: RunnerMetrics,
- flavor: str,
- job_metrics: Optional[GithubJobMetrics],
-) -> set[Type[metric_events.Event]]:
- """Issue the metrics events for a runner.
-
- Args:
- runner_metrics: The metrics for the runner.
- flavor: The flavor of the runner.
- job_metrics: The metrics about the job run by the runner.
-
- Returns:
- A set of issued events.
- """
- runner_start_event = _create_runner_start(runner_metrics, flavor, job_metrics)
-
- issued_events = set()
- try:
- metric_events.issue_event(runner_start_event)
- except ValidationError:
- logger.exception(
- "Not able to issue RunnerStart metric for "
- "runner %s with pre-job metrics %s and job_metrics %s."
- "Will not issue RunnerStop metric.",
- runner_metrics.runner_name,
- runner_metrics.pre_job,
- job_metrics,
- )
- except IssueMetricEventError:
- logger.exception(
- "Not able to issue RunnerStart metric for runner %s. "
- "Will not issue RunnerStop metric.",
- runner_metrics.runner_name,
- )
- else:
- issued_events = {metric_events.RunnerStart}
-
- # Do not issue the RunnerStop metric if the RunnerStart metric could not be issued.
- if not issued_events:
- return issued_events
-
- if runner_metrics.post_job:
- runner_stop_event = _create_runner_stop(runner_metrics, flavor, job_metrics)
-
- try:
- metric_events.issue_event(runner_stop_event)
- except ValidationError:
- logger.exception(
- "Not able to issue RunnerStop metric for "
- "runner %s with pre-job metrics %s, post-job metrics %s and job_metrics %s.",
- runner_metrics.runner_name,
- runner_metrics.pre_job,
- runner_metrics.post_job,
- job_metrics,
- )
- except IssueMetricEventError:
- logger.exception(
- "Not able to issue RunnerStop metric for runner %s.", runner_metrics.runner_name
- )
- return issued_events
-
- issued_events.add(metric_events.RunnerStop)
-
- return issued_events
-
-
-def _create_runner_start(
- runner_metrics: RunnerMetrics, flavor: str, job_metrics: Optional[GithubJobMetrics]
-) -> metric_events.RunnerStart:
- """Create the RunnerStart event.
-
- Args:
- runner_metrics: The metrics for the runner.
- flavor: The flavor of the runner.
- job_metrics: The metrics about the job run by the runner.
-
- Returns:
- The RunnerStart event.
- """
- # When a job gets picked up directly after spawning, the runner_metrics installed timestamp
- # might be later than the pre-job timestamp. This is because we issue the runner installed
- # timestamp for OpenStack only after waiting, with delays, for the runner to be ready.
- # We set the idle_duration to 0 in this case.
- if runner_metrics.pre_job.timestamp < runner_metrics.installed_timestamp:
- logger.warning(
- "Pre-job timestamp %d is before installed timestamp %d for runner %s."
- " Setting idle_duration to zero",
- runner_metrics.pre_job.timestamp,
- runner_metrics.installed_timestamp,
- runner_metrics.runner_name,
- )
- idle_duration = max(runner_metrics.pre_job.timestamp - runner_metrics.installed_timestamp, 0)
-
- # GitHub API returns started_at < created_at in some rare cases.
- if job_metrics and job_metrics.queue_duration < 0:
- logger.warning(
- "Queue duration for runner %s is negative: %f. Setting it to zero.",
- runner_metrics.runner_name,
- job_metrics.queue_duration,
- )
- queue_duration = max(job_metrics.queue_duration, 0) if job_metrics else None
-
- return metric_events.RunnerStart(
- timestamp=runner_metrics.pre_job.timestamp,
- flavor=flavor,
- workflow=runner_metrics.pre_job.workflow,
- repo=runner_metrics.pre_job.repository,
- github_event=runner_metrics.pre_job.event,
- idle=idle_duration,
- queue_duration=queue_duration,
- )
-
-
-def _create_runner_stop(
- runner_metrics: RunnerMetrics, flavor: str, job_metrics: GithubJobMetrics
-) -> metric_events.RunnerStop:
- """Create the RunnerStop event.
-
- Expects that the runner_metrics.post_job is not None.
-
- Args:
- runner_metrics: The metrics for the runner.
- flavor: The flavor of the runner.
- job_metrics: The metrics about the job run by the runner.
-
- Raises:
- RunnerMetricsError: Post job runner metric not found. Should not happen.
-
- Returns:
- The RunnerStop event.
- """
- if runner_metrics.post_job is None:
- raise RunnerMetricsError(
- "Post job runner metric not found during RunnerStop event, contact developers"
- )
-
- # When a job gets cancelled directly after spawning,
- # the post-job timestamp might be lower than the pre-job timestamp.
- # This is because we don't have a real post-job script but rather use
- # the exit code of the runner application, which might exit before the pre-job script
- # is done in edge cases. See also:
- # https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/running-scripts-before-or-after-a-job#triggering-the-scripts
- # We set the job_duration to 0 in this case.
- if runner_metrics.post_job.timestamp < runner_metrics.pre_job.timestamp:
- logger.warning(
- "Post-job timestamp %d is before pre-job timestamp %d for runner %s."
- " Setting job_duration to zero",
- runner_metrics.post_job.timestamp,
- runner_metrics.pre_job.timestamp,
- runner_metrics.runner_name,
- )
- job_duration = max(runner_metrics.post_job.timestamp - runner_metrics.pre_job.timestamp, 0)
-
- return metric_events.RunnerStop(
- timestamp=runner_metrics.post_job.timestamp,
- flavor=flavor,
- workflow=runner_metrics.pre_job.workflow,
- repo=runner_metrics.pre_job.repository,
- github_event=runner_metrics.pre_job.event,
- status=runner_metrics.post_job.status,
- status_info=runner_metrics.post_job.status_info,
- job_duration=job_duration,
- job_conclusion=job_metrics.conclusion if job_metrics else None,
- )
-
-
-def _extract_storage(
- metrics_storage_manager: MetricsStorageManager,
- metrics_storage: MetricsStorage,
-) -> Optional[RunnerMetrics]:
- """Extract metrics from a metrics storage.
-
- Args:
- metrics_storage_manager: The metrics storage manager.
- metrics_storage: The metrics storage for a specific runner.
-
- Returns:
- The extracted metrics if at least the pre-job metrics are present.
- """
- runner_name = metrics_storage.runner_name
- try:
- logger.debug("Extracting metrics from metrics storage for runner %s", runner_name)
- metrics_from_fs = _extract_metrics_from_storage(metrics_storage)
- except CorruptMetricDataError:
- logger.exception("Corrupt metric data found for runner %s", runner_name)
- move_to_quarantine(metrics_storage_manager, runner_name)
- return None
-
- logger.debug("Cleaning metrics storage for runner %s", runner_name)
- _clean_up_storage(
- metrics_storage_manager=metrics_storage_manager, metrics_storage=metrics_storage
- )
- return metrics_from_fs
-
-
-def _extract_metrics_from_storage(metrics_storage: MetricsStorage) -> Optional[RunnerMetrics]:
- """Extract metrics from metrics storage for a runner.
-
- Args:
- metrics_storage: The metrics storage for a specific runner.
-
- Returns:
- The extracted metrics if at least the pre-job metrics are present.
-
- Raises:
- CorruptMetricDataError: Raised if one of the files is not valid or too large.
- """
- if too_large_files := _inspect_file_sizes(metrics_storage):
- raise CorruptMetricDataError(
- f"File size of {too_large_files} is too large. "
- f"The limit is {FILE_SIZE_BYTES_LIMIT} bytes."
- )
-
- runner_name = metrics_storage.runner_name
- try:
- installed_timestamp = metrics_storage.path.joinpath(
- RUNNER_INSTALLED_TS_FILE_NAME
- ).read_text()
- logger.debug("Runner %s installed at %s", runner_name, installed_timestamp)
- except FileNotFoundError:
- logger.exception("installed_timestamp not found for runner %s", runner_name)
- return None
-
- try:
- pre_job_metrics = _extract_file_from_storage(
- metrics_storage=metrics_storage, filename=PRE_JOB_METRICS_FILE_NAME
- )
- if not pre_job_metrics:
- return None
- logger.debug("Pre-job metrics for runner %s: %s", runner_name, pre_job_metrics)
-
- post_job_metrics = _extract_file_from_storage(
- metrics_storage=metrics_storage, filename=POST_JOB_METRICS_FILE_NAME
- )
- logger.debug("Post-job metrics for runner %s: %s", runner_name, post_job_metrics)
- # TODO: 2024-04-02 - We should define a new error, wrap it and re-raise it.
- except CorruptMetricDataError: # pylint: disable=try-except-raise
- raise
-
- try:
- return RunnerMetrics(
- installed_timestamp=installed_timestamp,
- pre_job=PreJobMetrics(**pre_job_metrics),
- post_job=PostJobMetrics(**post_job_metrics) if post_job_metrics else None,
- runner_name=runner_name,
- )
- except ValidationError as exc:
- raise CorruptMetricDataError(str(exc)) from exc
-
-
-def _inspect_file_sizes(metrics_storage: MetricsStorage) -> tuple[Path, ...]:
- """Inspect the file sizes of the metrics storage.
-
- Args:
- metrics_storage: The metrics storage for a specific runner.
-
- Returns:
- A tuple of files whose size is larger than the limit.
- """
- files: list[Path] = [
- metrics_storage.path.joinpath(PRE_JOB_METRICS_FILE_NAME),
- metrics_storage.path.joinpath(POST_JOB_METRICS_FILE_NAME),
- metrics_storage.path.joinpath(RUNNER_INSTALLED_TS_FILE_NAME),
- ]
-
- return tuple(
- filter(lambda file: file.exists() and file.stat().st_size > FILE_SIZE_BYTES_LIMIT, files)
- )
-
-
-def _extract_file_from_storage(metrics_storage: MetricsStorage, filename: str) -> dict | None:
- """Extract a particular metric file from metrics storage.
-
- Args:
- metrics_storage: The metrics storage for a specific runner.
- filename: The metrics filename.
-
- Raises:
- CorruptMetricDataError: If any errors have been found within the metric.
-
- Returns:
- Metrics for the given runner if present.
- """
- try:
- job_metrics = json.loads(
- metrics_storage.path.joinpath(filename).read_text(encoding="utf-8")
- )
- except FileNotFoundError:
- logger.warning("%s not found for runner %s.", filename, metrics_storage.runner_name)
- return None
- except JSONDecodeError as exc:
- raise CorruptMetricDataError(str(exc)) from exc
- if not isinstance(job_metrics, dict):
- raise CorruptMetricDataError(
- f"{filename} metrics for runner {metrics_storage.runner_name} is not a JSON object."
- )
- return job_metrics
-
-
-def _clean_up_storage(
- metrics_storage_manager: MetricsStorageManager, metrics_storage: MetricsStorage
-) -> None:
- """Clean up the metrics storage.
-
- Remove all metric files and afterwards the storage.
-
- Args:
- metrics_storage_manager: The metrics storage manager.
- metrics_storage: The metrics storage for a specific runner.
- """
- try:
- metrics_storage.path.joinpath(RUNNER_INSTALLED_TS_FILE_NAME).unlink(missing_ok=True)
- metrics_storage.path.joinpath(PRE_JOB_METRICS_FILE_NAME).unlink(missing_ok=True)
- metrics_storage.path.joinpath(POST_JOB_METRICS_FILE_NAME).unlink(missing_ok=True)
- except OSError:
- logger.exception(
- "Could not remove metric files for runner %s, "
- "this may lead to duplicate metrics issued",
- metrics_storage.runner_name,
- )
-
- try:
- metrics_storage_manager.delete(metrics_storage.runner_name)
- except DeleteMetricsStorageError:
- logger.exception(
- "Could not delete metrics storage for runner %s.", metrics_storage.runner_name
- )
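
A recurring guard in this module is clamping durations that can come out negative because of timestamp ordering quirks. A hedged sketch of the idle_duration case from `_create_runner_start`:

    # Hedged sketch: a pre-job timestamp that predates the installed
    # timestamp (possible on OpenStack, per the comment above) yields an
    # idle_duration clamped to zero.
    installed_timestamp = 100.0
    pre_job_timestamp = 90.0
    idle_duration = max(pre_job_timestamp - installed_timestamp, 0)
    assert idle_duration == 0
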
diff --git a/src/metrics/runner_logs.py b/src/metrics/runner_logs.py
deleted file mode 100644
index ec7923c9c..000000000
--- a/src/metrics/runner_logs.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Functions to pull and remove the logs of the crashed runners."""
-
-import logging
-import shutil
-import time
-from datetime import datetime
-from pathlib import Path
-
-RUNNER_LOGS_DIR_PATH = Path("/var/log/github-runner-logs")
-
-SYSLOG_PATH = Path("/var/log/syslog")
-
-OUTDATED_LOGS_IN_SECONDS = 7 * 24 * 60 * 60
-
-logger = logging.getLogger(__name__)
-
-
-def create_logs_dir(runner_name: str) -> Path:
- """Create the directory to store the logs of the crashed runners.
-
- Args:
- runner_name: The name of the runner.
-
- Returns:
- The path to the directory where the logs of the crashed runners will be stored.
- """
- target_log_path = RUNNER_LOGS_DIR_PATH / runner_name
- target_log_path.mkdir(parents=True, exist_ok=True)
-
- return target_log_path
-
-
-def remove_outdated() -> None:
- """Remove the logs that are too old."""
- maxage_absolute = time.time() - OUTDATED_LOGS_IN_SECONDS
- dt_object = datetime.fromtimestamp(maxage_absolute)
- logger.info(
- "Removing the outdated logs of the crashed runners. "
- "All logs older than %s will be removed.",
- dt_object.strftime("%Y-%m-%d %H:%M:%S"),
- )
-
- for log_path in RUNNER_LOGS_DIR_PATH.glob("*"):
- if log_path.is_dir() and (log_path.stat().st_mtime < maxage_absolute):
- logger.info("Removing the outdated logs of the runner %s.", log_path.name)
- try:
- shutil.rmtree(log_path)
- except OSError:
- logger.exception(
- "Unable to remove the outdated logs of the runner %s.", log_path.name
- )
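
`remove_outdated` above prunes log directories whose mtime falls before a seven-day cutoff. A hedged sketch of the cutoff arithmetic:

    # Hedged sketch of the age cutoff in remove_outdated above.
    import time

    OUTDATED_LOGS_IN_SECONDS = 7 * 24 * 60 * 60
    maxage_absolute = time.time() - OUTDATED_LOGS_IN_SECONDS
    mtime_eight_days_ago = time.time() - 8 * 24 * 60 * 60
    assert mtime_eight_days_ago < maxage_absolute  # would be removed
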
diff --git a/src/metrics/storage.py b/src/metrics/storage.py
deleted file mode 100644
index c9b41a2f5..000000000
--- a/src/metrics/storage.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Classes and functions defining the metrics storage.
-
-It contains a protocol and reference implementation.
-"""
-import logging
-import shutil
-import tarfile
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Callable, Iterator, Protocol
-
-from errors import (
- CreateMetricsStorageError,
- DeleteMetricsStorageError,
- GetMetricsStorageError,
- QuarantineMetricsStorageError,
-)
-
-FILESYSTEM_OWNER = "ubuntu:ubuntu"
-FILESYSTEM_BASE_PATH = Path("/home/ubuntu/runner-fs")
-FILESYSTEM_QUARANTINE_PATH = Path("/home/ubuntu/runner-fs-quarantine")
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class MetricsStorage:
- """Storage for the metrics.
-
- Attributes:
- path: The path to the directory holding the metrics inside the charm.
- runner_name: The name of the associated runner.
- """
-
- path: Path
- runner_name: str
-
-
-class StorageManager(Protocol): # pylint: disable=too-few-public-methods
- """A protocol defining the methods for managing the metrics storage.
-
- Attributes:
- create: Method to create a new storage. Returns the created storage.
- Raises an exception CreateMetricsStorageError if the storage already exists.
- list_all: Method to list all storages.
- get: Method to get a storage by name.
- delete: Method to delete a storage by name.
- """
-
- create: Callable[[str], MetricsStorage]
- list_all: Callable[[], Iterator[MetricsStorage]]
- get: Callable[[str], MetricsStorage]
- delete: Callable[[str], None]
-
-
-def _get_runner_fs_path(runner_name: str) -> Path:
- """Get the path of the runner shared filesystem.
-
- Args:
- runner_name: The name of the runner.
-
- Returns:
- The path of the runner shared filesystem.
- """
- return FILESYSTEM_BASE_PATH / runner_name
-
-
-def create(runner_name: str) -> MetricsStorage:
- """Create metrics storage for the runner.
-
- The method is not idempotent and will raise an exception
- if the storage already exists.
-
- Args:
- runner_name: The name of the runner.
-
- Returns:
- The metrics storage object.
-
- Raises:
- CreateMetricsStorageError: If the creation of the shared filesystem fails.
- """
- try:
- FILESYSTEM_BASE_PATH.mkdir(exist_ok=True)
- FILESYSTEM_QUARANTINE_PATH.mkdir(exist_ok=True)
- except OSError as exc:
- raise CreateMetricsStorageError("Failed to create metrics storage directories") from exc
-
- runner_fs_path = _get_runner_fs_path(runner_name)
-
- try:
- runner_fs_path.mkdir()
- except FileExistsError as exc:
- raise CreateMetricsStorageError(
- f"Metrics storage for runner {runner_name} already exists."
- ) from exc
-
- return MetricsStorage(runner_fs_path, runner_name)
-
-
-def list_all() -> Iterator[MetricsStorage]:
- """List all the metric storages.
-
- Yields:
- A metrics storage object.
- """
- if not FILESYSTEM_BASE_PATH.exists():
- return
-
- directories = (entry for entry in FILESYSTEM_BASE_PATH.iterdir() if entry.is_dir())
- for directory in directories:
- try:
- fs = get(runner_name=directory.name)
- except GetMetricsStorageError:
- logger.error("Failed to get metrics storage for runner %s", directory.name)
- else:
- yield fs
-
-
-def get(runner_name: str) -> MetricsStorage:
- """Get the metrics storage for the runner.
-
- Args:
- runner_name: The name of the runner.
-
- Returns:
- The metrics storage object.
-
- Raises:
- GetMetricsStorageError: If the storage does not exist.
- """
- runner_fs_path = _get_runner_fs_path(runner_name)
- if not runner_fs_path.exists():
- raise GetMetricsStorageError(f"Metrics storage for runner {runner_name} not found.")
-
- return MetricsStorage(runner_fs_path, runner_name)
-
-
-def delete(runner_name: str) -> None:
- """Delete the metrics storage for the runner.
-
- Args:
- runner_name: The name of the runner.
-
- Raises:
- DeleteMetricsStorageError: If the storage could not be deleted.
- """
- runner_fs_path = _get_runner_fs_path(runner_name=runner_name)
-
- try:
- shutil.rmtree(runner_fs_path)
- except OSError as exc:
- raise DeleteMetricsStorageError(
- f"Failed to remove metrics storage for runner {runner_name}"
- ) from exc
-
-
-def move_to_quarantine(storage_manager: StorageManager, runner_name: str) -> None:
- """Archive the metrics storage for the runner and delete it.
-
- Args:
- storage_manager: The storage manager.
- runner_name: The name of the runner.
-
- Raises:
- QuarantineMetricsStorageError: If the metrics storage could not be quarantined.
- """
- try:
- runner_fs = storage_manager.get(runner_name)
- except GetMetricsStorageError as exc:
- raise QuarantineMetricsStorageError(
- f"Failed to get metrics storage for runner {runner_name}"
- ) from exc
-
- tarfile_path = FILESYSTEM_QUARANTINE_PATH.joinpath(runner_name).with_suffix(".tar.gz")
- try:
- with tarfile.open(tarfile_path, "w:gz") as tar:
- tar.add(runner_fs.path, arcname=runner_fs.path.name)
- except OSError as exc:
- raise QuarantineMetricsStorageError(
- f"Failed to archive metrics storage for runner {runner_name}"
- ) from exc
-
- try:
- storage_manager.delete(runner_name)
- except DeleteMetricsStorageError as exc:
- raise QuarantineMetricsStorageError(
- f"Failed to delete metrics storage for runner {runner_name}"
- ) from exc
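
`move_to_quarantine` above archives a runner's storage as `<runner>.tar.gz` before deleting the original. A hedged, self-contained sketch using temporary directories in place of the real base and quarantine paths:

    # Hedged sketch of the quarantine step: archive, then remove.
    import shutil
    import tarfile
    import tempfile
    from pathlib import Path

    base = Path(tempfile.mkdtemp())
    runner_fs = base / "runner-0"
    runner_fs.mkdir()
    (runner_fs / "pre-job-metrics.json").write_text("{}", encoding="utf-8")

    quarantine = base / "quarantine"
    quarantine.mkdir()
    archive = quarantine.joinpath(runner_fs.name).with_suffix(".tar.gz")
    with tarfile.open(archive, "w:gz") as tar:
        tar.add(runner_fs, arcname=runner_fs.name)
    shutil.rmtree(runner_fs)
    assert archive.exists() and not runner_fs.exists()
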
diff --git a/src/metrics/type.py b/src/metrics/type.py
deleted file mode 100644
index fd45314f6..000000000
--- a/src/metrics/type.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Data types used by modules handling metrics."""
-
-from typing import NamedTuple, Optional
-
-from github_type import JobConclusion
-
-
-class GithubJobMetrics(NamedTuple):
- """Metrics about a job.
-
- Attributes:
- queue_duration: The time in seconds the job took before the runner picked it up.
- conclusion: The conclusion of the job.
- """
-
- queue_duration: float
- conclusion: Optional[JobConclusion]
diff --git a/src/openstack_cloud/__init__.py b/src/openstack_cloud/__init__.py
deleted file mode 100644
index 3f9935aab..000000000
--- a/src/openstack_cloud/__init__.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Module for managing Openstack cloud."""
-
-import logging
-from pathlib import Path
-from typing import TypedDict, cast
-
-import yaml
-
-from errors import OpenStackInvalidConfigError
-
-logger = logging.getLogger(__name__)
-
-
-CLOUDS_YAML_PATH = Path.home() / ".config/openstack/clouds.yaml"
-
-
-class CloudConfig(TypedDict):
- """The parsed clouds.yaml configuration dictionary.
-
- Attributes:
- clouds: A mapping of key "clouds" to cloud name mapped to cloud configuration.
- """
-
- clouds: dict[str, dict]
-
-
-def _validate_cloud_config(cloud_config: dict) -> CloudConfig:
- """Validate the format of the cloud configuration.
-
- Args:
- cloud_config: The configuration in clouds.yaml format to validate.
-
- Raises:
- OpenStackInvalidConfigError: if the format of the config is invalid.
-
- Returns:
- A typed cloud_config dictionary.
- """
-    # dict of format: {"clouds": {<cloud name>: <cloud config>}}
- try:
- clouds = list(cloud_config["clouds"].keys())
- except KeyError as exc:
- raise OpenStackInvalidConfigError("Missing key 'clouds' from config.") from exc
- if not clouds:
- raise OpenStackInvalidConfigError("No clouds defined in clouds.yaml.")
- return cast(CloudConfig, cloud_config)
-
-
-def _write_config_to_disk(cloud_config: CloudConfig) -> None:
- """Write the cloud configuration to disk.
-
- Args:
- cloud_config: The configuration in clouds.yaml format to write to disk.
- """
- CLOUDS_YAML_PATH.parent.mkdir(parents=True, exist_ok=True)
- CLOUDS_YAML_PATH.write_text(encoding="utf-8", data=yaml.dump(cloud_config))
-
-
-def initialize(cloud_config: dict) -> None:
- """Initialize Openstack integration.
-
- Validates config and writes it to disk.
-
- Raises:
-        OpenStackInvalidConfigError: If the given cloud config is invalid.
-
- Args:
- cloud_config: The configuration in clouds.yaml format to apply.
- """
- try:
- valid_config = _validate_cloud_config(cloud_config)
- # TODO: 2024-04-02 - We should define a new error, wrap it and re-raise it.
- except OpenStackInvalidConfigError: # pylint: disable=try-except-raise
- raise
- _write_config_to_disk(valid_config)
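For reference, a minimal clouds.yaml-shaped dict that would pass `_validate_cloud_config`; every credential and endpoint below is a placeholder:

```python
cloud_config = {
    "clouds": {
        "mycloud": {
            "auth": {
                "auth_url": "https://keystone.example.com:5000/v3",
                "username": "runner-manager",
                "password": "<password>",
                "project_name": "runners",
                "user_domain_name": "Default",
                "project_domain_name": "Default",
            },
            "region_name": "RegionOne",
        }
    }
}

# Validates the dict, then writes ~/.config/openstack/clouds.yaml.
initialize(cloud_config)
```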
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
deleted file mode 100644
index ad21f4d97..000000000
--- a/src/openstack_cloud/openstack_cloud.py
+++ /dev/null
@@ -1,597 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Class for accessing OpenStack API for managing servers."""
-
-import logging
-from contextlib import contextmanager
-from dataclasses import dataclass
-from datetime import datetime
-from functools import reduce
-from pathlib import Path
-from typing import Iterable, Iterator, cast
-
-import openstack
-import openstack.exceptions
-import paramiko
-import yaml
-from fabric import Connection as SSHConnection
-from openstack.compute.v2.keypair import Keypair as OpenstackKeypair
-from openstack.compute.v2.server import Server as OpenstackServer
-from openstack.connection import Connection as OpenstackConnection
-from openstack.network.v2.security_group import SecurityGroup as OpenstackSecurityGroup
-from paramiko.ssh_exception import NoValidConnectionsError
-
-from errors import KeyfileError, OpenStackError, SSHError
-from utilities import retry
-
-logger = logging.getLogger(__name__)
-
-_CLOUDS_YAML_PATH = Path.home() / ".config/openstack/clouds.yaml"
-
-# Update the version when the security group rules are not backward compatible.
-_SECURITY_GROUP_NAME = "github-runner-v1"
-
-_CREATE_SERVER_TIMEOUT = 5 * 60
-_SSH_TIMEOUT = 30
-_SSH_KEY_PATH = Path("/home/ubuntu/.ssh")
-_TEST_STRING = "test_string"
-
-
-@dataclass
-class OpenstackInstance:
- """Represents an OpenStack instance.
-
- Attributes:
- server_id: ID of server assigned by OpenStack.
- server_name: Name of the server on OpenStack.
- instance_id: ID used by OpenstackCloud class to manage the instances. See docs on the
- OpenstackCloud.
- addresses: IP addresses assigned to the server.
- status: Status of the server.
- """
-
- server_id: str
- server_name: str
- instance_id: str
- addresses: list[str]
- status: str
-
- def __init__(self, server: OpenstackServer, prefix: str):
- """Construct the object.
-
- Args:
- server: The OpenStack server.
- prefix: The name prefix for the servers.
-
- Raises:
-            ValueError: If the provided server is not managed under this prefix.
- """
- self.server_id = server.id
- self.server_name = server.name
- self.status = server.status
- self.addresses = [
- address["addr"]
- for network_addresses in server.addresses.values()
- for address in network_addresses
- ]
-
- if not self.server_name.startswith(f"{prefix}-"):
- # Should never happen.
- raise ValueError(
- f"Found openstack server {server.name} managed under prefix {prefix}, contact devs"
- )
- self.instance_id = self.server_name[len(prefix) + 1 :]
-
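The naming scheme is `<prefix>-<instance_id>`, so the instance ID is recovered by stripping the prefix and the joining hyphen; for example (names fabricated):

```python
prefix = "github-runner-0"
server_name = "github-runner-0-f2a4c8e1b3d5"

instance_id = server_name[len(prefix) + 1 :]  # drop "<prefix>-"
assert instance_id == "f2a4c8e1b3d5"
```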
-
-@contextmanager
-@retry(tries=2, delay=5, local_logger=logger)
-def _get_openstack_connection(
- clouds_config: dict[str, dict], cloud: str
-) -> Iterator[OpenstackConnection]:
- """Create a connection context managed object, to be used within with statements.
-
- The file of _CLOUDS_YAML_PATH should only be modified by this function.
-
- Args:
- clouds_config: The configuration in clouds.yaml format to apply.
- cloud: The name of cloud to use in the clouds.yaml.
-
- Raises:
-        OpenStackError: If the provided credentials are not authorized.
-
- Yields:
- An openstack.connection.Connection object.
- """
- if not _CLOUDS_YAML_PATH.exists():
- _CLOUDS_YAML_PATH.parent.mkdir(parents=True, exist_ok=True)
-
- # Concurrency: Very small chance for the file to be corrupted due to multiple process calling
- # this function and writing the file at the same time. This should cause the `conn.authorize`
- # to fail, and retry of this function would resolve this.
- _CLOUDS_YAML_PATH.write_text(data=yaml.dump(clouds_config), encoding="utf-8")
-
-    # The API documents that keystoneauth1.exceptions.MissingRequiredOptions can be raised,
-    # but it could not be reproduced, so that exception is not caught here.
- try:
- with openstack.connect(cloud=cloud) as conn:
- conn.authorize()
- yield conn
-    # pylint thinks this isn't an exception, but it does inherit from the Exception class.
- except openstack.exceptions.HttpException as exc: # pylint: disable=bad-exception-cause
- logger.exception("OpenStack API call failure")
- raise OpenStackError("Failed OpenStack API call") from exc
-
-
-class OpenstackCloud:
- """Client to interact with OpenStack cloud.
-
-    OpenStack server names are managed by this class. Callers refer to instances via
-    instance_id. If the caller needs the server name, e.g., for logging, it can be queried
-    with get_server_name.
- """
-
- def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
- """Create the object.
-
- Args:
- clouds_config: The openstack clouds.yaml in dict format.
- cloud: The name of cloud to use in the clouds.yaml.
-            prefix: Prefix attached to the names of resources managed by this instance. Used
-                for identifying which resources belong to this instance.
- """
- self._clouds_config = clouds_config
- self._cloud = cloud
- self.prefix = prefix
-
- # Ignore "Too many arguments" as 6 args should be fine. Move to a dataclass if new args are
- # added.
- def launch_instance( # pylint: disable=R0913
- self, instance_id: str, image: str, flavor: str, network: str, cloud_init: str
- ) -> OpenstackInstance:
- """Create an OpenStack instance.
-
- Args:
- instance_id: The instance ID to form the instance name.
- image: The image used to create the instance.
- flavor: The flavor used to create the instance.
- network: The network used to create the instance.
- cloud_init: The cloud init userdata to startup the instance.
-
- Raises:
- OpenStackError: Unable to create OpenStack server.
-
- Returns:
- The OpenStack instance created.
- """
- full_name = self.get_server_name(instance_id)
- logger.info("Creating openstack server with %s", full_name)
-
- with _get_openstack_connection(
- clouds_config=self._clouds_config, cloud=self._cloud
- ) as conn:
- security_group = OpenstackCloud._ensure_security_group(conn)
- keypair = OpenstackCloud._setup_keypair(conn, full_name)
-
- try:
- server = conn.create_server(
- name=full_name,
- image=image,
- key_name=keypair.name,
- flavor=flavor,
- network=network,
- security_groups=[security_group.id],
- userdata=cloud_init,
- auto_ip=False,
- timeout=_CREATE_SERVER_TIMEOUT,
- wait=True,
- )
- except openstack.exceptions.ResourceTimeout as err:
- logger.exception("Timeout creating openstack server %s", full_name)
- logger.info(
- "Attempting clean up of openstack server %s that timeout during creation",
- full_name,
- )
- self._delete_instance(conn, full_name)
- raise OpenStackError(f"Timeout creating openstack server {full_name}") from err
- except openstack.exceptions.SDKException as err:
- logger.exception("Failed to create openstack server %s", full_name)
-                # The keypair was created under the full server name, not the instance ID.
-                self._delete_keypair(conn, full_name)
- raise OpenStackError(f"Failed to create openstack server {full_name}") from err
-
- return OpenstackInstance(server, self.prefix)
-
- def get_instance(self, instance_id: str) -> OpenstackInstance | None:
- """Get OpenStack instance by instance ID.
-
- Args:
- instance_id: The instance ID.
-
- Returns:
- The OpenStack instance if found.
- """
- full_name = self.get_server_name(instance_id)
- logger.info("Getting openstack server with %s", full_name)
-
- with _get_openstack_connection(
- clouds_config=self._clouds_config, cloud=self._cloud
- ) as conn:
- server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
- if server is not None:
- return OpenstackInstance(server, self.prefix)
- return None
-
- def delete_instance(self, instance_id: str) -> None:
- """Delete a openstack instance.
-
- Args:
- instance_id: The instance ID of the instance to delete.
- """
- full_name = self.get_server_name(instance_id)
- logger.info("Deleting openstack server with %s", full_name)
-
- with _get_openstack_connection(
- clouds_config=self._clouds_config, cloud=self._cloud
- ) as conn:
- self._delete_instance(conn, full_name)
-
- def _delete_instance(self, conn: OpenstackConnection, full_name: str) -> None:
- """Delete a openstack instance.
-
- Raises:
- OpenStackError: Unable to delete OpenStack server.
-
- Args:
- conn: The openstack connection to use.
- full_name: The full name of the server.
- """
- try:
- server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
- if server is not None:
- conn.delete_server(name_or_id=server.id)
- OpenstackCloud._delete_keypair(conn, full_name)
- except (
- openstack.exceptions.SDKException,
- openstack.exceptions.ResourceTimeout,
- ) as err:
- raise OpenStackError(f"Failed to remove openstack runner {full_name}") from err
-
- def get_ssh_connection(self, instance: OpenstackInstance) -> SSHConnection:
- """Get SSH connection to an OpenStack instance.
-
- Args:
- instance: The OpenStack instance to connect to.
-
- Raises:
- SSHError: Unable to get a working SSH connection to the instance.
- KeyfileError: Unable to find the keyfile to connect to the instance.
-
- Returns:
- SSH connection object.
- """
- key_path = OpenstackCloud._get_key_path(instance.server_name)
-
- if not key_path.exists():
- raise KeyfileError(
- f"Missing keyfile for server: {instance.server_name}, key path: {key_path}"
- )
- if not instance.addresses:
- raise SSHError(f"No addresses found for OpenStack server {instance.server_name}")
-
- for ip in instance.addresses:
- try:
- connection = SSHConnection(
- host=ip,
- user="ubuntu",
- connect_kwargs={"key_filename": str(key_path)},
- connect_timeout=_SSH_TIMEOUT,
- )
- result = connection.run(f"echo {_TEST_STRING}", warn=True, timeout=_SSH_TIMEOUT)
- if not result.ok:
- logger.warning(
- "SSH test connection failed, server: %s, address: %s",
- instance.server_name,
- ip,
- )
- continue
- if _TEST_STRING in result.stdout:
- return connection
- except (NoValidConnectionsError, TimeoutError, paramiko.ssh_exception.SSHException):
- logger.warning(
- "Unable to SSH into %s with address %s",
- instance.server_name,
- connection.host,
- exc_info=True,
- )
- continue
- raise SSHError(
- f"No connectable SSH addresses found, server: {instance.server_name}, "
- f"addresses: {instance.addresses}"
- )
-
- def get_instances(self) -> tuple[OpenstackInstance, ...]:
- """Get all OpenStack instances.
-
- Returns:
- The OpenStack instances.
- """
- logger.info("Getting all openstack servers managed by the charm")
-
- with _get_openstack_connection(
- clouds_config=self._clouds_config, cloud=self._cloud
- ) as conn:
- instance_list = self._get_openstack_instances(conn)
- server_names = set(server.name for server in instance_list)
-
- server_list = [
- OpenstackCloud._get_and_ensure_unique_server(conn, name) for name in server_names
- ]
- return tuple(
- OpenstackInstance(server, self.prefix)
- for server in server_list
- if server is not None
- )
-
- def cleanup(self) -> None:
- """Cleanup unused key files and openstack keypairs."""
- with _get_openstack_connection(
- clouds_config=self._clouds_config, cloud=self._cloud
- ) as conn:
- instances = self._get_openstack_instances(conn)
- exclude_list = [server.name for server in instances]
- self._cleanup_key_files(exclude_list)
- self._cleanup_openstack_keypairs(conn, exclude_list)
-
- def get_server_name(self, instance_id: str) -> str:
- """Get server name on OpenStack.
-
- Args:
-            instance_id: ID used to identify an instance.
-
- Returns:
- The OpenStack server name.
- """
- return f"{self.prefix}-{instance_id}"
-
- def _cleanup_key_files(self, exclude_instances: Iterable[str]) -> None:
- """Delete all SSH key files except the specified instances.
-
- Args:
-            exclude_instances: The keys of these instances will not be deleted.
- """
- logger.info("Cleaning up SSH key files")
- exclude_filename = set(
- OpenstackCloud._get_key_path(instance) for instance in exclude_instances
- )
-
- total = 0
- deleted = 0
- for path in _SSH_KEY_PATH.iterdir():
- # Find key file from this application.
- if path.is_file() and path.name.startswith(self.prefix) and path.name.endswith(".key"):
- total += 1
- if path in exclude_filename:
- continue
- path.unlink()
- deleted += 1
- logger.info("Found %s key files, clean up %s key files", total, deleted)
-
- def _cleanup_openstack_keypairs(
- self, conn: OpenstackConnection, exclude_instances: Iterable[str]
- ) -> None:
- """Delete all OpenStack keypairs except the specified instances.
-
- Args:
- conn: The Openstack connection instance.
-            exclude_instances: The keys of these instances will not be deleted.
- """
- logger.info("Cleaning up openstack keypairs")
- exclude_instance_set = set(exclude_instances)
- keypairs = conn.list_keypairs()
- for key in keypairs:
- # The `name` attribute is of resource.Body type.
- if key.name and str(key.name).startswith(self.prefix):
- if str(key.name) in exclude_instance_set:
- continue
- try:
- self._delete_keypair(conn, key.name)
- except openstack.exceptions.SDKException:
- logger.warning(
- "Unable to delete OpenStack keypair associated with deleted key file %s ",
- key.name,
- )
-
- def _get_openstack_instances(self, conn: OpenstackConnection) -> tuple[OpenstackServer, ...]:
- """Get the OpenStack servers managed by this unit.
-
- Args:
- conn: The connection object to access OpenStack cloud.
-
- Returns:
- List of OpenStack instances.
- """
- return tuple(
- server
- for server in cast(list[OpenstackServer], conn.list_servers())
- if server.name.startswith(f"{self.prefix}-")
- )
-
- @staticmethod
- def _get_and_ensure_unique_server(
- conn: OpenstackConnection, name: str
- ) -> OpenstackServer | None:
- """Get the latest server of the name and ensure it is unique.
-
-        If multiple servers with the same name are found, the server with the latest creation
-        time is returned. The other servers are deleted.
-
- Args:
- conn: The connection to OpenStack.
-            name: The name of the OpenStack server.
-
- Returns:
- A server with the name.
- """
- servers: list[OpenstackServer] = conn.search_servers(name)
-
- if not servers:
- return None
-
- # 2024/08/14: The `format` arg for `strptime` is the default format.
- # This is only provided to get around a bug of the function with type checking.
-        latest_server = reduce(
-            lambda a, b: (
-                a
-                if datetime.strptime(a.created_at, "%a %b %d %H:%M:%S %Y")
-                > datetime.strptime(b.created_at, "%a %b %d %H:%M:%S %Y")
-                else b
-            ),
-            servers,
-        )
- outdated_servers = filter(lambda x: x != latest_server, servers)
- for server in outdated_servers:
- try:
- conn.delete_server(name_or_id=server.id)
- except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout):
- logger.warning(
- "Unable to delete server with duplicate name %s with ID %s",
- name,
- server.id,
- stack_info=True,
- )
-
- return latest_server
-
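The reduce above keeps the newest server by pairwise comparison. An equivalent and arguably clearer formulation is `max` with a key function; a hedged sketch, assuming the same `servers` list and keeping the original asctime-style format string:

```python
from datetime import datetime

def _created_at(server):
    """Parse the asctime-style timestamp, matching the format string above."""
    return datetime.strptime(server.created_at, "%a %b %d %H:%M:%S %Y")

latest_server = max(servers, key=_created_at)
```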
- @staticmethod
- def _get_key_path(name: str) -> Path:
- """Get the filepath for storing private SSH of a runner.
-
- Args:
- name: The name of the runner.
-
- Returns:
-            Path reserved for the key file of the runner.
- """
- return _SSH_KEY_PATH / f"{name}.key"
-
- @staticmethod
- def _setup_keypair(conn: OpenstackConnection, name: str) -> OpenstackKeypair:
- """Create OpenStack keypair.
-
- Args:
- conn: The connection object to access OpenStack cloud.
- name: The name of the keypair.
-
- Returns:
- The OpenStack keypair.
- """
- key_path = OpenstackCloud._get_key_path(name)
-
- if key_path.exists():
- logger.warning("Existing private key file for %s found, removing it.", name)
- key_path.unlink(missing_ok=True)
-
- keypair = conn.create_keypair(name=name)
- key_path.parent.mkdir(parents=True, exist_ok=True)
- key_path.write_text(keypair.private_key)
- key_path.chmod(0o400)
- return keypair
-
- @staticmethod
- def _delete_keypair(conn: OpenstackConnection, name: str) -> None:
- """Delete OpenStack keypair.
-
- Args:
- conn: The connection object to access OpenStack cloud.
- name: The name of the keypair.
- """
- try:
-            # Keypairs have unique names; access by ID is not needed.
- if not conn.delete_keypair(name):
- logger.warning("Unable to delete keypair for %s", name)
- except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout):
- logger.warning("Unable to delete keypair for %s", name, stack_info=True)
-
- key_path = OpenstackCloud._get_key_path(name)
- key_path.unlink(missing_ok=True)
-
- @staticmethod
- def _ensure_security_group(conn: OpenstackConnection) -> OpenstackSecurityGroup:
- """Ensure runner security group exists.
-
- Args:
- conn: The connection object to access OpenStack cloud.
-
- Returns:
- The security group with the rules for runners.
- """
- rule_exists_icmp = False
- rule_exists_ssh = False
- rule_exists_tmate_ssh = False
-
- security_group_list = conn.list_security_groups(filters={"name": _SECURITY_GROUP_NAME})
- # Pick the first security_group returned.
- security_group = next(iter(security_group_list), None)
- if security_group is None:
- logger.info("Security group %s not found, creating it", _SECURITY_GROUP_NAME)
- security_group = conn.create_security_group(
- name=_SECURITY_GROUP_NAME,
- description="For servers managed by the github-runner charm.",
- )
- else:
- existing_rules = security_group.security_group_rules
- for rule in existing_rules:
- if rule["protocol"] == "icmp":
- logger.debug(
- "Found ICMP rule in existing security group %s of ID %s",
- _SECURITY_GROUP_NAME,
- security_group.id,
- )
- rule_exists_icmp = True
- if (
- rule["protocol"] == "tcp"
- and rule["port_range_min"] == rule["port_range_max"] == 22
- ):
- logger.debug(
- "Found SSH rule in existing security group %s of ID %s",
- _SECURITY_GROUP_NAME,
- security_group.id,
- )
- rule_exists_ssh = True
- if (
- rule["protocol"] == "tcp"
- and rule["port_range_min"] == rule["port_range_max"] == 10022
- ):
- logger.debug(
- "Found tmate SSH rule in existing security group %s of ID %s",
- _SECURITY_GROUP_NAME,
- security_group.id,
- )
- rule_exists_tmate_ssh = True
-
- if not rule_exists_icmp:
- conn.create_security_group_rule(
- secgroup_name_or_id=security_group.id,
- protocol="icmp",
- direction="ingress",
- ethertype="IPv4",
- )
- if not rule_exists_ssh:
- conn.create_security_group_rule(
- secgroup_name_or_id=security_group.id,
- port_range_min="22",
- port_range_max="22",
- protocol="tcp",
- direction="ingress",
- ethertype="IPv4",
- )
- if not rule_exists_tmate_ssh:
- conn.create_security_group_rule(
- secgroup_name_or_id=security_group.id,
- port_range_min="10022",
- port_range_max="10022",
- protocol="tcp",
- direction="egress",
- ethertype="IPv4",
- )
- return security_group
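Putting the class together, a hedged lifecycle sketch; all names and flavours are placeholders, and `cloud_config` is a clouds.yaml-shaped dict as in the earlier example:

```python
cloud = OpenstackCloud(
    clouds_config=cloud_config,
    cloud="mycloud",
    prefix="github-runner-0",
)

instance = cloud.launch_instance(
    instance_id="f2a4c8e1b3d5",
    image="jammy-runner-image",
    flavor="m1.small",
    network="runner-net",
    cloud_init="#cloud-config\n",
)
# Servers are named "<prefix>-<instance_id>".
assert instance.server_name == cloud.get_server_name("f2a4c8e1b3d5")

cloud.delete_instance(instance.instance_id)
cloud.cleanup()  # prune leftover keypairs and key files
```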
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
deleted file mode 100644
index 11bac0b92..000000000
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ /dev/null
@@ -1,830 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Manager for self-hosted runner on OpenStack."""
-
-import logging
-import secrets
-import time
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Iterator, Sequence
-
-import invoke
-import jinja2
-import paramiko
-import paramiko.ssh_exception
-from fabric import Connection as SSHConnection
-
-from charm_state import GitHubOrg
-from errors import (
- CreateMetricsStorageError,
- GetMetricsStorageError,
- IssueMetricEventError,
- KeyfileError,
- MissingServerConfigError,
- OpenStackError,
- RunnerCreateError,
- RunnerStartError,
- SSHError,
-)
-from manager.cloud_runner_manager import (
- CloudRunnerInstance,
- CloudRunnerManager,
- CloudRunnerState,
- GitHubRunnerConfig,
- InstanceId,
- SupportServiceConfig,
-)
-from manager.runner_manager import HealthState
-from metrics import events as metric_events
-from metrics import runner as runner_metrics
-from metrics import storage as metrics_storage
-from openstack_cloud.openstack_cloud import OpenstackCloud, OpenstackInstance
-from repo_policy_compliance_client import RepoPolicyComplianceClient
-from utilities import retry, set_env_var
-
-logger = logging.getLogger(__name__)
-
-BUILD_OPENSTACK_IMAGE_SCRIPT_FILENAME = "scripts/build-openstack-image.sh"
-_CONFIG_SCRIPT_PATH = Path("/home/ubuntu/actions-runner/config.sh")
-
-RUNNER_APPLICATION = Path("/home/ubuntu/actions-runner")
-METRICS_EXCHANGE_PATH = Path("/home/ubuntu/metrics-exchange")
-PRE_JOB_SCRIPT = RUNNER_APPLICATION / "pre-job.sh"
-MAX_METRICS_FILE_SIZE = 1024
-
-RUNNER_STARTUP_PROCESS = "/home/ubuntu/actions-runner/run.sh"
-RUNNER_LISTENER_PROCESS = "Runner.Listener"
-RUNNER_WORKER_PROCESS = "Runner.Worker"
-CREATE_SERVER_TIMEOUT = 5 * 60
-
-
-class _GithubRunnerRemoveError(Exception):
- """Represents an error while SSH into a runner and running the remove script."""
-
-
-class _PullFileError(Exception):
- """Represents an error while pulling a file from the runner instance."""
-
-
-@dataclass
-class OpenStackCloudConfig:
- """Configuration for OpenStack cloud authorisation information.
-
- Attributes:
- clouds_config: The clouds.yaml.
- cloud: The cloud name to connect to.
- """
-
- clouds_config: dict[str, dict]
- cloud: str
-
-
-@dataclass
-class OpenStackServerConfig:
- """Configuration for OpenStack server.
-
- Attributes:
- image: The image name for runners to use.
- flavor: The flavor name for runners to use.
- network: The network name for runners to use.
- """
-
- image: str
- flavor: str
- network: str
-
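The two dataclasses split what is needed to talk to the cloud from what is needed to shape a server; a minimal construction might look like this (all values are placeholders):

```python
cloud_config = OpenStackCloudConfig(
    clouds_config={
        "clouds": {
            "mycloud": {
                "auth": {"auth_url": "https://keystone.example.com:5000/v3"},
            }
        }
    },
    cloud="mycloud",
)

server_config = OpenStackServerConfig(
    image="jammy-runner-image",
    flavor="m1.small",
    network="runner-net",
)
```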
-
-@dataclass
-class _RunnerHealth:
- """Runners with health state.
-
- Attributes:
- healthy: The list of healthy runners.
- unhealthy: The list of unhealthy runners.
- """
-
- healthy: tuple[OpenstackInstance, ...]
- unhealthy: tuple[OpenstackInstance, ...]
-
-
-class OpenStackRunnerManager(CloudRunnerManager):
- """Manage self-hosted runner on OpenStack cloud.
-
- Attributes:
- name_prefix: The name prefix of the runners created.
- """
-
- # Ignore "Too many arguments", as the class requires a lot of configurations.
- def __init__( # pylint: disable=R0913
- self,
- manager_name: str,
- prefix: str,
- cloud_config: OpenStackCloudConfig,
- server_config: OpenStackServerConfig | None,
- runner_config: GitHubRunnerConfig,
- service_config: SupportServiceConfig,
- ) -> None:
- """Construct the object.
-
- Args:
- manager_name: A name to identify this manager.
- prefix: The prefix to runner name.
- cloud_config: The configuration for OpenStack authorisation.
-            server_config: The configuration for creating OpenStack servers. Runners cannot
-                be created if None.
- runner_config: The configuration for the runner.
- service_config: The configuration of supporting services of the runners.
- """
- self._manager_name = manager_name
- self._prefix = prefix
- self._cloud_config = cloud_config
- self._server_config = server_config
- self._runner_config = runner_config
- self._service_config = service_config
- self._openstack_cloud = OpenstackCloud(
- clouds_config=self._cloud_config.clouds_config,
- cloud=self._cloud_config.cloud,
- prefix=self.name_prefix,
- )
-
- # Setting the env var to this process and any child process spawned.
- proxies = service_config.proxy_config
- if no_proxy := proxies.no_proxy:
- set_env_var("NO_PROXY", no_proxy)
- if http_proxy := proxies.http:
- set_env_var("HTTP_PROXY", http_proxy)
- if https_proxy := proxies.https:
- set_env_var("HTTPS_PROXY", https_proxy)
-
- @property
- def name_prefix(self) -> str:
- """The prefix of runner names.
-
- Returns:
- The prefix of the runner names managed by this class.
- """
- return self._prefix
-
- def create_runner(self, registration_token: str) -> InstanceId:
- """Create a self-hosted runner.
-
- Args:
- registration_token: The GitHub registration token for registering runners.
-
- Raises:
- MissingServerConfigError: Unable to create runner due to missing configuration.
- RunnerCreateError: Unable to create runner due to OpenStack issues.
-
- Returns:
- Instance ID of the runner.
- """
- if self._server_config is None:
- raise MissingServerConfigError("Missing server configuration to create runners")
-
- start_timestamp = time.time()
- instance_id = OpenStackRunnerManager._generate_instance_id()
- instance_name = self._openstack_cloud.get_server_name(instance_id=instance_id)
- cloud_init = self._generate_cloud_init(
- instance_name=instance_name, registration_token=registration_token
- )
- try:
- instance = self._openstack_cloud.launch_instance(
- instance_id=instance_id,
- image=self._server_config.image,
- flavor=self._server_config.flavor,
- network=self._server_config.network,
- cloud_init=cloud_init,
- )
- except OpenStackError as err:
- raise RunnerCreateError(f"Failed to create {instance_name} openstack runner") from err
-
- self._wait_runner_startup(instance)
- self._wait_runner_running(instance)
-
- end_timestamp = time.time()
- OpenStackRunnerManager._issue_runner_installed_metric(
- name=instance_name,
- flavor=self._manager_name,
- install_start_timestamp=start_timestamp,
- install_end_timestamp=end_timestamp,
- )
- return instance_id
-
- def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
- """Get a self-hosted runner by instance id.
-
- Args:
- instance_id: The instance id.
-
- Returns:
- Information on the runner instance.
- """
- instance = self._openstack_cloud.get_instance(instance_id)
- healthy = self._runner_health_check(instance=instance)
- return (
- CloudRunnerInstance(
- name=instance.server_name,
- instance_id=instance_id,
- health=HealthState.HEALTHY if healthy else HealthState.UNHEALTHY,
- state=CloudRunnerState.from_openstack_server_status(instance.status),
- )
- if instance is not None
- else None
- )
-
- def get_runners(
- self, states: Sequence[CloudRunnerState] | None = None
- ) -> tuple[CloudRunnerInstance, ...]:
- """Get self-hosted runners by state.
-
- Args:
-            states: Filter for the runners in these cloud states. If None, all states will be
-                included.
-
- Returns:
- Information on the runner instances.
- """
- instance_list = self._openstack_cloud.get_instances()
- instance_list = [
- CloudRunnerInstance(
- name=instance.server_name,
- instance_id=instance.instance_id,
- health=(
- HealthState.HEALTHY
- if self._runner_health_check(instance)
- else HealthState.UNHEALTHY
- ),
- state=CloudRunnerState.from_openstack_server_status(instance.status),
- )
- for instance in instance_list
- ]
- if states is None:
- return tuple(instance_list)
-
- state_set = set(states)
- return tuple(instance for instance in instance_list if instance.state in state_set)
-
- def delete_runner(
- self, instance_id: InstanceId, remove_token: str
- ) -> runner_metrics.RunnerMetrics | None:
- """Delete self-hosted runners.
-
- Args:
- instance_id: The instance id of the runner to delete.
- remove_token: The GitHub remove token.
-
- Returns:
- Any metrics collected during the deletion of the runner.
- """
- instance = self._openstack_cloud.get_instance(instance_id)
- if instance is None:
- logger.warning(
- "Unable to delete instance %s as it is not found",
- self._openstack_cloud.get_server_name(instance_id),
- )
- return None
-
- extracted_metrics = runner_metrics.extract(
- metrics_storage_manager=metrics_storage, runners=set([instance.server_name])
- )
- self._delete_runner(instance, remove_token)
- return next(extracted_metrics, None)
-
- def flush_runners(
- self, remove_token: str, busy: bool = False
- ) -> Iterator[runner_metrics.RunnerMetrics]:
- """Remove idle and/or busy runners.
-
- Args:
-            remove_token: The GitHub remove token.
- busy: If false, only idle runners are removed. If true, both idle and busy runners are
- removed.
-
- Returns:
- Any metrics retrieved from flushed runners.
- """
- instance_list = self._openstack_cloud.get_instances()
- for instance in instance_list:
- try:
- self._check_state_and_flush(instance, busy)
- except SSHError:
- logger.warning(
- "Unable to determine state of %s and kill runner process due to SSH issues",
- instance.server_name,
- )
- continue
- return self.cleanup(remove_token)
-
- def cleanup(self, remove_token: str) -> Iterator[runner_metrics.RunnerMetrics]:
- """Cleanup runner and resource on the cloud.
-
- Args:
- remove_token: The GitHub remove token.
-
- Returns:
- Any metrics retrieved from cleanup runners.
- """
- runners = self._get_runners_health()
- healthy_runner_names = [runner.server_name for runner in runners.healthy]
- metrics = runner_metrics.extract(
- metrics_storage_manager=metrics_storage, runners=set(healthy_runner_names)
- )
- for runner in runners.unhealthy:
- self._delete_runner(runner, remove_token)
-
- self._openstack_cloud.cleanup()
- return metrics
-
- def _delete_runner(self, instance: OpenstackInstance, remove_token: str) -> None:
- """Delete self-hosted runners by openstack instance.
-
- Args:
- instance: The OpenStack instance.
- remove_token: The GitHub remove token.
- """
- try:
- ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
- self._pull_runner_metrics(instance.server_name, ssh_conn)
-
- try:
- OpenStackRunnerManager._run_runner_removal_script(
- instance.server_name, ssh_conn, remove_token
- )
- except _GithubRunnerRemoveError:
- logger.warning(
- "Unable to run github runner removal script for %s",
- instance.server_name,
- stack_info=True,
- )
- except SSHError:
- logger.exception(
- "Failed to get SSH connection while removing %s", instance.server_name
- )
- logger.warning(
- "Skipping runner remove script for %s due to SSH issues", instance.server_name
- )
-
- try:
- self._openstack_cloud.delete_instance(instance.instance_id)
- except OpenStackError:
- logger.exception(
- "Unable to delete openstack instance for runner %s", instance.server_name
- )
-
- def _get_runners_health(self) -> _RunnerHealth:
- """Get runners by health state.
-
- Returns:
- Runners by health state.
- """
- runner_list = self._openstack_cloud.get_instances()
-
- healthy, unhealthy = [], []
- for runner in runner_list:
- if self._runner_health_check(runner):
- healthy.append(runner)
- else:
- unhealthy.append(runner)
- return _RunnerHealth(healthy=tuple(healthy), unhealthy=tuple(unhealthy))
-
- def _runner_health_check(self, instance: OpenstackInstance) -> bool:
- """Run health check on a runner.
-
- Args:
- instance: The instance hosting the runner to run health check on.
-
- Returns:
- True if runner is healthy.
- """
- cloud_state = CloudRunnerState.from_openstack_server_status(instance.status)
- return cloud_state not in set(
- (
- CloudRunnerState.DELETED,
- CloudRunnerState.ERROR,
- CloudRunnerState.STOPPED,
- )
- ) and self._health_check(instance)
-
- def _generate_cloud_init(self, instance_name: str, registration_token: str) -> str:
- """Generate cloud init userdata.
-
- This is the script the openstack server runs on startup.
-
- Args:
- instance_name: The name of the instance.
- registration_token: The GitHub runner registration token.
-
- Returns:
- The cloud init userdata for openstack instance.
- """
- jinja = jinja2.Environment(loader=jinja2.FileSystemLoader("templates"), autoescape=True)
-
- env_contents = jinja.get_template("env.j2").render(
- pre_job_script=str(PRE_JOB_SCRIPT),
- dockerhub_mirror=self._service_config.dockerhub_mirror or "",
- ssh_debug_info=(
- secrets.choice(self._service_config.ssh_debug_connections)
- if self._service_config.ssh_debug_connections
- else None
- ),
- )
-
- pre_job_contents_dict = {
- "issue_metrics": True,
- "metrics_exchange_path": str(METRICS_EXCHANGE_PATH),
- "do_repo_policy_check": False,
- }
- repo_policy = self._get_repo_policy_compliance_client()
- if repo_policy is not None:
- pre_job_contents_dict.update(
- {
- "repo_policy_base_url": repo_policy.base_url,
- "repo_policy_one_time_token": repo_policy.get_one_time_token(),
- "do_repo_policy_check": True,
- }
- )
-
- pre_job_contents = jinja.get_template("pre-job.j2").render(pre_job_contents_dict)
-
- runner_group = None
- if isinstance(self._runner_config.github_path, GitHubOrg):
- runner_group = self._runner_config.github_path.group
- aproxy_address = (
- self._service_config.proxy_config.aproxy_address
- if self._service_config.proxy_config is not None
- else None
- )
- return jinja.get_template("openstack-userdata.sh.j2").render(
- github_url=f"https://github.com/{self._runner_config.github_path.path()}",
- runner_group=runner_group,
- token=registration_token,
- instance_labels=",".join(self._runner_config.labels),
- instance_name=instance_name,
- env_contents=env_contents,
- pre_job_contents=pre_job_contents,
- metrics_exchange_path=str(METRICS_EXCHANGE_PATH),
- aproxy_address=aproxy_address,
- dockerhub_mirror=self._service_config.dockerhub_mirror,
- )
-
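The same render pattern, reduced to an inline template for illustration; the repository's real templates live under templates/ and are not reproduced here:

```python
import jinja2

env = jinja2.Environment(autoescape=True)
template = env.from_string(
    "RUNNER_NAME={{ instance_name }}\nLABELS={{ instance_labels }}\n"
)
print(template.render(instance_name="github-runner-0-abc", instance_labels="self-hosted,large"))
# RUNNER_NAME=github-runner-0-abc
# LABELS=self-hosted,large
```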
- def _get_repo_policy_compliance_client(self) -> RepoPolicyComplianceClient | None:
- """Get repo policy compliance client.
-
- Returns:
- The repo policy compliance client.
- """
- if self._service_config.repo_policy_compliance is not None:
- return RepoPolicyComplianceClient(
- self._service_config.repo_policy_compliance.url,
- self._service_config.repo_policy_compliance.token,
- )
- return None
-
- @retry(tries=3, delay=5, backoff=2, local_logger=logger)
- def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> None:
- """Kill runner process depending on idle or busy.
-
-        Updates to the runner state reach the GitHub API with some delay, so the state of the
-        runner is determined by which runner processes are running. If the Runner.Worker
-        process is running, the runner is deemed to be busy.
-
- Raises:
- SSHError: Unable to check the state of the runner and kill the runner process due to
- SSH failure.
-
- Args:
- instance: The openstack instance to kill the runner process.
-            busy: If True, kill the runner process regardless of state; otherwise kill the
-                runner process only if the runner is idle.
- """
- try:
- ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
- except KeyfileError:
- logger.exception(
- "Health check failed due to unable to find keyfile for %s", instance.server_name
- )
- return
- except SSHError:
- logger.exception(
- "SSH connection failure with %s during flushing", instance.server_name
- )
- raise
-
- # Using a single command to determine the state and kill the process if needed.
- # This makes it more robust when network is unstable.
- if busy:
- logger.info("Attempting to kill all runner process on %s", instance.server_name)
- # kill both Runner.Listener and Runner.Worker processes.
- # This kills pre-job.sh, a child process of Runner.Worker.
- kill_command = (
- f"pgrep -x {RUNNER_LISTENER_PROCESS} && kill $(pgrep -x {RUNNER_LISTENER_PROCESS});"
- f"pgrep -x {RUNNER_WORKER_PROCESS} && kill $(pgrep -x {RUNNER_WORKER_PROCESS});"
- )
- else:
- logger.info(
- "Attempting to kill runner process on %s if not busy", instance.server_name
- )
-            # Only kill Runner.Listener if Runner.Worker does not exist.
-            # POSIX `&&` and `||` have equal precedence and associate left, so the explicit
-            # negation is needed: `A || B && C` would group as `(A || B) && C` and kill the
-            # listener even while the worker is running.
-            kill_command = (
-                f"! pgrep -x {RUNNER_WORKER_PROCESS} && pgrep -x {RUNNER_LISTENER_PROCESS} && "
-                f"kill $(pgrep -x {RUNNER_LISTENER_PROCESS})"
-            )
- # Checking the result of kill command is not useful, as the exit code does not reveal much.
- ssh_conn.run(kill_command, warn=True)
-
- @retry(tries=3, delay=5, backoff=2, local_logger=logger)
- def _health_check(self, instance: OpenstackInstance) -> bool:
- """Check whether runner is healthy.
-
- Args:
-            instance: The OpenStack instance to conduct the health check on.
-
- Raises:
- SSHError: Unable to get a SSH connection to the instance.
-
- Returns:
- Whether the runner is healthy.
- """
- try:
- ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
- except KeyfileError:
- logger.exception(
- "Health check failed due to unable to find keyfile for %s", instance.server_name
- )
- return False
- except SSHError:
- logger.exception(
- "SSH connection failure with %s during health check", instance.server_name
- )
- raise
- return OpenStackRunnerManager._run_health_check(ssh_conn, instance.server_name)
-
- @staticmethod
- def _run_health_check(ssh_conn: SSHConnection, name: str) -> bool:
- """Run a health check for runner process.
-
- Args:
- ssh_conn: The SSH connection to the runner.
- name: The name of the runner.
-
- Returns:
-            Whether the health check succeeded.
- """
- result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
- if not result.ok:
- logger.warning("SSH run of `ps aux` failed on %s: %s", name, result.stderr)
- return False
- if (
- RUNNER_WORKER_PROCESS not in result.stdout
- and RUNNER_LISTENER_PROCESS not in result.stdout
- ):
- logger.warning("Runner process not found on %s", name)
- return False
- return True
-
- @retry(tries=10, delay=60, local_logger=logger)
- def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
- """Wait until runner is startup.
-
- Args:
- instance: The runner instance.
-
- Raises:
- RunnerStartError: The runner startup process was not found on the runner.
- """
- try:
- ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
- except SSHError as err:
- raise RunnerStartError(
- f"Failed to SSH to {instance.server_name} during creation possible due to setup "
- "not completed"
- ) from err
-
- result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
- if not result.ok:
- logger.warning("SSH run of `ps aux` failed on %s", instance.server_name)
- raise RunnerStartError(f"Unable to SSH run `ps aux` on {instance.server_name}")
- if RUNNER_STARTUP_PROCESS not in result.stdout:
- logger.warning("Runner startup process not found on %s", instance.server_name)
- raise RunnerStartError(f"Runner startup process not found on {instance.server_name}")
- logger.info("Runner startup process found to be healthy on %s", instance.server_name)
-
- @retry(tries=5, delay=60, local_logger=logger)
- def _wait_runner_running(self, instance: OpenstackInstance) -> None:
- """Wait until runner is running.
-
- Args:
- instance: The runner instance.
-
- Raises:
- RunnerStartError: The runner process was not found on the runner.
- """
- try:
- ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
- except SSHError as err:
- raise RunnerStartError(
- f"Failed to SSH connect to {instance.server_name} openstack runner"
- ) from err
-
- if not self._run_health_check(ssh_conn=ssh_conn, name=instance.server_name):
- logger.info("Runner process not found on %s", instance.server_name)
- raise RunnerStartError(
- f"Runner process on {instance.server_name} failed to initialize on after starting"
- )
-
- logger.info("Runner process found to be healthy on %s", instance.server_name)
-
- @staticmethod
- def _generate_instance_id() -> InstanceId:
- """Generate a instance id.
-
- Return:
- The id.
- """
- return secrets.token_hex(12)
-
- @staticmethod
- def _issue_runner_installed_metric(
- name: str,
- flavor: str,
- install_start_timestamp: float,
- install_end_timestamp: float,
- ) -> None:
- """Issue metric for runner installed event.
-
- Args:
- name: The name of the runner.
- flavor: The flavor of the runner.
- install_start_timestamp: The timestamp of installation start.
- install_end_timestamp: The timestamp of installation end.
- """
- try:
- metric_events.issue_event(
- event=metric_events.RunnerInstalled(
- timestamp=install_start_timestamp,
- flavor=flavor,
- duration=install_end_timestamp - install_start_timestamp,
- )
- )
- except IssueMetricEventError:
- logger.exception("Failed to issue RunnerInstalled metric")
-
- try:
- storage = metrics_storage.create(name)
- except CreateMetricsStorageError:
- logger.exception(
- "Failed to create metrics storage for runner %s, "
- "will not be able to issue all metrics.",
- name,
- )
- else:
- try:
- (storage.path / runner_metrics.RUNNER_INSTALLED_TS_FILE_NAME).write_text(
- str(install_end_timestamp), encoding="utf-8"
- )
- except FileNotFoundError:
- logger.exception(
- "Failed to write runner-installed.timestamp into metrics storage "
- "for runner %s, will not be able to issue all metrics.",
- name,
- )
-
- @staticmethod
- def _pull_runner_metrics(name: str, ssh_conn: SSHConnection) -> None:
- """Pull metrics from runner.
-
- Args:
- name: The name of the runner.
- ssh_conn: The SSH connection to the runner.
- """
- try:
- storage = metrics_storage.get(name)
- except GetMetricsStorageError:
- logger.exception(
- "Failed to get shared metrics storage for runner %s, "
- "will not be able to issue all metrics.",
- name,
- )
- return
-
- try:
- OpenStackRunnerManager._ssh_pull_file(
- ssh_conn=ssh_conn,
- remote_path=str(METRICS_EXCHANGE_PATH / "pre-job-metrics.json"),
- local_path=str(storage.path / "pre-job-metrics.json"),
- max_size=MAX_METRICS_FILE_SIZE,
- )
- OpenStackRunnerManager._ssh_pull_file(
- ssh_conn=ssh_conn,
- remote_path=str(METRICS_EXCHANGE_PATH / "post-job-metrics.json"),
- local_path=str(storage.path / "post-job-metrics.json"),
- max_size=MAX_METRICS_FILE_SIZE,
- )
- except _PullFileError as exc:
- logger.warning(
- "Failed to pull metrics for %s: %s . Will not be able to issue all metrics",
- name,
- exc,
- )
-
- @staticmethod
- def _ssh_pull_file(
- ssh_conn: SSHConnection, remote_path: str, local_path: str, max_size: int
- ) -> None:
- """Pull file from the runner instance.
-
- Args:
- ssh_conn: The SSH connection instance.
- remote_path: The file path on the runner instance.
- local_path: The local path to store the file.
- max_size: If the file is larger than this, it will not be pulled.
-
- Raises:
- _PullFileError: Unable to pull the file from the runner instance.
- SSHError: Issue with SSH connection.
- """
- try:
- result = ssh_conn.run(f"stat -c %s {remote_path}", warn=True)
- except (
- TimeoutError,
- paramiko.ssh_exception.NoValidConnectionsError,
- paramiko.ssh_exception.SSHException,
- ) as exc:
- raise SSHError(f"Unable to SSH into {ssh_conn.host}") from exc
- if not result.ok:
- logger.warning(
- (
- "Unable to get file size of %s on instance %s, "
- "exit code: %s, stdout: %s, stderr: %s"
- ),
- remote_path,
- ssh_conn.host,
- result.return_code,
- result.stdout,
- result.stderr,
- )
- raise _PullFileError(f"Unable to get file size of {remote_path}")
-
- stdout = result.stdout
- try:
-            # str.strip() returns a new string; its result must be used.
-            size = int(stdout.strip())
-            if size > max_size:
-                raise _PullFileError(f"File size of {remote_path} too large: {size} > {max_size}")
-        except ValueError as exc:
-            raise _PullFileError(f"Invalid file size for {remote_path}: {stdout}") from exc
-
- try:
- ssh_conn.get(remote=remote_path, local=local_path)
- except (
- TimeoutError,
- paramiko.ssh_exception.NoValidConnectionsError,
- paramiko.ssh_exception.SSHException,
- ) as exc:
- raise SSHError(f"Unable to SSH into {ssh_conn.host}") from exc
- except OSError as exc:
- raise _PullFileError(f"Unable to retrieve file {remote_path}") from exc
-
- @staticmethod
- def _run_runner_removal_script(
- instance_name: str, ssh_conn: SSHConnection, remove_token: str
- ) -> None:
- """Run Github runner removal script.
-
- Args:
- instance_name: The name of the runner instance.
- ssh_conn: The SSH connection to the runner instance.
- remove_token: The GitHub instance removal token.
-
- Raises:
- _GithubRunnerRemoveError: Unable to remove runner from GitHub.
- """
- try:
- result = ssh_conn.run(
- f"{_CONFIG_SCRIPT_PATH} remove --token {remove_token}",
- warn=True,
- )
- if result.ok:
- return
-
- logger.warning(
- (
- "Unable to run removal script on instance %s, "
- "exit code: %s, stdout: %s, stderr: %s"
- ),
- instance_name,
- result.return_code,
- result.stdout,
- result.stderr,
- )
- raise _GithubRunnerRemoveError(f"Failed to remove runner {instance_name} from Github.")
- except (
- TimeoutError,
- paramiko.ssh_exception.NoValidConnectionsError,
- paramiko.ssh_exception.SSHException,
- ) as exc:
- raise _GithubRunnerRemoveError(
- f"Failed to remove runner {instance_name} from Github."
- ) from exc
diff --git a/src/reactive/__init__.py b/src/reactive/__init__.py
deleted file mode 100644
index 1c7b82dda..000000000
--- a/src/reactive/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Package for code implementing reactive scheduling."""
diff --git a/src/reactive/consumer.py b/src/reactive/consumer.py
deleted file mode 100644
index f868feddd..000000000
--- a/src/reactive/consumer.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Module responsible for consuming jobs from the message queue."""
-import contextlib
-import logging
-import signal
-import sys
-from contextlib import closing
-from types import FrameType
-from typing import Generator, cast
-
-from kombu import Connection
-from kombu.simple import SimpleQueue
-from pydantic import BaseModel, HttpUrl, ValidationError
-
-logger = logging.getLogger(__name__)
-
-
-class JobDetails(BaseModel):
- """A class to translate the payload.
-
- Attributes:
- labels: The labels of the job.
- run_url: The URL of the job.
- """
-
- labels: list[str]
- run_url: HttpUrl
-
-
-class JobError(Exception):
- """Raised when a job error occurs."""
-
-
-def consume(mongodb_uri: str, queue_name: str) -> None:
- """Consume a job from the message queue.
-
- Log the job details and acknowledge the message.
- If the job details are invalid, reject the message and raise an error.
-
- Args:
- mongodb_uri: The URI of the MongoDB database.
- queue_name: The name of the queue.
-
- Raises:
- JobError: If the job details are invalid.
- """
- with Connection(mongodb_uri) as conn:
- with closing(SimpleQueue(conn, queue_name)) as simple_queue:
- with signal_handler(signal.SIGTERM):
- msg = simple_queue.get(block=True)
- try:
- job_details = cast(JobDetails, JobDetails.parse_raw(msg.payload))
- except ValidationError as exc:
- msg.reject(requeue=True)
- raise JobError(f"Invalid job details: {msg.payload}") from exc
- logger.info(
- "Received job with labels %s and run_url %s",
- job_details.labels,
- job_details.run_url,
- )
- msg.ack()
-
-
-@contextlib.contextmanager
-def signal_handler(signal_code: signal.Signals) -> Generator[None, None, None]:
- """Set a signal handler and after the context, restore the default handler.
-
- The signal handler exits the process.
-
- Args:
- signal_code: The signal code to handle.
- """
- _set_signal_handler(signal_code)
- try:
- yield
- finally:
- _restore_signal_handler(signal_code)
-
-
-def _set_signal_handler(signal_code: signal.Signals) -> None:
- """Set a signal handler which exits the process.
-
- Args:
- signal_code: The signal code to handle.
- """
-
- def sigterm_handler(signal_code: int, _: FrameType | None) -> None:
- """Handle a signal.
-
- Call sys.exit with the signal code. Kombu should automatically
- requeue unacknowledged messages.
-
- Args:
- signal_code: The signal code to handle.
- """
- print(
- f"Signal '{signal.strsignal(signal_code)}' received. Will terminate.", file=sys.stderr
- )
- sys.exit(signal_code)
-
- signal.signal(signal_code, sigterm_handler)
-
-
-def _restore_signal_handler(signal_code: signal.Signals) -> None:
- """Restore the default signal handler.
-
- Args:
- signal_code: The signal code to restore.
- """
- signal.signal(signal_code, signal.SIG_DFL)
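The producer side is not part of this file, but a hedged sketch of what a matching publisher could look like with kombu, mirroring the consumer above; the URI, queue name, and run URL are placeholders:

```python
from contextlib import closing

from kombu import Connection
from kombu.simple import SimpleQueue

# JobDetails as defined in the consumer module above.
job = JobDetails(
    labels=["self-hosted", "large"],
    run_url="https://api.github.com/repos/canonical/github-runner-operator/actions/runs/1",
)

with Connection("mongodb://user:<password>@localhost:27017") as conn:
    with closing(SimpleQueue(conn, "runner-jobs")) as queue:
        queue.put(job.json())  # consume() parses this payload with JobDetails.parse_raw
```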
diff --git a/src/reactive/runner_manager.py b/src/reactive/runner_manager.py
deleted file mode 100644
index 5799731ee..000000000
--- a/src/reactive/runner_manager.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Module for managing reactive runners."""
-import logging
-import os
-import shutil
-import signal
-
-# All commands run by subprocess are secure.
-import subprocess # nosec
-from pathlib import Path
-
-from utilities import secure_run_subprocess
-
-logger = logging.getLogger(__name__)
-
-MQ_URI_ENV_VAR = "MQ_URI"
-QUEUE_NAME_ENV_VAR = "QUEUE_NAME"
-REACTIVE_RUNNER_LOG_DIR = Path("/var/log/reactive_runner")
-REACTIVE_RUNNER_SCRIPT_FILE = "scripts/reactive_runner.py"
-PYTHON_BIN = "/usr/bin/python3"
-REACTIVE_RUNNER_CMD_LINE_PREFIX = f"{PYTHON_BIN} {REACTIVE_RUNNER_SCRIPT_FILE}"
-PID_CMD_COLUMN_WIDTH = len(REACTIVE_RUNNER_CMD_LINE_PREFIX)
-PIDS_COMMAND_LINE = [
- "ps",
- "axo",
- f"cmd:{PID_CMD_COLUMN_WIDTH},pid",
- "--no-headers",
- "--sort=-start_time",
-]
-UBUNTU_USER = "ubuntu"
-
-
-class ReactiveRunnerError(Exception):
- """Raised when a reactive runner error occurs."""
-
-
-def reconcile(quantity: int, mq_uri: str, queue_name: str) -> int:
- """Spawn a runner reactively.
-
- Args:
- quantity: The number of runners to spawn.
- mq_uri: The message queue URI.
- queue_name: The name of the queue.
-
- Raises a ReactiveRunnerError if the runner fails to spawn.
-
- Returns:
- The number of reactive runner processes spawned.
- """
- pids = _get_pids()
- current_quantity = len(pids)
- logger.info("Current quantity of reactive runner processes: %s", current_quantity)
- delta = quantity - current_quantity
- if delta > 0:
- logger.info("Will spawn %d new reactive runner process(es)", delta)
- _setup_logging_for_processes()
- for _ in range(delta):
- _spawn_runner(mq_uri=mq_uri, queue_name=queue_name)
- elif delta < 0:
- logger.info("Will kill %d process(es).", -delta)
- for pid in pids[:-delta]:
- logger.info("Killing reactive runner process with pid %s", pid)
- try:
- os.kill(pid, signal.SIGTERM)
- except ProcessLookupError:
- # There can be a race condition that the process has already terminated.
- # We just ignore and log the fact.
- logger.info(
- "Failed to kill process with pid %s. Process might have terminated it self.",
- pid,
- )
- else:
- logger.info("No changes to number of reactive runner processes needed.")
-
- return delta
-
-
-def _get_pids() -> list[int]:
- """Get the PIDs of the reactive runners processes.
-
- Returns:
- The PIDs of the reactive runner processes sorted by start time in descending order.
-
- Raises:
-        ReactiveRunnerError: If the command to get the PIDs fails.
- """
- result = secure_run_subprocess(cmd=PIDS_COMMAND_LINE)
- if result.returncode != 0:
- raise ReactiveRunnerError("Failed to get list of processes")
-
- return [
- int(line.rstrip().rsplit(maxsplit=1)[-1])
- for line in result.stdout.decode().split("\n")
- if line.startswith(REACTIVE_RUNNER_CMD_LINE_PREFIX)
- ]
-
-
-def _setup_logging_for_processes() -> None:
- """Set up the log dir."""
- if not REACTIVE_RUNNER_LOG_DIR.exists():
- REACTIVE_RUNNER_LOG_DIR.mkdir()
- shutil.chown(REACTIVE_RUNNER_LOG_DIR, user=UBUNTU_USER, group=UBUNTU_USER)
-
-
-def _spawn_runner(mq_uri: str, queue_name: str) -> None:
- """Spawn a runner.
-
- Args:
- mq_uri: The message queue URI.
- queue_name: The name of the queue.
- """
- env = {
- "PYTHONPATH": "src:lib:venv",
- MQ_URI_ENV_VAR: mq_uri,
- QUEUE_NAME_ENV_VAR: queue_name,
- }
- # We do not want to wait for the process to finish, so we do not use with statement.
- # We trust the command.
- command = " ".join(
- [
- PYTHON_BIN,
- REACTIVE_RUNNER_SCRIPT_FILE,
- ">>",
- # $$ will be replaced by the PID of the process, so we can track the error log easily.
- f"{REACTIVE_RUNNER_LOG_DIR}/$$.log",
- "2>&1",
- ]
- )
- logger.debug("Spawning a new reactive runner process with command: %s", command)
- process = subprocess.Popen( # pylint: disable=consider-using-with # nosec
- command,
- shell=True,
- env=env,
- stdout=subprocess.DEVNULL,
- stderr=subprocess.DEVNULL,
- user=UBUNTU_USER,
- )
-
- logger.info("Spawned a new reactive runner process with pid %s", process.pid)
diff --git a/src/repo_policy_compliance_client.py b/src/repo_policy_compliance_client.py
deleted file mode 100644
index 6dbc1d919..000000000
--- a/src/repo_policy_compliance_client.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Client for requesting repo policy compliance service."""
-
-import logging
-from urllib.parse import urljoin
-
-import requests
-import urllib3
-
-logger = logging.getLogger(__name__)
-
-
-# Disable pylint public method number check as this class can be extended in the future.
-class RepoPolicyComplianceClient: # pylint: disable=too-few-public-methods
- """Client for repo policy compliance service.
-
- Attributes:
- base_url: Base url to the repo policy compliance service.
- token: Charm token configured for the repo policy compliance service.
- """
-
- def __init__(self, url: str, charm_token: str) -> None:
- """Construct the RepoPolicyComplianceClient.
-
- Args:
- url: Base URL to the repo policy compliance service.
- charm_token: Charm token configured for the repo policy compliance service.
- """
- self._session = self._create_session()
- self.base_url = url
- self.token = charm_token
-
- def get_one_time_token(self) -> str:
- """Get a single-use token for repo policy compliance check.
-
- Raises:
- HTTPError: If there was an error getting one-time token from repo-policy-compliance \
- service.
-
- Returns:
- The one-time token to be used in a single request of repo policy compliance check.
- """
- url = urljoin(self.base_url, "one-time-token")
- try:
- response = self._session.get(url, headers={"Authorization": f"Bearer {self.token}"})
- response.raise_for_status()
- return response.content.decode("utf-8")
- except requests.HTTPError:
- logger.exception("Unable to get one time token from repo policy compliance service.")
- raise
-
- def _create_session(self) -> requests.Session:
- """Create a new requests session.
-
- Returns:
- A new requests session with retries and no proxy settings.
- """
-        # The repo policy compliance service might be on localhost and should not have any
-        # proxy settings configured. This can be changed in the future when we also rely on an
- # external service for LXD cloud.
- adapter = requests.adapters.HTTPAdapter(
- max_retries=urllib3.Retry(
- total=3, backoff_factor=0.3, status_forcelist=[500, 502, 503, 504]
- )
- )
-
- session = requests.Session()
- session.mount("http://", adapter)
- session.mount("https://", adapter)
- session.trust_env = False
- return session
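A hedged usage sketch; the URL and token are placeholders. In the OpenStack manager above, the one-time token is rendered into the runner's pre-job script so the runner can call the service once before the job starts:

```python
client = RepoPolicyComplianceClient(
    url="http://127.0.0.1:8080", charm_token="<charm-token>"
)
one_time_token = client.get_one_time_token()  # single-use, fetched per runner
```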
diff --git a/src/runner.py b/src/runner.py
index 61a12115c..a1b64fcfc 100644
--- a/src/runner.py
+++ b/src/runner.py
@@ -21,9 +21,12 @@
from typing import Iterable, NamedTuple, Optional, Sequence
import yaml
+from github_runner_manager.metrics.runner_logs import SYSLOG_PATH, create_logs_dir
+from github_runner_manager.metrics.storage import MetricsStorage
+from github_runner_manager.types_.github import GitHubOrg
import shared_fs
-from charm_state import Arch, GitHubOrg, SSHDebugConnection, VirtualMachineResources
+from charm_state import Arch, SSHDebugConnection, VirtualMachineResources
from errors import (
CreateMetricsStorageError,
GithubClientError,
@@ -38,8 +41,6 @@
)
from lxd import LxdInstance
from lxd_type import LxdInstanceConfig
-from metrics.runner_logs import SYSLOG_PATH, create_logs_dir
-from metrics.storage import MetricsStorage
from runner_manager_type import RunnerManagerClients
from runner_type import RunnerConfig, RunnerStatus
from utilities import execute_command, retry
diff --git a/src/runner_manager.py b/src/runner_manager.py
index 31c30ef85..66a7e03d3 100644
--- a/src/runner_manager.py
+++ b/src/runner_manager.py
@@ -13,12 +13,19 @@
from pathlib import Path
from typing import Iterator, Optional, Type
+import github_runner_manager.reactive.runner_manager as reactive_runner_manager
import jinja2
import requests
import requests.adapters
import urllib3
+from github_runner_manager.metrics import events as metric_events
+from github_runner_manager.metrics import github as github_metrics
+from github_runner_manager.metrics import runner as runner_metrics
+from github_runner_manager.metrics import runner_logs
+from github_runner_manager.metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
+from github_runner_manager.repo_policy_compliance_client import RepoPolicyComplianceClient
+from github_runner_manager.types_.github import RunnerApplication, SelfHostedRunner
-import reactive.runner_manager as reactive_runner_manager
import shared_fs
from charm_state import VirtualMachineResources
from errors import (
@@ -32,14 +39,7 @@
SubprocessError,
)
from github_client import GithubClient
-from github_type import RunnerApplication, SelfHostedRunner
from lxd import LxdClient, LxdInstance
-from metrics import events as metric_events
-from metrics import github as github_metrics
-from metrics import runner as runner_metrics
-from metrics import runner_logs
-from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
-from repo_policy_compliance_client import RepoPolicyComplianceClient
from runner import LXD_PROFILE_YAML, CreateRunnerConfig, Runner, RunnerConfig, RunnerStatus
from runner_manager_type import (
LXDFlushMode,
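The module alias import keeps every reactive_runner_manager.* call site in this file untouched. A usage sketch, assuming the packaged module keeps the reconcile signature the deleted unit tests below exercise:

    import github_runner_manager.reactive.runner_manager as reactive_runner_manager

    # Spawn or kill reactive consumer processes until 5 are running; the returned
    # delta is positive for spawned processes and negative for killed ones.
    delta = reactive_runner_manager.reconcile(
        5, mq_uri="http://example.com", queue_name="default"
    )
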
diff --git a/src/runner_manager_type.py b/src/runner_manager_type.py
index 95f8edcc3..deb30540b 100644
--- a/src/runner_manager_type.py
+++ b/src/runner_manager_type.py
@@ -9,12 +9,12 @@
from typing import Iterable
import jinja2
+from github_runner_manager.repo_policy_compliance_client import RepoPolicyComplianceClient
+from github_runner_manager.types_.github import GitHubPath, GitHubRunnerStatus
-from charm_state import CharmState, GitHubPath, ReactiveConfig
+from charm_state import CharmState, ReactiveConfig
from github_client import GithubClient
-from github_type import GitHubRunnerStatus
from lxd import LxdClient
-from repo_policy_compliance_client import RepoPolicyComplianceClient
class LXDFlushMode(Enum):
diff --git a/src/runner_type.py b/src/runner_type.py
index 92560cbcf..eec8793ee 100644
--- a/src/runner_type.py
+++ b/src/runner_type.py
@@ -8,7 +8,9 @@
from pathlib import Path
from typing import Optional
-from charm_state import GitHubPath, SSHDebugConnection
+from github_runner_manager.types_.github import GitHubPath
+
+from charm_state import SSHDebugConnection
@dataclass
diff --git a/src/shared_fs.py b/src/shared_fs.py
index 28e97c4fb..48c392113 100644
--- a/src/shared_fs.py
+++ b/src/shared_fs.py
@@ -7,7 +7,8 @@
from pathlib import Path
from typing import Iterator
-import metrics.storage as metrics_storage
+import github_runner_manager.metrics.storage as metrics_storage
+
from errors import (
CreateMetricsStorageError,
DeleteMetricsStorageError,
diff --git a/src/utilities.py b/src/utilities.py
index a19effc5c..86c32c4d2 100644
--- a/src/utilities.py
+++ b/src/utilities.py
@@ -3,14 +3,18 @@
"""Utilities used by the charm."""
-import functools
import logging
import os
import pathlib
import subprocess # nosec B404
-import time
-from typing import Any, Callable, Optional, Sequence, Type, TypeVar
-
+from typing import Any, Optional, Sequence, TypeVar
+
+# Re-export these functions from github_runner_manager; the charm imports them from here.
+from github_runner_manager.utilities import retry # noqa: F401 pylint: disable=unused-import
+from github_runner_manager.utilities import ( # noqa: F401 pylint: disable=unused-import
+ secure_run_subprocess,
+ set_env_var,
+)
from typing_extensions import ParamSpec
from errors import SubprocessError
@@ -24,130 +28,6 @@
ReturnT = TypeVar("ReturnT")
-# This decorator has default arguments, one extra argument is not a problem.
-def retry( # pylint: disable=too-many-arguments
- exception: Type[Exception] = Exception,
- tries: int = 1,
- delay: float = 0,
- max_delay: Optional[float] = None,
- backoff: float = 1,
- local_logger: logging.Logger = logger,
-) -> Callable[[Callable[ParamT, ReturnT]], Callable[ParamT, ReturnT]]:
- """Parameterize the decorator for adding retry to functions.
-
- Args:
- exception: Exception type to be retried.
- tries: Number of attempts at retry.
- delay: Time in seconds to wait between retry.
- max_delay: Max time in seconds to wait between retry.
- backoff: Factor to increase the delay by each retry.
- local_logger: Logger for logging.
-
- Returns:
- The function decorator for retry.
- """
-
- def retry_decorator(
- func: Callable[ParamT, ReturnT],
- ) -> Callable[ParamT, ReturnT]:
- """Decorate function with retry.
-
- Args:
- func: The function to decorate.
-
- Returns:
- The resulting function with retry added.
- """
-
- @functools.wraps(func)
- def fn_with_retry(*args: ParamT.args, **kwargs: ParamT.kwargs) -> ReturnT:
- """Wrap the function with retries.
-
- Args:
- args: The placeholder for decorated function's positional arguments.
- kwargs: The placeholder for decorated function's key word arguments.
-
- Raises:
- RuntimeError: Should be unreachable.
-
- Returns:
- Original return type of the decorated function.
- """
- remain_tries, current_delay = tries, delay
-
- for _ in range(tries):
- try:
- return func(*args, **kwargs)
- # Error caught is set by the input of the function.
- except exception as err: # pylint: disable=broad-exception-caught
- remain_tries -= 1
-
- if remain_tries == 0:
- if local_logger is not None:
-                        local_logger.exception("Retry limit of %s exceeded: %s", tries, err)
- raise
-
- if local_logger is not None:
- local_logger.warning(
- "Retrying error in %s seconds: %s", current_delay, err
- )
- local_logger.debug("Error to be retried:", stack_info=True)
-
- time.sleep(current_delay)
-
- current_delay *= backoff
-
- if max_delay is not None:
- current_delay = min(current_delay, max_delay)
-
- raise RuntimeError("Unreachable code of retry logic.")
-
- return fn_with_retry
-
- return retry_decorator
-
-
-def secure_run_subprocess(
- cmd: Sequence[str], hide_cmd: bool = False, **kwargs: dict[str, Any]
-) -> subprocess.CompletedProcess[bytes]:
- """Run command in subprocess according to security recommendations.
-
- CalledProcessError will not be raised on error of the command executed.
- Errors should be handled by the caller by checking the exit code.
-
-    The command is executed with `subprocess.run`; additional arguments can be passed to it as
-    keyword arguments. The following arguments to `subprocess.run` should not be set:
-    `capture_output`, `shell`, `check`, as those arguments are used by this function.
-
- Args:
- cmd: Command in a list.
- hide_cmd: Hide logging of cmd.
- kwargs: Additional keyword arguments for the `subprocess.run` call.
-
- Returns:
-        Object representing the completed process. The outputs of the subprocess can be accessed.
- """
- if not hide_cmd:
- logger.info("Executing command %s", cmd)
- else:
- logger.info("Executing sensitive command")
-
- result = subprocess.run( # nosec B603
- cmd,
- capture_output=True,
- # Not running in shell to avoid security problems.
- shell=False,
- check=False,
- # Disable type check due to the support for unpacking arguments in mypy is experimental.
- **kwargs, # type: ignore
- )
- if not hide_cmd:
- logger.debug("Command %s returns: %s", cmd, result.stdout)
- else:
- logger.debug("Command returns: %s", result.stdout)
- return result
-
-
def execute_command(cmd: Sequence[str], check_exit: bool = True, **kwargs: Any) -> tuple[str, int]:
"""Execute a command on a subprocess.
@@ -203,19 +83,6 @@ def get_env_var(env_var: str) -> Optional[str]:
return os.environ.get(env_var.upper(), os.environ.get(env_var.lower(), None))
-def set_env_var(env_var: str, value: str) -> None:
- """Set the environment variable value.
-
- Set the all upper case and all low case of the `env_var`.
-
- Args:
- env_var: Name of the environment variable.
- value: Value to set environment variable to.
- """
- os.environ[env_var.upper()] = value
- os.environ[env_var.lower()] = value
-
-
def bytes_with_unit_to_kib(num_bytes: str) -> int:
"""Convert a positive integer followed by a unit to number of kibibytes.
diff --git a/templates/openstack-userdata.sh.j2 b/templates/openstack-userdata.sh.j2
deleted file mode 100644
index 047a62be1..000000000
--- a/templates/openstack-userdata.sh.j2
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/sh
-
-set -e
-
-hostnamectl set-hostname github-runner
-
-# Write .env contents
-su - ubuntu -c 'cd ~/actions-runner && echo "{{ env_contents }}" > .env'
-
-{% if aproxy_address %}
-snap install aproxy --edge
-snap set aproxy proxy={{ aproxy_address }} listen=:54969
-cat << EOF > /etc/nftables.conf
-define default-ip = $(ip route get $(ip route show 0.0.0.0/0 | grep -oP 'via \K\S+') | grep -oP 'src \K\S+')
-define private-ips = { 10.0.0.0/8, 127.0.0.1/8, 172.16.0.0/12, 192.168.0.0/16 }
-table ip aproxy
-flush table ip aproxy
-table ip aproxy {
- chain prerouting {
- type nat hook prerouting priority dstnat; policy accept;
- ip daddr != \$private-ips tcp dport { 80, 443 } counter dnat to \$default-ip:54969
- }
-
- chain output {
- type nat hook output priority -100; policy accept;
- ip daddr != \$private-ips tcp dport { 80, 443 } counter dnat to \$default-ip:54969
- }
-}
-EOF
-systemctl enable nftables.service
-nft -f /etc/nftables.conf
-{% endif %}
-
-adduser ubuntu lxd
-adduser ubuntu adm
-
-{% if dockerhub_mirror %}
-echo "{\"registry-mirrors\": [\"{{ dockerhub_mirror }}\"]}" > /etc/docker/daemon.json
-sudo systemctl daemon-reload
-sudo systemctl restart docker
-{% endif %}
-
-# Prepare metrics
-su - ubuntu -c 'mkdir "{{ metrics_exchange_path }}"'
-
-# Insert pre-job script
-cat << 'EOF' | su - ubuntu -c 'tee /home/ubuntu/actions-runner/pre-job.sh'
-{{ pre_job_contents | safe }}
-EOF
-
-# Create the runner and start the configuration experience
-{% if runner_group %}
-su - ubuntu -c "cd ~/actions-runner && ./config.sh \
- --url {{ github_url }} \
- --runnergroup '{{ runner_group }}' \
- --token {{ token }} --ephemeral --unattended \
- --labels {{ instance_labels }} --name {{ instance_name }}"
-{% else %}
-su - ubuntu -c "cd ~/actions-runner && ./config.sh \
- --url {{ github_url }} \
- --token {{ token }} --ephemeral --unattended \
- --labels {{ instance_labels }} --name {{ instance_name }}"
-{% endif %}
-
-
-write_post_metrics(){
- # Expects the exit code of the run.sh script as the first argument.
-
- # Only write the post-job metrics if the file does not already exist - which may indicate
- # that the job has failed inside pre-job.
-
-    if [ -f {{ metrics_exchange_path }}/post-job-metrics.json ]; then
- return
- fi
-
- timestamp=$(date +%s)
-
- # Write the post-job metrics using status abnormal and exit code if exit code is non-zero
- if [ "$1" != "0" ]; then
- sudo -g ubuntu -u ubuntu jq -n \
- --argjson timestamp "$timestamp" \
- --arg status "abnormal" \
- --argjson exit_code "$1" \
- '{
- "timestamp": $timestamp,
- "status": $status,
- "status_info": {code: $exit_code}
- }' > "{{ metrics_exchange_path}}/post-job-metrics.json"
- return
- else
- # If exit code is zero, write the post-job metrics using status normal
- sudo -g ubuntu -u ubuntu jq -n \
- --argjson timestamp "$timestamp" \
- '{
- "timestamp": $timestamp,
- "status": "normal"
- }' > "{{ metrics_exchange_path }}/post-job-metrics.json"
- fi
-}
-
-# Run runner
-# We want to capture the exit code of the run.sh script and write the post-job metrics.
-(set +e; su - ubuntu -c "cd ~/actions-runner && /home/ubuntu/actions-runner/run.sh"; write_post_metrics $?)
-
-su - ubuntu -c "touch /home/ubuntu/run-completed"
diff --git a/tests/integration/helpers/charm_metrics.py b/tests/integration/helpers/charm_metrics.py
index b6c2f05bc..15cd7e3db 100644
--- a/tests/integration/helpers/charm_metrics.py
+++ b/tests/integration/helpers/charm_metrics.py
@@ -14,12 +14,12 @@
from github.Repository import Repository
from github.Workflow import Workflow
from github.WorkflowJob import WorkflowJob
+from github_runner_manager.metrics.events import METRICS_LOG_PATH
+from github_runner_manager.metrics.runner import PostJobStatus
+from github_runner_manager.types_.github import JobConclusion
from juju.application import Application
from juju.unit import Unit
-from github_type import JobConclusion
-from metrics.events import METRICS_LOG_PATH
-from metrics.runner import PostJobStatus
from tests.integration.helpers.common import (
InstanceHelper,
get_file_content,
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index c15afd5a5..a77c14604 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -6,12 +6,12 @@
from typing import Optional, TypedDict, cast
import openstack.connection
+from github_runner_manager.openstack_cloud.openstack_cloud import OpenstackCloud
from juju.application import Application
from juju.unit import Unit
from openstack.compute.v2.server import Server
from charm_state import VIRTUAL_MACHINES_CONFIG_NAME
-from openstack_cloud.openstack_cloud import OpenstackCloud
from tests.integration.helpers.common import InstanceHelper, reconcile, run_in_unit, wait_for
logger = logging.getLogger(__name__)
diff --git a/tests/integration/test_charm_metrics_failure.py b/tests/integration/test_charm_metrics_failure.py
index e3de1600d..6ce23fa0d 100644
--- a/tests/integration/test_charm_metrics_failure.py
+++ b/tests/integration/test_charm_metrics_failure.py
@@ -10,12 +10,12 @@
import pytest_asyncio
from github.Branch import Branch
from github.Repository import Repository
+from github_runner_manager.metrics import runner_logs
+from github_runner_manager.metrics.runner import PostJobStatus
from juju.application import Application
from juju.model import Model
from charm_state import PATH_CONFIG_NAME, VIRTUAL_MACHINES_CONFIG_NAME
-from metrics import runner_logs
-from metrics.runner import PostJobStatus
from tests.integration.helpers.charm_metrics import (
assert_events_after_reconciliation,
cancel_workflow_run,
diff --git a/tests/integration/test_charm_metrics_success.py b/tests/integration/test_charm_metrics_success.py
index c9b7a8dc0..5e8254e5d 100644
--- a/tests/integration/test_charm_metrics_success.py
+++ b/tests/integration/test_charm_metrics_success.py
@@ -10,11 +10,11 @@
import pytest_asyncio
from github.Branch import Branch
from github.Repository import Repository
+from github_runner_manager.metrics.runner import PostJobStatus
from juju.application import Application
from juju.model import Model
from charm_state import PATH_CONFIG_NAME, VIRTUAL_MACHINES_CONFIG_NAME
-from metrics.runner import PostJobStatus
from tests.integration.helpers.charm_metrics import (
assert_events_after_reconciliation,
clear_metrics_log,
diff --git a/tests/integration/test_reactive.py b/tests/integration/test_reactive.py
index b7445be1f..06dc6e48c 100644
--- a/tests/integration/test_reactive.py
+++ b/tests/integration/test_reactive.py
@@ -6,14 +6,14 @@
import secrets
import pytest
+from github_runner_manager.reactive.consumer import JobDetails
+from github_runner_manager.reactive.runner_manager import REACTIVE_RUNNER_LOG_DIR
from juju.application import Application
from juju.model import Model
from juju.unit import Unit
from kombu import Connection
from pytest_operator.plugin import OpsTest
-from reactive.consumer import JobDetails
-from reactive.runner_manager import REACTIVE_RUNNER_LOG_DIR
from tests.integration.helpers.common import get_file_content, reconcile, run_in_unit
FAKE_URL = "http://example.com"
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 63b7204b3..cb88d84ba 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -1,5 +1,5 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
"""Testing the RunnerManager class with OpenStackRunnerManager as CloudManager."""
@@ -15,19 +15,28 @@
from github.Branch import Branch
from github.Repository import Repository
from github.Workflow import Workflow
-from openstack.connection import Connection as OpenstackConnection
-
-from charm_state import GitHubPath, ProxyConfig, parse_github_path
-from manager.cloud_runner_manager import CloudRunnerState, GitHubRunnerConfig, SupportServiceConfig
-from manager.github_runner_manager import GitHubRunnerState
-from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
-from metrics import events, storage
-from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
-from openstack_cloud.openstack_runner_manager import (
+from github_runner_manager.manager.cloud_runner_manager import (
+ CloudRunnerState,
+ GitHubRunnerConfig,
+ SupportServiceConfig,
+)
+from github_runner_manager.manager.github_runner_manager import GitHubRunnerState
+from github_runner_manager.manager.runner_manager import (
+ FlushMode,
+ RunnerManager,
+ RunnerManagerConfig,
+)
+from github_runner_manager.metrics import events, storage
+from github_runner_manager.openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
+from github_runner_manager.openstack_cloud.openstack_runner_manager import (
OpenStackCloudConfig,
OpenStackRunnerManager,
OpenStackServerConfig,
)
+from github_runner_manager.types_.github import GitHubPath, parse_github_path
+from openstack.connection import Connection as OpenstackConnection
+
+from charm_state import ProxyConfig
from tests.integration.helpers.common import (
DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
dispatch_workflow,
diff --git a/tests/integration/test_self_hosted_runner.py b/tests/integration/test_self_hosted_runner.py
index 4232fae4b..46c8280b1 100644
--- a/tests/integration/test_self_hosted_runner.py
+++ b/tests/integration/test_self_hosted_runner.py
@@ -9,6 +9,7 @@
import github
import pytest
from github.Repository import Repository
+from github_runner_manager.types_.github import GitHubRepo
from juju.application import Application
from juju.model import Model
@@ -16,7 +17,6 @@
DOCKERHUB_MIRROR_CONFIG_NAME,
PATH_CONFIG_NAME,
VIRTUAL_MACHINES_CONFIG_NAME,
- GitHubRepo,
)
from github_client import GithubClient
from tests.integration.helpers.common import (
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index cb50275f6..c0b760144 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -8,9 +8,9 @@
from pathlib import Path
import pytest
+from github_runner_manager.manager.runner_scaler import RunnerScaler
import utilities
-from manager.runner_scaler import RunnerScaler
from tests.unit.mock import MockGhapiClient, MockLxdClient, MockRepoPolicyComplianceClient
@@ -76,9 +76,11 @@ def mocks(monkeypatch, tmp_path, exec_command, lxd_exec_command, runner_binary_p
monkeypatch.setattr("firewall.Firewall.refresh_firewall", unittest.mock.MagicMock())
monkeypatch.setattr("runner.execute_command", lxd_exec_command)
monkeypatch.setattr("runner.shared_fs", unittest.mock.MagicMock())
- monkeypatch.setattr("metrics.events.METRICS_LOG_PATH", Path(tmp_path / "metrics.log"))
+ monkeypatch.setattr(
+ "github_runner_manager.metrics.events.METRICS_LOG_PATH", Path(tmp_path / "metrics.log")
+ )
monkeypatch.setattr("runner.time", unittest.mock.MagicMock())
- monkeypatch.setattr("github_client.GhApi", MockGhapiClient)
+ monkeypatch.setattr("github_runner_manager.github_client.GhApi", MockGhapiClient)
monkeypatch.setattr("runner_manager_type.jinja2", unittest.mock.MagicMock())
monkeypatch.setattr("runner_manager_type.LxdClient", MockLxdClient)
monkeypatch.setattr("runner_manager.github_metrics", unittest.mock.MagicMock())
@@ -91,7 +93,7 @@ def mocks(monkeypatch, tmp_path, exec_command, lxd_exec_command, runner_binary_p
monkeypatch.setattr(
"runner_manager.RepoPolicyComplianceClient", MockRepoPolicyComplianceClient
)
- monkeypatch.setattr("utilities.time", unittest.mock.MagicMock())
+ monkeypatch.setattr("github_runner_manager.utilities.time", unittest.mock.MagicMock())
@pytest.fixture(autouse=True, name="cloud_name")
@@ -108,7 +110,7 @@ def clouds_yaml_path(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Path:
Path: Mocked clouds.yaml path.
"""
clouds_yaml_path = tmp_path / "clouds.yaml"
- monkeypatch.setattr("openstack_cloud.CLOUDS_YAML_PATH", clouds_yaml_path)
+ monkeypatch.setattr("github_runner_manager.openstack_cloud.CLOUDS_YAML_PATH", clouds_yaml_path)
return clouds_yaml_path
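The fixture updates follow the usual monkeypatch rule: patch a name in the module that resolves it at call time. Since time, GhApi, and CLOUDS_YAML_PATH are now looked up inside github_runner_manager modules, the dotted targets must name those modules. A minimal sketch:

    import unittest.mock

    import pytest


    @pytest.fixture
    def frozen_time(monkeypatch: pytest.MonkeyPatch) -> unittest.mock.MagicMock:
        # Patching "utilities.time" would no longer take effect: retry() now
        # resolves time from github_runner_manager.utilities.
        mock_time = unittest.mock.MagicMock()
        monkeypatch.setattr("github_runner_manager.utilities.time", mock_time)
        return mock_time
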
diff --git a/tests/unit/metrics/__init__.py b/tests/unit/metrics/__init__.py
deleted file mode 100644
index 188515554..000000000
--- a/tests/unit/metrics/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
diff --git a/tests/unit/metrics/test_events.py b/tests/unit/metrics/test_events.py
deleted file mode 100644
index 195768291..000000000
--- a/tests/unit/metrics/test_events.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-import json
-from pathlib import Path
-
-from metrics import events
-
-TEST_LOKI_PUSH_API_URL = "http://loki:3100/api/prom/push"
-
-
-def test_issue_events_logs_events(tmp_path: Path):
- """
- arrange: Change path of the events log.
- act: Issue a metric event.
- assert: The expected metric log is created.
- """
- event = events.RunnerInstalled(timestamp=123, flavor="small", duration=456)
-
- events.issue_event(event)
-
- assert json.loads(events.METRICS_LOG_PATH.read_text()) == {
- "event": "runner_installed",
- "timestamp": 123,
- "flavor": "small",
- "duration": 456,
- }
-
-
-def test_issue_events_exclude_none_values(tmp_path: Path):
- """
- arrange: Change path of the events log.
- act: Issue a metric event with a None value.
- assert: The expected metric log without the None value is created.
- """
- event = events.RunnerStop(
- timestamp=123,
- flavor="small",
- workflow="workflow",
- repo="repo",
- github_event="github_event",
- status="status",
- status_info=None,
- job_duration=456,
- )
-
- events.issue_event(event)
-
- assert json.loads(events.METRICS_LOG_PATH.read_text()) == {
- "event": "runner_stop",
- "timestamp": 123,
- "flavor": "small",
- "workflow": "workflow",
- "repo": "repo",
- "github_event": "github_event",
- "status": "status",
- "job_duration": 456,
- }
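These tests move out with the metrics package. A minimal sketch of the API they covered, assuming the events module keeps the same surface after the move: issuing an event appends one JSON object per line to events.METRICS_LOG_PATH, omitting None fields.

    from github_runner_manager.metrics import events

    event = events.RunnerInstalled(timestamp=123, flavor="small", duration=456)
    events.issue_event(event)
    # METRICS_LOG_PATH now ends with:
    # {"event": "runner_installed", "timestamp": 123, "flavor": "small", "duration": 456}
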
diff --git a/tests/unit/metrics/test_github.py b/tests/unit/metrics/test_github.py
deleted file mode 100644
index 78a21e4e1..000000000
--- a/tests/unit/metrics/test_github.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-import secrets
-from datetime import datetime, timedelta, timezone
-from random import randint
-from unittest.mock import MagicMock
-
-import pytest
-
-from errors import GithubMetricsError, JobNotFoundError
-from github_client import GithubClient
-from github_type import JobConclusion, JobStats
-from metrics import github as github_metrics
-from metrics.runner import PreJobMetrics
-
-
-@pytest.fixture(name="pre_job_metrics")
-def pre_job_metrics_fixture() -> PreJobMetrics:
- """Create a PreJobMetrics object."""
- return PreJobMetrics(
- repository="owner/repo",
- workflow_run_id=1,
- workflow="workflow",
- job_name="job",
- job_started_at=datetime(2021, 10, 1, 1, 0, 0, tzinfo=timezone.utc),
- timestamp=1234567890,
- event="push",
- )
-
-
-def test_job(pre_job_metrics: PreJobMetrics):
- """
- arrange: create a GithubClient mock which returns a GithubJobStats object.
- act: Call job.
- assert: the job metrics are returned.
- """
- github_client = MagicMock(spec=GithubClient)
- runner_name = secrets.token_hex(16)
- created_at = datetime(2021, 10, 1, 0, 0, 0, tzinfo=timezone.utc)
- started_at = created_at + timedelta(seconds=3600)
- github_client.get_job_info.return_value = JobStats(
- created_at=created_at,
- started_at=started_at,
- runner_name=runner_name,
- conclusion=JobConclusion.SUCCESS,
- job_id=randint(1, 1000),
- )
-
- job_metrics = github_metrics.job(
- github_client=github_client, pre_job_metrics=pre_job_metrics, runner_name=runner_name
- )
-
- assert job_metrics.queue_duration == 3600
- assert job_metrics.conclusion == JobConclusion.SUCCESS
-
-
-def test_job_job_not_found(pre_job_metrics: PreJobMetrics):
- """
- arrange: create a GithubClient mock which raises a JobNotFound exception.
- act: Call job.
- assert: a GithubMetricsError is raised.
- """
- github_client = MagicMock(spec=GithubClient)
- runner_name = secrets.token_hex(16)
- github_client.get_job_info.side_effect = JobNotFoundError("Job not found")
-
- with pytest.raises(GithubMetricsError):
- github_metrics.job(
- github_client=github_client, pre_job_metrics=pre_job_metrics, runner_name=runner_name
- )
diff --git a/tests/unit/metrics/test_runner.py b/tests/unit/metrics/test_runner.py
deleted file mode 100644
index bf0a14251..000000000
--- a/tests/unit/metrics/test_runner.py
+++ /dev/null
@@ -1,649 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-import json
-import secrets
-from pathlib import Path
-from unittest.mock import MagicMock, call
-
-import pytest
-
-from errors import DeleteMetricsStorageError, IssueMetricEventError
-from github_type import JobConclusion
-from metrics import events as metric_events
-from metrics import runner as runner_metrics
-from metrics import type as metrics_type
-from metrics.events import RunnerStart, RunnerStop
-from metrics.runner import (
- RUNNER_INSTALLED_TS_FILE_NAME,
- PostJobMetrics,
- PreJobMetrics,
- RunnerMetrics,
-)
-from metrics.storage import MetricsStorage
-
-
-@pytest.fixture(autouse=True, name="issue_event_mock")
-def issue_event_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
- """Mock the issue_event function."""
- issue_event_mock = MagicMock()
- monkeypatch.setattr("metrics.events.issue_event", issue_event_mock)
- return issue_event_mock
-
-
-@pytest.fixture(name="runner_fs_base")
-def runner_fs_base_fixture(tmp_path: Path) -> Path:
- """Create a runner filesystem base."""
- runner_fs_base = tmp_path / "runner-fs"
- runner_fs_base.mkdir(exist_ok=True)
- return runner_fs_base
-
-
-def _create_metrics_data(runner_name: str) -> RunnerMetrics:
- """Create a RunnerMetrics object that is suitable for most tests.
-
- Args:
- runner_name: The test runner name.
-
- Returns:
- Test metrics data.
- """
- return RunnerMetrics(
- installed_timestamp=1,
- pre_job=PreJobMetrics(
- timestamp=1,
- workflow="workflow1",
- workflow_run_id="workflow_run_id1",
- repository="org1/repository1",
- event="push",
- ),
- post_job=PostJobMetrics(timestamp=3, status=runner_metrics.PostJobStatus.NORMAL),
- runner_name=runner_name,
- )
-
-
-def _create_runner_fs_base(tmp_path: Path):
- """Create a runner filesystem base.
-
- Args:
- tmp_path: The temporary path to create test runner filesystem under.
-
- Returns:
- The runner filesystem temporary path.
- """
- runner_fs_base = tmp_path / "runner-fs"
- runner_fs_base.mkdir(exist_ok=True)
- return runner_fs_base
-
-
-def _create_runner_files(
- runner_fs_base: Path,
- runner_name: str,
- pre_job_data: str | bytes | None,
- post_job_data: str | bytes | None,
- installed_timestamp: str | bytes | None,
-) -> MetricsStorage:
- """Create runner files inside shared fs.
-
- If the data is bytes, the file is written as binary, otherwise as text.
- If data is None, it is not written.
-
- Args:
- runner_fs_base: The base path of the shared fs.
- runner_name: The runner name.
- pre_job_data: The pre-job metrics data.
- post_job_data: The post-job metrics data.
- installed_timestamp: The installed timestamp.
-
- Returns:
-        A MetricsStorage instance.
- """
- runner_fs = runner_fs_base / runner_name
- runner_fs.mkdir()
- if pre_job_data:
- if isinstance(pre_job_data, bytes):
- runner_fs.joinpath(runner_metrics.PRE_JOB_METRICS_FILE_NAME).write_bytes(pre_job_data)
- else:
- runner_fs.joinpath(runner_metrics.PRE_JOB_METRICS_FILE_NAME).write_text(
- pre_job_data, encoding="utf-8"
- )
-
- if post_job_data:
- if isinstance(post_job_data, bytes):
- runner_fs.joinpath(runner_metrics.POST_JOB_METRICS_FILE_NAME).write_bytes(
- post_job_data
- )
- else:
- runner_fs.joinpath(runner_metrics.POST_JOB_METRICS_FILE_NAME).write_text(
- post_job_data, encoding="utf-8"
- )
-
- if installed_timestamp:
- if isinstance(installed_timestamp, bytes):
- runner_fs.joinpath(RUNNER_INSTALLED_TS_FILE_NAME).write_bytes(installed_timestamp)
- else:
- runner_fs.joinpath(RUNNER_INSTALLED_TS_FILE_NAME).write_text(
- installed_timestamp, encoding="utf-8"
- )
- return MetricsStorage(path=runner_fs, runner_name=runner_name)
-
-
-def test_extract(runner_fs_base: Path):
- """
- arrange: \
- 1. A runner with all metrics inside shared fs. \
- 2. A runner with only pre-job metrics inside shared fs. \
- 3. A runner with no metrics except installed_timestamp inside shared fs.
-    act: Call extract.
-    assert: All shared filesystems are removed. Metrics are extracted for \
-        runners 1. and 2., while none are extracted for runner 3.
- """
- runner_all_metrics_name = secrets.token_hex(16)
- runner_all_metrics = _create_metrics_data(runner_all_metrics_name)
-    runner_without_post_job_name = secrets.token_hex(16)
-    runner_without_post_job_metrics = runner_all_metrics.copy()
-    runner_without_post_job_metrics.post_job = None
-    runner_without_post_job_metrics.runner_name = runner_without_post_job_name
-
- # 1. Runner has all metrics inside shared fs
- runner1_fs = _create_runner_files(
- runner_fs_base,
- runner_all_metrics_name,
- runner_all_metrics.pre_job.json(),
- runner_all_metrics.post_job.json(),
- str(runner_all_metrics.installed_timestamp),
- )
-
- # 2. Runner has only pre-job metrics inside shared fs
- runner2_fs = _create_runner_files(
- runner_fs_base,
-        runner_without_post_job_name,
- runner_without_post_job_metrics.pre_job.json(),
- None,
- str(runner_without_post_job_metrics.installed_timestamp),
- )
-
- # 3. Runner has no metrics except installed_timestamp inside shared fs
- runner3_fs = _create_runner_files(runner_fs_base, secrets.token_hex(16), None, None, "5")
-
- metrics_storage_manager = MagicMock()
- metrics_storage_manager.list_all.return_value = [runner1_fs, runner2_fs, runner3_fs]
-
- extracted_metrics = list(
- runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
- )
-
- assert extracted_metrics == [
- runner_all_metrics,
- runner_without_post_job_metrics,
- ]
- metrics_storage_manager.delete.assert_has_calls(
- [
- ((runner1_fs.runner_name,),),
- ((runner2_fs.runner_name,),),
- ((runner3_fs.runner_name,),),
- ]
- )
-
-
-def test_extract_ignores_runners(runner_fs_base: Path):
- """
- arrange: Runners with metrics.
- act: Call extract with some runners on ignore list.
- expect: The ignored runners are not processed.
- """
- runner_metrics_data = []
-
- runner_filesystems = []
- for i in range(5):
- runner_name = secrets.token_hex(16)
- data = _create_metrics_data(runner_name)
- data.pre_job.workflow = f"workflow{i}"
- runner_metrics_data.append(data)
- runner_fs = _create_runner_files(
- runner_fs_base,
- runner_name,
- data.pre_job.json(),
- data.post_job.json(),
- str(data.installed_timestamp),
- )
- runner_filesystems.append(runner_fs)
-
- metrics_storage_manager = MagicMock()
- metrics_storage_manager.list_all.return_value = runner_filesystems
-
- ignore_runners = {runner_filesystems[0].runner_name, runner_filesystems[2].runner_name}
-
- extracted_metrics = list(
- runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, runners=ignore_runners
- )
- )
-
- assert extracted_metrics == runner_metrics_data[1:2] + runner_metrics_data[3:]
-
-
-def test_extract_corrupt_data(runner_fs_base: Path, monkeypatch: pytest.MonkeyPatch):
- """
- arrange: \
- 1. A runner with non-compliant pre-job metrics inside shared fs. \
- 2. A runner with non-json post-job metrics inside shared fs. \
- 3. A runner with json array post-job metrics inside shared fs. \
- 4. A runner with no real timestamp in installed_timestamp file inside shared fs.
- act: Call extract.
-    assert: No metrics are extracted and the shared filesystems are quarantined in all cases.
- """
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name=runner_name)
-
-    # 1. Runner has non-compliant pre-job metrics inside shared fs
- invalid_pre_job_data = runner_metrics_data.pre_job.copy(update={"timestamp": -1})
- runner_fs = _create_runner_files(
- runner_fs_base,
- runner_name,
- invalid_pre_job_data.json(),
- runner_metrics_data.post_job.json(),
- str(runner_metrics_data.installed_timestamp),
- )
- metrics_storage_manager = MagicMock()
- metrics_storage_manager.list_all.return_value = [runner_fs]
- move_to_quarantine_mock = MagicMock()
- monkeypatch.setattr(runner_metrics, "move_to_quarantine", move_to_quarantine_mock)
-
- extracted_metrics = list(
- runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
- )
-
- assert not extracted_metrics
- move_to_quarantine_mock.assert_any_call(metrics_storage_manager, runner_fs.runner_name)
-
- # 2. Runner has non-json post-job metrics inside shared fs
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name=runner_name)
-
- runner_fs = _create_runner_files(
- runner_fs_base,
- runner_name,
- runner_metrics_data.pre_job.json(),
- b"\x00",
- str(runner_metrics_data.installed_timestamp),
- )
- metrics_storage_manager.list_all.return_value = [runner_fs]
-
- extracted_metrics = list(
- runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
- )
- assert not extracted_metrics
- move_to_quarantine_mock.assert_any_call(metrics_storage_manager, runner_fs.runner_name)
-
- # 3. Runner has json post-job metrics but a json array (not object) inside shared fs.
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name=runner_name)
-
- runner_fs = _create_runner_files(
- runner_fs_base,
- runner_name,
- runner_metrics_data.pre_job.json(),
- json.dumps([runner_metrics_data.post_job.dict()]),
- str(runner_metrics_data.installed_timestamp),
- )
- metrics_storage_manager.list_all.return_value = [runner_fs]
-
- extracted_metrics = list(
- runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
- )
- assert not extracted_metrics
- move_to_quarantine_mock.assert_any_call(metrics_storage_manager, runner_fs.runner_name)
-
-    # 4. Runner does not have a real timestamp in the installed_timestamp file inside shared fs
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name=runner_name)
-
- runner_fs = _create_runner_files(
- runner_fs_base,
- runner_name,
- runner_metrics_data.pre_job.json(),
- runner_metrics_data.post_job.json(),
- b"\x00",
- )
- metrics_storage_manager.list_all.return_value = [runner_fs]
-
- extracted_metrics = list(
- runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
- )
- assert not extracted_metrics
-
- move_to_quarantine_mock.assert_any_call(metrics_storage_manager, runner_fs.runner_name)
-
-
-def test_extract_raises_error_for_too_large_files(
- runner_fs_base: Path, issue_event_mock: MagicMock, monkeypatch: pytest.MonkeyPatch
-):
- """
- arrange: Runners with too large metric and timestamp files.
- act: Call extract.
-    assert: No metrics are extracted and the shared filesystems are quarantined.
- """
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name)
-
- # 1. Runner has a pre-job metrics file that is too large
- invalid_pre_job_data = runner_metrics_data.pre_job.copy(
- update={"workflow": "a" * runner_metrics.FILE_SIZE_BYTES_LIMIT + "b"}
- )
-
- runner_fs = _create_runner_files(
- runner_fs_base,
- runner_name,
- invalid_pre_job_data.json(),
- runner_metrics_data.post_job.json(),
- str(runner_metrics_data.installed_timestamp),
- )
- metrics_storage_manager = MagicMock()
-
- metrics_storage_manager.list_all.return_value = [runner_fs]
-
- move_to_quarantine_mock = MagicMock()
- monkeypatch.setattr(runner_metrics, "move_to_quarantine", move_to_quarantine_mock)
-
- extracted_metrics = list(
- runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
- )
- assert not extracted_metrics
-
- move_to_quarantine_mock.assert_any_call(metrics_storage_manager, runner_fs.runner_name)
-
- # 2. Runner has a post-job metrics file that is too large
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name)
- invalid_post_job_data = runner_metrics_data.post_job.copy(
- update={"status": "a" * runner_metrics.FILE_SIZE_BYTES_LIMIT + "b"}
- )
- runner_fs = _create_runner_files(
- runner_fs_base,
- runner_name,
- runner_metrics_data.pre_job.json(),
- invalid_post_job_data.json(),
- str(runner_metrics_data.installed_timestamp),
- )
- metrics_storage_manager.list_all.return_value = [runner_fs]
-
- extracted_metrics = list(
- runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
- )
-
- assert not extracted_metrics
-
- move_to_quarantine_mock.assert_any_call(metrics_storage_manager, runner_fs.runner_name)
-
- # 3. Runner has an installed_timestamp file that is too large
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name)
-
- invalid_ts = "1" * (runner_metrics.FILE_SIZE_BYTES_LIMIT + 1)
-
- runner_fs = _create_runner_files(
- runner_fs_base,
- runner_name,
- runner_metrics_data.pre_job.json(),
- runner_metrics_data.post_job.json(),
- invalid_ts,
- )
- metrics_storage_manager.list_all.return_value = [runner_fs]
-
- extracted_metrics = list(
- runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
- )
-
- assert not extracted_metrics
- move_to_quarantine_mock.assert_any_call(metrics_storage_manager, runner_fs.runner_name)
-
-
-def test_extract_ignores_filesystems_without_ts(runner_fs_base: Path):
- """
- arrange: A runner without installed_timestamp file inside shared fs.
- act: Call extract.
- assert: No metrics are extracted and shared filesystem is removed.
- """
- runner_name = secrets.token_hex(16)
- runner_metrics_data = RunnerMetrics.construct(
- installed_timestamp=1,
- pre_job=PreJobMetrics(
- timestamp=1,
- workflow="workflow1",
- workflow_run_id="workflow_run_id1",
- repository="org1/repository1",
- event="push",
- ),
- post_job=PostJobMetrics(timestamp=3, status=runner_metrics.PostJobStatus.NORMAL),
- runner_name=runner_name,
- )
-
- runner_fs = _create_runner_files(
- runner_fs_base,
- runner_name,
- runner_metrics_data.pre_job.json(),
- runner_metrics_data.post_job.json(),
- None,
- )
- metrics_storage_manager = MagicMock()
- metrics_storage_manager.list_all.return_value = [runner_fs]
-
- extracted_metrics = list(
- runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
- )
- assert not extracted_metrics
- metrics_storage_manager.delete.assert_called_once_with(runner_fs.runner_name)
-
-
-def test_extract_ignores_failure_on_shared_fs_cleanup(
- runner_fs_base: Path,
- caplog: pytest.LogCaptureFixture,
-):
- """
- arrange: Mock the shared_fs.delete to raise an exception.
- act: Call extract.
- assert: The metric is extracted and the exception is caught and logged.
- """
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name)
- runner_fs = _create_runner_files(
- runner_fs_base,
- runner_metrics_data.runner_name,
- runner_metrics_data.pre_job.json(),
- runner_metrics_data.post_job.json(),
- str(runner_metrics_data.installed_timestamp),
- )
- metrics_storage_manager = MagicMock()
-
- metrics_storage_manager.list_all.return_value = [runner_fs]
-
- metrics_storage_manager.delete.side_effect = DeleteMetricsStorageError(
- "Failed to delete shared filesystem"
- )
-
- extracted_metrics = runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, runners=set()
- )
- assert list(extracted_metrics) == [runner_metrics_data]
-
- assert "Failed to delete shared filesystem" in caplog.text
-
-
-def test_issue_events(issue_event_mock: MagicMock):
- """
- arrange: A runner with all metrics.
- act: Call issue_events.
- assert: RunnerStart and RunnerStop metrics are issued.
- """
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name)
-
- flavor = secrets.token_hex(16)
- job_metrics = metrics_type.GithubJobMetrics(
- queue_duration=3600, conclusion=JobConclusion.SUCCESS
- )
- issued_metrics = runner_metrics.issue_events(
- runner_metrics=runner_metrics_data, flavor=flavor, job_metrics=job_metrics
- )
- assert issued_metrics == {metric_events.RunnerStart, metric_events.RunnerStop}
- issue_event_mock.assert_has_calls(
- [
-            # 1. RunnerStart
- call(
- RunnerStart(
- timestamp=runner_metrics_data.pre_job.timestamp,
- flavor=flavor,
- workflow=runner_metrics_data.pre_job.workflow,
- repo=runner_metrics_data.pre_job.repository,
- github_event=runner_metrics_data.pre_job.event,
- idle=runner_metrics_data.pre_job.timestamp
- - runner_metrics_data.installed_timestamp,
- queue_duration=job_metrics.queue_duration,
- )
- ),
- call(
- RunnerStop(
- timestamp=runner_metrics_data.post_job.timestamp,
- flavor=flavor,
- workflow=runner_metrics_data.pre_job.workflow,
- repo=runner_metrics_data.pre_job.repository,
- github_event=runner_metrics_data.pre_job.event,
- status=runner_metrics_data.post_job.status,
- job_duration=runner_metrics_data.post_job.timestamp
- - runner_metrics_data.pre_job.timestamp,
- job_conclusion=job_metrics.conclusion,
- )
- ),
- ]
- )
-
-
-def test_issue_events_pre_job_before_runner_installed(issue_event_mock: MagicMock):
- """
- arrange: A runner with pre-job timestamp smaller than installed timestamp.
- act: Call issue_events.
- assert: RunnerStart metric is issued with idle set to 0.
- """
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name)
- runner_metrics_data.pre_job.timestamp = 0
-
- flavor = secrets.token_hex(16)
- job_metrics = metrics_type.GithubJobMetrics(
- queue_duration=3600, conclusion=JobConclusion.SUCCESS
- )
- issued_metrics = runner_metrics.issue_events(
- runner_metrics=runner_metrics_data, flavor=flavor, job_metrics=job_metrics
- )
- assert metric_events.RunnerStart in issued_metrics
- issue_event_mock.assert_has_calls(
- [
- call(
- RunnerStart(
- timestamp=runner_metrics_data.pre_job.timestamp,
- flavor=flavor,
- workflow=runner_metrics_data.pre_job.workflow,
- repo=runner_metrics_data.pre_job.repository,
- github_event=runner_metrics_data.pre_job.event,
- idle=0,
- queue_duration=job_metrics.queue_duration,
- )
- )
- ]
- )
-
-
-def test_issue_events_post_job_before_pre_job(issue_event_mock: MagicMock):
- """
- arrange: A runner with post-job timestamp smaller than pre-job timestamps.
- act: Call issue_events.
- assert: job_duration is set to zero.
- """
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name)
- runner_metrics_data.post_job = PostJobMetrics(
- timestamp=0, status=runner_metrics.PostJobStatus.NORMAL
- )
- flavor = secrets.token_hex(16)
- job_metrics = metrics_type.GithubJobMetrics(
- queue_duration=3600, conclusion=JobConclusion.SUCCESS
- )
- issued_metrics = runner_metrics.issue_events(
- runner_metrics=runner_metrics_data, flavor=flavor, job_metrics=job_metrics
- )
-
- assert metric_events.RunnerStop in issued_metrics
- issue_event_mock.assert_has_calls(
- [
- call(
- RunnerStop(
- timestamp=runner_metrics_data.post_job.timestamp,
- flavor=flavor,
- workflow=runner_metrics_data.pre_job.workflow,
- repo=runner_metrics_data.pre_job.repository,
- github_event=runner_metrics_data.pre_job.event,
- status=runner_metrics_data.post_job.status,
- job_duration=0,
- job_conclusion=job_metrics.conclusion,
- )
- ),
- ]
- )
-
-
-def test_issue_events_no_post_job_metrics(issue_event_mock: MagicMock):
- """
- arrange: A runner without post-job metrics.
- act: Call issue_events.
- assert: Only RunnerStart metric is issued.
- """
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name)
- runner_metrics_data.post_job = None
- flavor = secrets.token_hex(16)
- job_metrics = metrics_type.GithubJobMetrics(
- queue_duration=3600, conclusion=JobConclusion.SUCCESS
- )
- issued_metrics = runner_metrics.issue_events(
- runner_metrics=runner_metrics_data, flavor=flavor, job_metrics=job_metrics
- )
- assert issued_metrics == {metric_events.RunnerStart}
-
- issue_event_mock.assert_called_once_with(
- RunnerStart(
- timestamp=runner_metrics_data.pre_job.timestamp,
- flavor=flavor,
- workflow=runner_metrics_data.pre_job.workflow,
- repo=runner_metrics_data.pre_job.repository,
- github_event=runner_metrics_data.pre_job.event,
- idle=runner_metrics_data.pre_job.timestamp - runner_metrics_data.installed_timestamp,
- queue_duration=job_metrics.queue_duration,
- )
- )
-
-
-def test_issue_events_returns_empty_set_on_issue_event_failure(
- issue_event_mock: MagicMock,
- caplog: pytest.LogCaptureFixture,
-):
- """
- arrange: Mock the issue_event_mock to raise an exception on the first call.
- act: Call issue_events.
- assert: No metrics at all are issued. The exception is caught and logged.
- """
- runner_name = secrets.token_hex(16)
- runner_metrics_data = _create_metrics_data(runner_name)
-
- issue_event_mock.side_effect = [IssueMetricEventError("Failed to issue metric"), None]
-
- flavor = secrets.token_hex(16)
- job_metrics = metrics_type.GithubJobMetrics(
- queue_duration=3600, conclusion=JobConclusion.SUCCESS
- )
-
- issued_metrics = runner_metrics.issue_events(
- runner_metrics=runner_metrics_data, flavor=flavor, job_metrics=job_metrics
- )
- assert not issued_metrics
- assert "Failed to issue metric" in caplog.text
diff --git a/tests/unit/metrics/test_runner_logs.py b/tests/unit/metrics/test_runner_logs.py
deleted file mode 100644
index d53dc17cf..000000000
--- a/tests/unit/metrics/test_runner_logs.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-from pathlib import Path
-
-import pytest
-
-from metrics import runner_logs
-
-
-@pytest.fixture(name="log_dir_base_path")
-def log_dir_base_path_fixture(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
- """Mock the log directory path and return it."""
- log_dir_base_path = tmp_path / "log_dir"
- monkeypatch.setattr(runner_logs, "RUNNER_LOGS_DIR_PATH", log_dir_base_path)
- return log_dir_base_path
-
-
-def test_remove_outdated_crashed(log_dir_base_path: Path, monkeypatch: pytest.MonkeyPatch):
- """
- arrange: Mock the base log directory path.
- act: Remove the logs of the runner.
- assert: The expected logs are removed.
- """
- monkeypatch.setattr(runner_logs, "OUTDATED_LOGS_IN_SECONDS", 0)
-
- log_dir_path = log_dir_base_path / "test-runner"
- log_dir_path.mkdir(parents=True)
-
- runner_logs.remove_outdated()
-
- assert not log_dir_path.exists()
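Likewise for the log-pruning test. Under the same assumption that the module's public names survive the move, the behaviour it checked reduces to:

    from github_runner_manager.metrics import runner_logs

    # Delete per-runner crash-log directories under runner_logs.RUNNER_LOGS_DIR_PATH
    # that are older than runner_logs.OUTDATED_LOGS_IN_SECONDS.
    runner_logs.remove_outdated()
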
diff --git a/tests/unit/metrics/test_storage.py b/tests/unit/metrics/test_storage.py
deleted file mode 100644
index bc8d0e94c..000000000
--- a/tests/unit/metrics/test_storage.py
+++ /dev/null
@@ -1,168 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-import secrets
-import tarfile
-from pathlib import Path
-
-import pytest
-
-from errors import (
- CreateMetricsStorageError,
- DeleteMetricsStorageError,
- GetMetricsStorageError,
- QuarantineMetricsStorageError,
-)
-from metrics import storage
-from metrics.storage import MetricsStorage
-
-
-@pytest.fixture(autouse=True, name="filesystem_paths")
-def filesystem_paths_fixture(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> dict[str, Path]:
- """Mock the hardcoded filesystem paths."""
- ms_path = tmp_path / "runner-fs"
- ms_quarantine_path = tmp_path / "quarantine"
- monkeypatch.setattr(storage, "FILESYSTEM_BASE_PATH", ms_path)
- monkeypatch.setattr(storage, "FILESYSTEM_QUARANTINE_PATH", ms_quarantine_path)
- return {"base": ms_path, "quarantine": ms_quarantine_path}
-
-
-def test_create_creates_directory():
- """
- arrange: Given a runner name and a path for the storage.
- act: Call create.
- assert: The directory is created.
- """
- runner_name = secrets.token_hex(16)
-
- fs = storage.create(runner_name)
-
- assert fs.path.exists()
- assert fs.path.is_dir()
-
-
-def test_create_raises_exception_if_already_exists():
- """
- arrange: Given a runner name and an already existing shared filesystem.
- act: Call create.
- assert: The expected exception is raised.
- """
- runner_name = secrets.token_hex(16)
- storage.create(runner_name)
-
- with pytest.raises(CreateMetricsStorageError):
- storage.create(runner_name)
-
-
-def test_list_all():
- """
- arrange: Create metric storages for multiple runners.
- act: Call list_all.
- assert: A generator listing all the shared filesystems is returned.
- """
- runner_names = [secrets.token_hex(16) for _ in range(3)]
- for runner_name in runner_names:
- storage.create(runner_name)
-
- fs_list = list(storage.list_all())
-
- assert len(fs_list) == 3
- for fs in fs_list:
- assert isinstance(fs, storage.MetricsStorage)
- assert fs.runner_name in runner_names
-
-
-def test_list_all_empty():
- """
- arrange: Nothing.
- act: Call list_all.
- assert: An empty iterator is returned.
- """
- fs_list = list(storage.list_all())
-
- assert len(fs_list) == 0
-
-
-def test_delete():
- """
- arrange: Create metrics storage for a runner.
- act: Call delete
- assert: The storage is deleted.
- """
- runner_name = secrets.token_hex(16)
- storage.create(runner_name)
-
- storage.delete(runner_name)
-
- with pytest.raises(GetMetricsStorageError):
- storage.get(runner_name)
-
-
-def test_delete_raises_error():
- """
- arrange: Nothing.
- act: Call delete.
- assert: A DeleteMetricsStorageError is raised.
- """
- runner_name = secrets.token_hex(16)
-
- with pytest.raises(DeleteMetricsStorageError):
- storage.delete(runner_name)
-
-
-def test_get():
- """
- arrange: Given a runner name.
- act: Call create and get.
- assert: A metrics storage object for this runner is returned.
- """
- runner_name = secrets.token_hex(16)
-
- storage.create(runner_name)
- ms = storage.get(runner_name)
-
- assert isinstance(ms, MetricsStorage)
- assert ms.runner_name == runner_name
-
-
-def test_get_raises_error_if_not_found():
- """
- arrange: Nothing.
- act: Call get.
- assert: A GetMetricsStorageError is raised.
- """
- runner_name = secrets.token_hex(16)
-
- with pytest.raises(GetMetricsStorageError):
- storage.get(runner_name)
-
-
-def test_quarantine(filesystem_paths: dict[str, Path], tmp_path: Path):
- """
- arrange: Create a storage for a runner with a file in it.
- act: Call quarantine.
- assert: The storage is moved to the quarantine.
- """
- runner_name = secrets.token_hex(16)
- ms = storage.create(runner_name)
- ms.path.joinpath("test.txt").write_text("foo bar")
-
- storage.move_to_quarantine(storage, runner_name)
-
- tarfile_path = filesystem_paths["quarantine"].joinpath(runner_name).with_suffix(".tar.gz")
- assert tarfile_path.exists()
- tarfile.open(tarfile_path).extractall(path=tmp_path)
- assert tmp_path.joinpath(f"{runner_name}/test.txt").exists()
- assert tmp_path.joinpath(f"{runner_name}/test.txt").read_text(encoding="utf-8") == "foo bar"
- assert not ms.path.exists()
-
-
-def test_quarantine_raises_error():
- """
- arrange: Nothing.
- act: Call quarantine.
- assert: A QuarantineMetricsStorageError is raised.
- """
- runner_name = secrets.token_hex(16)
-
- with pytest.raises(QuarantineMetricsStorageError):
- storage.move_to_quarantine(storage, runner_name)
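The storage tests document a small lifecycle, sketched here against the new package path under the same assumptions:

    import secrets

    from github_runner_manager.metrics import storage

    runner_name = secrets.token_hex(16)
    ms = storage.create(runner_name)  # raises CreateMetricsStorageError if it already exists
    ms.path.joinpath("test.txt").write_text("foo bar", encoding="utf-8")
    assert storage.get(runner_name).runner_name == runner_name
    # On corrupt data: storage.move_to_quarantine(storage, runner_name) archives
    # the directory as a tarball under the quarantine path.
    storage.delete(runner_name)  # raises DeleteMetricsStorageError if missing
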
diff --git a/tests/unit/mock.py b/tests/unit/mock.py
index be3e07ca7..78c0c6990 100644
--- a/tests/unit/mock.py
+++ b/tests/unit/mock.py
@@ -12,8 +12,9 @@
from pathlib import Path
from typing import IO, Optional, Sequence, Union
+from github_runner_manager.types_.github import RegistrationToken, RemoveToken, RunnerApplication
+
from errors import LxdError, RunnerError
-from github_type import RegistrationToken, RemoveToken, RunnerApplication
from lxd_type import LxdNetwork
from runner import LxdInstanceConfig
diff --git a/tests/unit/mock_runner_managers.py b/tests/unit/mock_runner_managers.py
index 443c84dfd..b52afa538 100644
--- a/tests/unit/mock_runner_managers.py
+++ b/tests/unit/mock_runner_managers.py
@@ -7,17 +7,18 @@
from typing import Iterable, Iterator, Sequence
from unittest.mock import MagicMock
-from charm_state import GitHubPath
-from github_client import GithubClient
-from github_type import GitHubRunnerStatus, SelfHostedRunner
-from manager.cloud_runner_manager import (
+from github_runner_manager.manager.cloud_runner_manager import (
CloudRunnerInstance,
CloudRunnerManager,
CloudRunnerState,
InstanceId,
)
-from manager.github_runner_manager import GitHubRunnerState
-from metrics.runner import RunnerMetrics
+from github_runner_manager.manager.github_runner_manager import GitHubRunnerState
+from github_runner_manager.metrics.runner import RunnerMetrics
+from github_runner_manager.types_.github import GitHubRunnerStatus, SelfHostedRunner
+
+from charm_state import GitHubPath
+from github_client import GithubClient
from tests.unit.mock import MockGhapiClient
diff --git a/tests/unit/reactive/__init__.py b/tests/unit/reactive/__init__.py
deleted file mode 100644
index 188515554..000000000
--- a/tests/unit/reactive/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
diff --git a/tests/unit/reactive/test_consumer.py b/tests/unit/reactive/test_consumer.py
deleted file mode 100644
index 2a443c9b3..000000000
--- a/tests/unit/reactive/test_consumer.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-import secrets
-from contextlib import closing
-
-import pytest
-from kombu import Connection, Message
-
-from reactive import consumer
-from reactive.consumer import JobError
-
-IN_MEMORY_URI = "memory://"
-FAKE_RUN_URL = "https://api.github.com/repos/fakeusergh-runner-test/actions/runs/8200803099"
-
-
-def test_consume(caplog: pytest.LogCaptureFixture):
- """
- arrange: A job placed in the message queue.
- act: Call consume
- assert: The job is logged.
- """
- queue_name = secrets.token_hex(16)
- job_details = consumer.JobDetails(
- labels=[secrets.token_hex(16), secrets.token_hex(16)],
- run_url=FAKE_RUN_URL,
- )
- _put_in_queue(job_details.json(), queue_name)
-
-    # IN_MEMORY_URI ("memory://") is not a real URL; kombu accepts it for its in-memory transport
- consumer.consume(IN_MEMORY_URI, queue_name)
- assert str(job_details.labels) in caplog.text
- assert job_details.run_url in caplog.text
-
-
-@pytest.mark.parametrize(
- "job_str",
- [
- pytest.param(
- '{"labels": ["label1", "label2"], "status": "completed"}', id="run_url missing"
- ),
- pytest.param(
- '{"status": "completed", "run_url": "https://example.com"}', id="labels missing"
- ),
- pytest.param("no json at all", id="invalid json"),
- ],
-)
-def test_job_details_validation_error(job_str: str):
- """
- arrange: A job placed in the message queue with invalid details.
- act: Call consume
- assert: A JobError is raised and the message is requeued.
- """
- queue_name = secrets.token_hex(16)
- _put_in_queue(job_str, queue_name)
-
- with pytest.raises(JobError) as exc_info:
- consumer.consume(IN_MEMORY_URI, queue_name)
- assert "Invalid job details" in str(exc_info.value)
-
- # Ensure message has been requeued by reconsuming it
- msg = _consume_from_queue(queue_name)
- assert msg.payload == job_str
-
-
-def _put_in_queue(msg: str, queue_name: str) -> None:
- """Put a job in the message queue.
-
- Args:
- msg: The job details.
- queue_name: The name of the queue
- """
- with Connection(IN_MEMORY_URI) as conn:
- with closing(conn.SimpleQueue(queue_name)) as simple_queue:
- simple_queue.put(msg, retry=True)
-
-
-def _consume_from_queue(queue_name: str) -> Message:
- """Consume a job from the message queue.
-
- Args:
- queue_name: The name of the queue
-
- Returns:
- The message consumed from the queue.
- """
- with Connection(IN_MEMORY_URI) as conn:
- with closing(conn.SimpleQueue(queue_name)) as simple_queue:
- return simple_queue.get(block=False)
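Finally, the consumer tests. The kombu plumbing they relied on is standard: put a message on an in-memory queue and read it back, exactly as the deleted helpers did, while the consumer itself now ships with github_runner_manager.

    from contextlib import closing

    from kombu import Connection

    job_str = '{"labels": ["x64"], "run_url": "https://example.com"}'
    with Connection("memory://") as conn:
        with closing(conn.SimpleQueue("test-queue")) as queue:
            queue.put(job_str, retry=True)  # consume() requeues on validation failure
            message = queue.get(block=False)
            assert message.payload == job_str
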
diff --git a/tests/unit/reactive/test_runner_manager.py b/tests/unit/reactive/test_runner_manager.py
deleted file mode 100644
index cd25cf728..000000000
--- a/tests/unit/reactive/test_runner_manager.py
+++ /dev/null
@@ -1,175 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-import os
-import secrets
-import subprocess
-from pathlib import Path
-from subprocess import CompletedProcess
-from unittest.mock import MagicMock
-
-import pytest
-
-from reactive.runner_manager import (
- PIDS_COMMAND_LINE,
- PYTHON_BIN,
- REACTIVE_RUNNER_SCRIPT_FILE,
- ReactiveRunnerError,
- reconcile,
-)
-from utilities import secure_run_subprocess
-
-EXAMPLE_MQ_URI = "http://example.com"
-
-
-@pytest.fixture(name="log_dir", autouse=True)
-def log_dir_path_fixture(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
- """Return the path to the log file."""
- log_file_path = tmp_path / "logs"
- monkeypatch.setattr("reactive.runner_manager.REACTIVE_RUNNER_LOG_DIR", log_file_path)
- monkeypatch.setattr("shutil.chown", lambda *args, **kwargs: None)
- return log_file_path
-
-
-@pytest.fixture(name="secure_run_subprocess_mock")
-def secure_run_subprocess_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
- """Mock the ps command."""
- secure_run_subprocess_mock = MagicMock(spec=secure_run_subprocess)
- monkeypatch.setattr(
- "reactive.runner_manager.secure_run_subprocess", secure_run_subprocess_mock
- )
- return secure_run_subprocess_mock
-
-
-@pytest.fixture(name="os_kill_mock", autouse=True)
-def os_kill_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
- """Mock the os.kill function."""
- os_kill_mock = MagicMock(spec=os.kill)
- monkeypatch.setattr("os.kill", os_kill_mock)
- return os_kill_mock
-
-
-@pytest.fixture(name="subprocess_popen_mock")
-def subprocess_popen_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
- """Mock the subprocess.Popen function."""
- popen_result = MagicMock(spec=subprocess.Popen, pid=1234, returncode=0)
- subprocess_popen_mock = MagicMock(
- spec=subprocess.Popen,
- return_value=popen_result,
- )
- monkeypatch.setattr("subprocess.Popen", subprocess_popen_mock)
- return subprocess_popen_mock
-
-
-def test_reconcile_spawns_runners(
- secure_run_subprocess_mock: MagicMock, subprocess_popen_mock: MagicMock, log_dir: Path
-):
- """
- arrange: Mock that two reactive runner processes are active.
- act: Call reconcile with a quantity of 5.
-    assert: Three runners are spawned. The log directory is set up.
- """
- queue_name = secrets.token_hex(16)
- _arrange_reactive_processes(secure_run_subprocess_mock, count=2)
-
- delta = reconcile(5, mq_uri=EXAMPLE_MQ_URI, queue_name=queue_name)
-
- assert delta == 3
- assert subprocess_popen_mock.call_count == 3
- assert log_dir.exists()
-
-
-def test_reconcile_does_not_spawn_runners(
- secure_run_subprocess_mock: MagicMock, subprocess_popen_mock: MagicMock
-):
- """
- arrange: Mock that two reactive runner processes are active.
- act: Call reconcile with a quantity of 2.
- assert: No runners are spawned.
- """
- queue_name = secrets.token_hex(16)
- _arrange_reactive_processes(secure_run_subprocess_mock, count=2)
-
- delta = reconcile(2, mq_uri=EXAMPLE_MQ_URI, queue_name=queue_name)
-
- assert delta == 0
- assert subprocess_popen_mock.call_count == 0
-
-
-def test_reconcile_kills_processes_for_too_many_processes(
- secure_run_subprocess_mock: MagicMock,
- subprocess_popen_mock: MagicMock,
- os_kill_mock: MagicMock,
-):
- """
- arrange: Mock that 3 reactive runner processes are active.
- act: Call reconcile with a quantity of 1.
- assert: 2 processes are killed.
- """
- queue_name = secrets.token_hex(16)
- _arrange_reactive_processes(secure_run_subprocess_mock, count=3)
- delta = reconcile(1, mq_uri=EXAMPLE_MQ_URI, queue_name=queue_name)
-
- assert delta == -2
- assert subprocess_popen_mock.call_count == 0
- assert os_kill_mock.call_count == 2
-
-
-def test_reconcile_ignore_process_not_found_on_kill(
- secure_run_subprocess_mock: MagicMock,
- subprocess_popen_mock: MagicMock,
- os_kill_mock: MagicMock,
-):
- """
- arrange: Mock 3 reactive processes and os.kill to fail once with a ProcessLookupError.
- act: Call reconcile with a quantity of 1.
- assert: The returned delta is still -2.
- """
- queue_name = secrets.token_hex(16)
- _arrange_reactive_processes(secure_run_subprocess_mock, count=3)
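-    # Let the first kill succeed and the second raise ProcessLookupError (process already gone).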
- os_kill_mock.side_effect = [None, ProcessLookupError]
- delta = reconcile(1, mq_uri=EXAMPLE_MQ_URI, queue_name=queue_name)
-
- assert delta == -2
- assert subprocess_popen_mock.call_count == 0
- assert os_kill_mock.call_count == 2
-
-
-def test_reconcile_raises_reactive_runner_error_on_ps_failure(
- secure_run_subprocess_mock: MagicMock,
-):
- """
- arrange: Mock that the ps command fails.
- act: Call reconcile with a quantity of 1.
- assert: A ReactiveRunnerError is raised.
- """
- queue_name = secrets.token_hex(16)
- secure_run_subprocess_mock.return_value = CompletedProcess(
- args=PIDS_COMMAND_LINE,
- returncode=1,
- stdout=b"",
- stderr=b"error",
- )
-
- with pytest.raises(ReactiveRunnerError) as err:
- reconcile(1, mq_uri=EXAMPLE_MQ_URI, queue_name=queue_name)
-
- assert "Failed to get list of processes" in str(err.value)
-
-
-def _arrange_reactive_processes(secure_run_subprocess_mock: MagicMock, count: int):
- """Mock reactive runner processes are active.
-
- Args:
- secure_run_subprocess_mock: The mock to use for the ps command.
- count: The number of processes.
- """
- process_cmds_before = "\n".join(
- [f"{PYTHON_BIN} {REACTIVE_RUNNER_SCRIPT_FILE}\t{i}" for i in range(count)]
- )
-
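-    # Mimic ps output: a CMD header line followed by one command line per process.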
- secure_run_subprocess_mock.return_value = CompletedProcess(
- args=PIDS_COMMAND_LINE,
- returncode=0,
- stdout=f"CMD\n{process_cmds_before}".encode("utf-8"),
- stderr=b"",
- )
diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py
index 060bbc96f..a28fc9743 100644
--- a/tests/unit/test_charm.py
+++ b/tests/unit/test_charm.py
@@ -12,6 +12,7 @@
import pytest
import yaml
+from github_runner_manager.types_.github import GitHubOrg, GitHubRepo, GitHubRunnerStatus
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, StatusBase, WaitingStatus
from ops.testing import Harness
@@ -28,8 +29,6 @@
VM_CPU_CONFIG_NAME,
VM_DISK_CONFIG_NAME,
Arch,
- GitHubOrg,
- GitHubRepo,
InstanceType,
OpenStackCloudsYAML,
OpenstackImage,
@@ -41,14 +40,12 @@
LogrotateSetupError,
MissingMongoDBError,
MissingRunnerBinaryError,
- OpenStackUnauthorizedError,
RunnerError,
SubprocessError,
TokenError,
)
from event_timer import EventTimer, TimerEnableError
from firewall import FirewallEntry
-from github_type import GitHubRunnerStatus
from runner_manager import LXDRunnerManagerConfig, RunnerInfo
TEST_PROXY_SERVER_URL = "http://proxy.server:1234"
@@ -761,7 +758,6 @@ def test_on_flush_runners_action(self, run, wt, mkdir, rm):
pytest.param(ConfigurationError, BlockedStatus, id="charm config error"),
pytest.param(TokenError, BlockedStatus, id="github token error"),
pytest.param(MissingRunnerBinaryError, MaintenanceStatus, id="runner binary error"),
- pytest.param(OpenStackUnauthorizedError, BlockedStatus, id="openstack auth error"),
],
)
def test_catch_charm_errors(
diff --git a/tests/unit/test_charm_state.py b/tests/unit/test_charm_state.py
index d8fdd896d..b7df8a5dc 100644
--- a/tests/unit/test_charm_state.py
+++ b/tests/unit/test_charm_state.py
@@ -8,20 +8,22 @@
from pathlib import Path
from unittest.mock import MagicMock
+import github_runner_manager.openstack_cloud
import pytest
import yaml
from charms.data_platform_libs.v0.data_interfaces import DatabaseRequires
+from github_runner_manager.types_.github import GitHubOrg, GitHubRepo
from pydantic import BaseModel
from pydantic.error_wrappers import ValidationError
from pydantic.networks import IPv4Address
import charm_state
-import openstack_cloud
from charm_state import (
BASE_IMAGE_CONFIG_NAME,
DEBUG_SSH_INTEGRATION_NAME,
DENYLIST_CONFIG_NAME,
DOCKERHUB_MIRROR_CONFIG_NAME,
+ GROUP_CONFIG_NAME,
IMAGE_INTEGRATION_NAME,
LABELS_CONFIG_NAME,
OPENSTACK_CLOUDS_YAML_CONFIG_NAME,
@@ -41,8 +43,6 @@
CharmState,
FirewallEntry,
GithubConfig,
- GitHubOrg,
- GitHubRepo,
ImmutableConfigChangedError,
LocalLxdRunnerConfig,
OpenstackImage,
@@ -87,20 +87,21 @@ def test_github_org_path():
assert path == org
-def test_parse_github_path_invalid():
+def test_github_config_from_charm_invalid_path():
"""
-    arrange: Create an invalid GitHub path string and runner group name.
-    act: Call parse_github_path with the invalid path string and runner group name.
+    arrange: Create a mock charm with an invalid GitHub path configuration.
+    act: Call GithubConfig.from_charm with the mock charm.
assert: Verify that the function raises CharmConfigInvalidError.
"""
- path_str = "invalidpath/"
- runner_group = "test_group"
+ mock_charm = MockGithubRunnerCharmFactory()
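+    # The trailing slash leaves an empty repository name, making the path invalid.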
+ mock_charm.config[PATH_CONFIG_NAME] = "invalidpath/"
+ mock_charm.config[GROUP_CONFIG_NAME] = "test_group"
with pytest.raises(CharmConfigInvalidError):
- charm_state.parse_github_path(path_str, runner_group)
+ GithubConfig.from_charm(mock_charm)
-def test_github_config_from_charm_invalid_path():
+def test_github_config_from_charm_empty_path():
"""
arrange: Create a mock CharmBase instance with an empty path configuration.
act: Call from_charm method with the mock CharmBase instance.
@@ -367,9 +368,9 @@ def test_parse_openstack_clouds_initialize_fail(
mock_charm = MockGithubRunnerCharmFactory()
mock_charm.config[OPENSTACK_CLOUDS_YAML_CONFIG_NAME] = valid_yaml_config
monkeypatch.setattr(
- openstack_cloud,
+ github_runner_manager.openstack_cloud,
"initialize",
- MagicMock(side_effect=openstack_cloud.OpenStackInvalidConfigError),
+ MagicMock(side_effect=github_runner_manager.openstack_cloud.OpenStackInvalidConfigError),
)
with pytest.raises(CharmConfigInvalidError):
diff --git a/tests/unit/test_github_client.py b/tests/unit/test_github_client.py
deleted file mode 100644
index 9bd336a03..000000000
--- a/tests/unit/test_github_client.py
+++ /dev/null
@@ -1,208 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-import http
-import random
-import secrets
-from collections import namedtuple
-from datetime import datetime, timezone
-from unittest.mock import MagicMock
-from urllib.error import HTTPError
-
-import pytest
-
-from charm_state import GitHubRepo
-from errors import JobNotFoundError
-from github_client import GithubClient
-from github_type import JobConclusion, JobStats
-
-JobStatsRawData = namedtuple(
- "JobStatsRawData",
- ["created_at", "started_at", "runner_name", "conclusion", "id"],
-)
-
-
-@pytest.fixture(name="job_stats_raw")
-def job_stats_fixture() -> JobStatsRawData:
- """Create a JobStats object."""
- runner_name = secrets.token_hex(16)
- return JobStatsRawData(
- created_at="2021-10-01T00:00:00Z",
- started_at="2021-10-01T01:00:00Z",
- conclusion="success",
- runner_name=runner_name,
- id=random.randint(1, 1000),
- )
-
-
-@pytest.fixture(name="github_client")
-def github_client_fixture(job_stats_raw: JobStatsRawData) -> GithubClient:
- """Create a GithubClient object with a mocked GhApi object."""
- gh_client = GithubClient("token")
- gh_client._client = MagicMock()
- gh_client._client.actions.list_jobs_for_workflow_run.return_value = {
- "jobs": [
- {
- "created_at": job_stats_raw.created_at,
- "started_at": job_stats_raw.started_at,
- "runner_name": job_stats_raw.runner_name,
- "conclusion": job_stats_raw.conclusion,
- "id": job_stats_raw.id,
- }
- ]
- }
-
- return gh_client
-
-
-def _mock_multiple_pages_for_job_response(
- github_client: GithubClient, job_stats_raw: JobStatsRawData, include_runner: bool = True
-):
- """Mock the list_jobs_for_workflow_run to return multiple pages.
-
- Args:
- github_client: The GithubClient object to mock.
- job_stats_raw: The JobStatsRawData object to use for the response.
- include_runner: Whether to include the runner in the response for one of the jobs.
- """
- no_of_pages = random.choice(range(1, 5))
- no_of_jobs_per_page = random.choice(range(1, 4))
- runner_names = [secrets.token_hex(16) for _ in range(no_of_pages * no_of_jobs_per_page)]
-
- if include_runner:
- runner_names[random.choice(range(no_of_pages))] = job_stats_raw.runner_name
-
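-    # Build one page of jobs per iteration; a final page with no jobs marks the end of pagination.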
- github_client._client.actions.list_jobs_for_workflow_run.side_effect = [
- {
- "jobs": [
- {
- "created_at": job_stats_raw.created_at,
- "started_at": job_stats_raw.started_at,
- "runner_name": runner_names[i * no_of_jobs_per_page + j],
- "conclusion": job_stats_raw.conclusion,
- "id": job_stats_raw.id,
- }
- for j in range(no_of_jobs_per_page)
- ]
- }
- for i in range(no_of_pages)
- ] + [{"jobs": []}]
-
-
-def test_get_job_info(github_client: GithubClient, job_stats_raw: JobStatsRawData):
- """
- arrange: A mocked Github Client that returns one page of jobs containing one job \
- with the runner.
- act: Call get_job_info.
- assert: The correct JobStats object is returned.
- """
- github_repo = GitHubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
- job_stats = github_client.get_job_info(
- path=github_repo,
- workflow_run_id=secrets.token_hex(16),
- runner_name=job_stats_raw.runner_name,
- )
- assert job_stats == JobStats(
- created_at=datetime(2021, 10, 1, 0, 0, 0, tzinfo=timezone.utc),
- started_at=datetime(2021, 10, 1, 1, 0, 0, tzinfo=timezone.utc),
- runner_name=job_stats_raw.runner_name,
- conclusion=JobConclusion.SUCCESS,
- job_id=job_stats_raw.id,
- )
-
-
-def test_get_job_info_no_conclusion(github_client: GithubClient, job_stats_raw: JobStatsRawData):
- """
- arrange: A mocked Github Client that returns one page of jobs containing one job \
- with the runner with conclusion set to None.
- act: Call get_job_info.
- assert: JobStats object with conclusion set to None is returned.
- """
- github_client._client.actions.list_jobs_for_workflow_run.return_value = {
- "jobs": [
- {
- "created_at": job_stats_raw.created_at,
- "started_at": job_stats_raw.started_at,
- "runner_name": job_stats_raw.runner_name,
- "conclusion": None,
- "id": job_stats_raw.id,
- }
- ]
- }
- github_repo = GitHubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
- job_stats = github_client.get_job_info(
- path=github_repo,
- workflow_run_id=secrets.token_hex(16),
- runner_name=job_stats_raw.runner_name,
- )
- assert job_stats == JobStats(
- created_at=datetime(2021, 10, 1, 0, 0, 0, tzinfo=timezone.utc),
- started_at=datetime(2021, 10, 1, 1, 0, 0, tzinfo=timezone.utc),
- runner_name=job_stats_raw.runner_name,
- conclusion=None,
- job_id=job_stats_raw.id,
- )
-
-
-def test_github_api_pagination_multiple_pages(
- github_client: GithubClient, job_stats_raw: JobStatsRawData
-):
- """
- arrange: A mocked Github Client that returns multiple pages of jobs containing \
- one job with the runner.
- act: Call get_job_info.
- assert: The correct JobStats object is returned.
- """
- _mock_multiple_pages_for_job_response(
- github_client=github_client, job_stats_raw=job_stats_raw, include_runner=True
- )
-
- github_repo = GitHubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
- job_stats = github_client.get_job_info(
- path=github_repo,
- workflow_run_id=secrets.token_hex(16),
- runner_name=job_stats_raw.runner_name,
- )
- assert job_stats == JobStats(
- created_at=datetime(2021, 10, 1, 0, 0, 0, tzinfo=timezone.utc),
- started_at=datetime(2021, 10, 1, 1, 0, 0, tzinfo=timezone.utc),
- runner_name=job_stats_raw.runner_name,
- conclusion=JobConclusion.SUCCESS,
- job_id=job_stats_raw.id,
- )
-
-
-def test_github_api_pagination_job_not_found(
- github_client: GithubClient, job_stats_raw: JobStatsRawData
-):
- """
- arrange: A mocked Github Client that returns multiple pages of jobs containing \
- no job with the runner.
- act: Call get_job_info.
- assert: An exception is raised.
- """
- _mock_multiple_pages_for_job_response(
- github_client=github_client, job_stats_raw=job_stats_raw, include_runner=False
- )
-
- github_repo = GitHubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
-
- with pytest.raises(JobNotFoundError):
- github_client.get_job_info(
- path=github_repo,
- workflow_run_id=secrets.token_hex(16),
- runner_name=job_stats_raw.runner_name,
- )
-
-
-def test_github_api_http_error(github_client: GithubClient, job_stats_raw: JobStatsRawData):
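-    """
-    arrange: A mocked Github Client that raises an HTTPError when listing jobs.
-    act: Call get_job_info.
-    assert: A JobNotFoundError is raised.
-    """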
- github_client._client.actions.list_jobs_for_workflow_run.side_effect = HTTPError(
- "http://test.com", 500, "", http.client.HTTPMessage(), None
- )
- github_repo = GitHubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
-
- with pytest.raises(JobNotFoundError):
- github_client.get_job_info(
- path=github_repo,
- workflow_run_id=secrets.token_hex(16),
- runner_name=job_stats_raw.runner_name,
- )
diff --git a/tests/unit/test_lxd_runner_manager.py b/tests/unit/test_lxd_runner_manager.py
index 36c36df11..215cbe7e0 100644
--- a/tests/unit/test_lxd_runner_manager.py
+++ b/tests/unit/test_lxd_runner_manager.py
@@ -7,26 +7,29 @@
from pathlib import Path
from unittest.mock import MagicMock, call
+import github_runner_manager.reactive.runner_manager
import pytest
+from github_runner_manager.metrics.events import (
+ Reconciliation,
+ RunnerInstalled,
+ RunnerStart,
+ RunnerStop,
+)
+from github_runner_manager.metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
+from github_runner_manager.metrics.storage import MetricsStorage
+from github_runner_manager.types_.github import GitHubOrg, GitHubRepo, RunnerApplication
from pytest import LogCaptureFixture, MonkeyPatch
-import reactive.runner_manager
import shared_fs
from charm_state import (
Arch,
CharmConfig,
CharmState,
- GitHubOrg,
- GitHubRepo,
ProxyConfig,
ReactiveConfig,
VirtualMachineResources,
)
from errors import IssueMetricEventError, RunnerBinaryError
-from github_type import RunnerApplication
-from metrics.events import Reconciliation, RunnerInstalled, RunnerStart, RunnerStop
-from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
-from metrics.storage import MetricsStorage
from runner import Runner, RunnerStatus
from runner_manager import BUILD_IMAGE_SCRIPT_FILENAME, LXDRunnerManager, LXDRunnerManagerConfig
from runner_type import RunnerNameByHealth
@@ -107,7 +110,7 @@ def runner_manager_fixture(request, tmp_path, monkeypatch, token, charm_state):
def issue_event_mock_fixture(monkeypatch: MonkeyPatch) -> MagicMock:
"""Mock the issue_event function."""
issue_event_mock = MagicMock()
- monkeypatch.setattr("metrics.events.issue_event", issue_event_mock)
+ monkeypatch.setattr("github_runner_manager.metrics.events.issue_event", issue_event_mock)
return issue_event_mock
@@ -131,7 +134,7 @@ def runner_metrics_fixture(monkeypatch: MonkeyPatch) -> MagicMock:
@pytest.fixture(name="reactive_reconcile_mock")
def reactive_reconcile_fixture(monkeypatch: MonkeyPatch, tmp_path: Path) -> MagicMock:
"""Mock the job class."""
- reconcile_mock = MagicMock(spec=reactive.runner_manager.reconcile)
+ reconcile_mock = MagicMock(spec=github_runner_manager.reactive.runner_manager.reconcile)
monkeypatch.setattr("runner_manager.reactive_runner_manager.reconcile", reconcile_mock)
reconcile_mock.side_effect = lambda quantity, **kwargs: quantity
return reconcile_mock
diff --git a/tests/unit/test_openstack_cloud.py b/tests/unit/test_openstack_cloud.py
deleted file mode 100644
index 4f599e914..000000000
--- a/tests/unit/test_openstack_cloud.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-from pathlib import Path
-
-import pytest
-import yaml
-
-import openstack_cloud
-from errors import OpenStackInvalidConfigError
-
-
-def test_initialize(clouds_yaml_path: Path, clouds_yaml: dict):
- """
- arrange: Mocked clouds.yaml data and path.
- act: Call initialize.
- assert: The clouds.yaml file is written to disk.
- """
- openstack_cloud.initialize(clouds_yaml)
-
- assert yaml.safe_load(clouds_yaml_path.read_text(encoding="utf-8")) == clouds_yaml
-
-
-@pytest.mark.parametrize(
- "invalid_yaml, expected_err_msg",
- [
- pytest.param(
- {"wrong-key": {"cloud_name": {"auth": {}}}}, "Missing key 'clouds' from config."
- ),
- pytest.param({}, "Missing key 'clouds' from config."),
- pytest.param({"clouds": {}}, "No clouds defined in clouds.yaml."),
- ],
-)
-def test_initialize_validation_error(invalid_yaml: dict, expected_err_msg):
- """
- arrange: Mocked clouds.yaml data with invalid data.
- act: Call initialize.
-    assert: OpenStackInvalidConfigError is raised.
- """
- with pytest.raises(OpenStackInvalidConfigError) as exc:
- openstack_cloud.initialize(invalid_yaml)
- assert expected_err_msg in str(exc)
diff --git a/tests/unit/test_runner.py b/tests/unit/test_runner.py
index af7954d06..e6d57f305 100644
--- a/tests/unit/test_runner.py
+++ b/tests/unit/test_runner.py
@@ -8,12 +8,14 @@
from pathlib import Path
from unittest.mock import MagicMock, call
+import github_runner_manager.metrics.runner_logs
import jinja2
import pytest
from _pytest.monkeypatch import MonkeyPatch
+from github_runner_manager.metrics.storage import MetricsStorage
+from github_runner_manager.types_.github import GitHubOrg, GitHubRepo
-import metrics.runner_logs
-from charm_state import GitHubOrg, GitHubRepo, SSHDebugConnection, VirtualMachineResources
+from charm_state import SSHDebugConnection, VirtualMachineResources
from errors import (
CreateMetricsStorageError,
LxdError,
@@ -22,7 +24,6 @@
RunnerRemoveError,
)
from lxd import LxdInstance, LxdInstanceFileManager
-from metrics.storage import MetricsStorage
from runner import DIAG_DIR_PATH, CreateRunnerConfig, Runner, RunnerConfig, RunnerStatus
from runner_manager_type import RunnerManagerClients
from runner_type import ProxySetting
@@ -102,7 +103,9 @@ def create_logs_dir(runner_name: str) -> Path:
return target_log_path
- create_logs_dir_mock = MagicMock(spec=metrics.runner_logs.create_logs_dir)
+ create_logs_dir_mock = MagicMock(
+ spec=github_runner_manager.metrics.runner_logs.create_logs_dir
+ )
create_logs_dir_mock.side_effect = create_logs_dir
monkeypatch.setattr("runner.create_logs_dir", create_logs_dir_mock)
@@ -522,7 +525,7 @@ def test_pull_logs(runner: Runner, log_dir_base_path: Path):
runner.instance.files.pull_file.assert_has_calls(
[
call(str(DIAG_DIR_PATH), str(log_dir_path), is_dir=True),
- call(str(metrics.runner_logs.SYSLOG_PATH), str(log_dir_path)),
+ call(str(github_runner_manager.metrics.runner_logs.SYSLOG_PATH), str(log_dir_path)),
]
)
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index 845c8da49..f3199fd99 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -6,12 +6,16 @@
from unittest.mock import MagicMock
import pytest
+from github_runner_manager.manager.cloud_runner_manager import CloudRunnerState, InstanceId
+from github_runner_manager.manager.github_runner_manager import GitHubRunnerState
+from github_runner_manager.manager.runner_manager import (
+ FlushMode,
+ RunnerManager,
+ RunnerManagerConfig,
+)
+from github_runner_manager.manager.runner_scaler import RunnerScaler
+from github_runner_manager.types_.github import GitHubPath, GitHubRepo
-from charm_state import GitHubPath, GitHubRepo
-from manager.cloud_runner_manager import CloudRunnerState, InstanceId
-from manager.github_runner_manager import GitHubRunnerState
-from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
-from manager.runner_scaler import RunnerScaler
from tests.unit.mock_runner_managers import (
MockCloudRunnerManager,
MockGitHubRunnerManager,
@@ -58,11 +62,16 @@ def runner_manager_fixture(
) -> RunnerManager:
mock_cloud, mock_github = mock_runner_managers
monkeypatch.setattr(
- "manager.runner_manager.RunnerManager._spawn_runners", mock_runner_manager_spawn_runners
+ "github_runner_manager.manager.runner_manager.RunnerManager._spawn_runners",
+ mock_runner_manager_spawn_runners,
)
-    # Patch out the metrics, as metrics has their own tests.
+    # Patch out the metrics, as the metrics code has its own tests.
- monkeypatch.setattr("manager.runner_manager.github_metrics.job", MagicMock())
- monkeypatch.setattr("manager.runner_manager.runner_metrics.issue_events", MagicMock())
+ monkeypatch.setattr(
+ "github_runner_manager.manager.runner_manager.github_metrics.job", MagicMock()
+ )
+ monkeypatch.setattr(
+ "github_runner_manager.manager.runner_manager.runner_metrics.issue_events", MagicMock()
+ )
config = RunnerManagerConfig("mock_token", github_path)
runner_manager = RunnerManager("mock_runners", mock_cloud, config)
diff --git a/tests/unit/test_shared_fs.py b/tests/unit/test_shared_fs.py
index 0c1266566..2a21bf3cc 100644
--- a/tests/unit/test_shared_fs.py
+++ b/tests/unit/test_shared_fs.py
@@ -7,6 +7,7 @@
import pytest
from _pytest.monkeypatch import MonkeyPatch
+from github_runner_manager.metrics.storage import MetricsStorage
import shared_fs
from errors import (
@@ -15,7 +16,6 @@
GetMetricsStorageError,
SubprocessError,
)
-from metrics.storage import MetricsStorage
MOUNTPOINT_FAILURE_EXIT_CODE = 1