From ba970fd84624dc4394da2bce56139431b1706396 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sat, 27 Jul 2024 11:09:42 +0800
Subject: [PATCH 001/278] Add openstack interface
---
requirements.txt | 2 +
src-docs/managed_requests.md | 32 ++
src-docs/openstack_cloud.md | 3 +
src-docs/openstack_cloud.openstack_cloud.md | 125 ++++++
src-docs/openstack_cloud.openstack_manager.md | 16 +-
src/openstack_cloud/openstack_cloud.py | 388 ++++++++++++++++++
src/openstack_cloud/openstack_manager.py | 35 +-
tests/conftest.py | 13 +
tests/integration/conftest.py | 13 +
tests/integration/test_e2e.py | 2 +-
tests/integration/test_openstack_cloud.py | 102 +++++
11 files changed, 688 insertions(+), 43 deletions(-)
create mode 100644 src-docs/managed_requests.md
create mode 100644 src-docs/openstack_cloud.openstack_cloud.md
create mode 100644 src/openstack_cloud/openstack_cloud.py
create mode 100644 tests/integration/test_openstack_cloud.py
diff --git a/requirements.txt b/requirements.txt
index 1046b854a..927fa70c5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
+# TODO 2024-07-12: The PyGithub-based interface will replace ghapi in the future.
+PyGithub
ghapi
jinja2
fabric >=3,<4
diff --git a/src-docs/managed_requests.md b/src-docs/managed_requests.md
new file mode 100644
index 000000000..23939bcc2
--- /dev/null
+++ b/src-docs/managed_requests.md
@@ -0,0 +1,32 @@
+
+
+
+
+# module `managed_requests`
+Get configured requests session instance
+
+
+---
+
+
+
+## function `get_requests_session`
+
+```python
+get_requests_session(proxy: ProxyConfig) → Session
+```
+
+Get managed requests session instance.
+
+
+
+**Args:**
+
+ - `proxy`: HTTP proxy configurations.
+
+
+
+**Returns:**
+ Requests session with proxy and retry setup.
+
+
diff --git a/src-docs/openstack_cloud.md b/src-docs/openstack_cloud.md
index 4d82f5359..2bd698583 100644
--- a/src-docs/openstack_cloud.md
+++ b/src-docs/openstack_cloud.md
@@ -7,6 +7,9 @@ Module for managing Openstack cloud.
**Global Variables**
---------------
+- **openstack_cloud**: # Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
- **openstack_manager**: # Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
new file mode 100644
index 000000000..6cb15ee29
--- /dev/null
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -0,0 +1,125 @@
+
+
+
+
+# module `openstack_cloud.openstack_cloud`
+
+
+
+
+
+
+---
+
+
+
+## class `OpenstackInstance`
+OpenstackInstance(server: openstack.compute.v2.server.Server)
+
+
+
+### method `__init__`
+
+```python
+__init__(server: Server)
+```
+
+
+
+
+
+
+
+
+
+---
+
+
+
+## class `OpenstackCloud`
+
+
+
+
+
+
+### method `__init__`
+
+```python
+__init__(cloud_config: dict[str, dict], prefix: str)
+```
+
+Create a OpenstackCloud instance.
+
+
+
+**Args:**
+
+ - `cloud_config`: The openstack clouds.yaml in dict format. The first cloud in the yaml is used. prefix:
+
+
+
+
+---
+
+
+
+### method `delete_instance`
+
+```python
+delete_instance(name: str)
+```
+
+
+
+
+
+---
+
+
+
+### method `get_instances`
+
+```python
+get_instances(name: str) → list[OpenstackInstance]
+```
+
+
+
+
+
+---
+
+
+
+### method `get_ssh_connection`
+
+```python
+get_ssh_connection(instance: OpenstackInstance) → Connection
+```
+
+
+
+
+
+---
+
+
+
+### method `launch_instance`
+
+```python
+launch_instance(
+ name: str,
+ image: str,
+ flavor: str,
+ network: str,
+ userdata: str
+) → OpenstackInstance
+```
+
+
+
+
+
+
diff --git a/src-docs/openstack_cloud.openstack_manager.md b/src-docs/openstack_cloud.openstack_manager.md
index 697d3d96a..e810e05d0 100644
--- a/src-docs/openstack_cloud.openstack_manager.md
+++ b/src-docs/openstack_cloud.openstack_manager.md
@@ -18,7 +18,7 @@ Module for handling interactions with OpenStack.
---
-
+
## function `create_instance_config`
@@ -54,7 +54,7 @@ Create an instance config from charm data.
---
-
+
## class `InstanceConfig`
The configuration values for creating a single runner instance.
@@ -93,7 +93,7 @@ __init__(
---
-
+
## class `GithubRunnerRemoveError`
Represents an error removing registered runner from Github.
@@ -104,7 +104,7 @@ Represents an error removing registered runner from Github.
---
-
+
## class `OpenstackRunnerManager`
Runner manager for OpenStack-based instances.
@@ -117,7 +117,7 @@ Runner manager for OpenStack-based instances.
- `unit_num`: The juju unit number.
- `instance_name`: Prefix of the name for the set of runners.
-
+
### method `__init__`
@@ -146,7 +146,7 @@ Construct OpenstackRunnerManager object.
---
-
+
### method `flush`
@@ -163,7 +163,7 @@ Flush Openstack servers.
---
-
+
### method `get_github_runner_info`
@@ -180,7 +180,7 @@ Get information on GitHub for the runners.
---
-
+
### method `reconcile`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
new file mode 100644
index 000000000..f8c601d6f
--- /dev/null
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -0,0 +1,388 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+from contextlib import contextmanager
+from dataclasses import dataclass
+import datetime
+from functools import reduce
+import logging
+from pathlib import Path
+import shutil
+from typing import Iterable, Iterator, cast
+import openstack
+from openstack.compute.v2.server import Server as OpenstackServer
+from openstack.compute.v2.keypair import Keypair as OpenstackKeypair
+from openstack.network.v2.security_group import SecurityGroup as OpenstackSecurityGroup
+from openstack.connection import Connection as OpenstackConnection
+import openstack.exceptions
+from fabric import Connection as SshConnection
+import paramiko
+from paramiko.ssh_exception import NoValidConnectionsError
+
+from errors import OpenStackError
+
+logger = logging.getLogger(__name__)
+
+# Update the version when the security group rules are not backward compatible.
+_SECURITY_GROUP_NAME = "github-runner-v1"
+
+# Passed as the `timeout` of `create_server` when launching an instance.
+_CREATE_SERVER_TIMEOUT = 5 * 60
+# Used both as the SSH connect timeout and as the timeout of the SSH test command.
+_SSH_TIMEOUT = 30
+# Directory of the per-instance private key files. This must be a Path, not a str: it is joined
+# with "/" and iterated with iterdir() by OpenstackCloud.
+_SSH_KEY_PATH = Path("/home/ubuntu/.ssh")
+# Marker echoed over SSH to confirm that a connection is usable.
+_TEST_STRING = "test_string"
+
+class _SshError(Exception):
+    """Represents an error while interacting with SSH.
+
+    Raised by OpenstackCloud.get_ssh_connection when the key file is missing, the instance has
+    no addresses, or none of its addresses yields a working SSH connection.
+    """
+
+@dataclass
+class OpenstackInstance:
+    """The identifying data of an OpenStack server.
+
+    Attributes:
+        id: The ID of the server.
+        name: The name of the server.
+        addresses: Every address of the server, flattened across all of its networks.
+    """
+
+    id: str
+    name: str
+    addresses: list[str]
+
+    def __init__(self, server: OpenstackServer):
+        """Copy the ID, name and addresses from an OpenStack server object.
+
+        Args:
+            server: The OpenStack server to read from.
+        """
+        self.id = server.id
+        self.name = server.name
+        collected: list[str] = []
+        for per_network in server.addresses.values():
+            collected.extend(entry["addr"] for entry in per_network)
+        self.addresses = collected
+
+
+@contextmanager
+def _create_connection(cloud_config: dict[str, dict]) -> Iterator[OpenstackConnection]:
+    """Yield an authorized OpenStack connection, to be used within with statements.
+
+    Only the name of the first cloud is taken from cloud_config; the connection itself is
+    opened by that name. This method should be called with a valid cloud_config, see
+    _validate_cloud_config. It also assumes that the clouds.yaml exists on
+    ~/.config/openstack/clouds.yaml, see charm_state.py _write_openstack_config_to_disk.
+
+    Args:
+        cloud_config: The configuration in clouds.yaml format to apply.
+
+    Raises:
+        OpenStackError: if an OpenStack HTTP error occurs while connecting, authorizing, or
+            inside the with block using the connection.
+
+    Yields:
+        An openstack.connection.Connection object.
+    """
+    cloud_names = list(cloud_config["clouds"])
+    if len(cloud_names) > 1:
+        logger.warning("Multiple clouds defined in clouds.yaml. Using the first one to connect.")
+    first_cloud = cloud_names[0]
+
+    # api documents that keystoneauth1.exceptions.MissingRequiredOptions can be raised but
+    # I could not reproduce it. Therefore, no catch here for such exception.
+    try:
+        with openstack.connect(cloud=first_cloud) as connection:
+            connection.authorize()
+            yield connection
+    # pylint thinks this isn't an exception, but does inherit from Exception class.
+    except openstack.exceptions.HttpException as exc:  # pylint: disable=bad-exception-cause
+        logger.exception("OpenStack API call failure")
+        raise OpenStackError("Failed OpenStack API call") from exc
+
+class OpenstackCloud:
+
+    def __init__(self, cloud_config: dict[str, dict], prefix: str):
+        """Create a OpenstackCloud instance.
+
+        Args:
+            cloud_config: The openstack clouds.yaml in dict format. The first cloud in the yaml is
+                used.
+            prefix: Prefix prepended, followed by "-", to the name of every server handled by
+                this object. Servers are also selected by this prefix when listing.
+        """
+        self.cloud_config = cloud_config
+        self.prefix = prefix
+
+    def launch_instance(
+        self, name: str, image: str, flavor: str, network: str, userdata: str
+    ) -> OpenstackInstance:
+        """Create an OpenStack server.
+
+        The server is named with the prefix of this object prepended to the given name. The
+        runner security group is ensured and a keypair named after the server is set up before
+        the server is created. Creation is requested with wait=True and a timeout.
+
+        Args:
+            name: The instance name without the prefix.
+            image: The image to boot the server from.
+            flavor: The flavor of the server.
+            network: The network to attach the server to.
+            userdata: The userdata passed to the server.
+
+        Returns:
+            The created instance.
+        """
+        server_name = self._get_instance_name(name)
+        logger.info("Creating openstack server with %s", server_name)
+
+        with _create_connection(cloud_config=self.cloud_config) as conn:
+            runner_security_group = OpenstackCloud._ensure_security_group(conn)
+            server_keypair = OpenstackCloud._setup_key_pair(conn, server_name)
+
+            create_kwargs = {
+                "name": server_name,
+                "image": image,
+                "key_name": server_keypair.name,
+                "flavor": flavor,
+                "network": network,
+                "security_groups": [runner_security_group.id],
+                "userdata": userdata,
+                "auto_ip": False,
+                "timeout": _CREATE_SERVER_TIMEOUT,
+                "wait": True,
+            }
+            created_server = conn.create_server(**create_kwargs)
+        return OpenstackInstance(created_server)
+
+    def delete_instance(self, name: str):
+        """Delete the OpenStack server of the given name together with its keypair.
+
+        The keypair and key file are removed even when no matching server is found.
+
+        Args:
+            name: The instance name without the prefix.
+        """
+        # Build the full name from the `name` argument; `full_name` does not exist yet here.
+        full_name = self._get_instance_name(name)
+        logger.info("Deleting openstack server with %s", full_name)
+
+        with _create_connection(cloud_config=self.cloud_config) as conn:
+            server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
+            if server is None:
+                logger.warning("No openstack server named %s found to delete", full_name)
+            else:
+                # Delete through the connection; Server.delete() needs a session argument.
+                conn.delete_server(name_or_id=server.id)
+            OpenstackCloud._delete_key_pair(conn, full_name)
+
+    def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
+        """Get a working SSH connection to an instance.
+
+        The addresses of the instance are tried in order. An address is accepted when a test
+        command run over SSH succeeds and its output contains the expected marker.
+
+        Args:
+            instance: The instance to connect to.
+
+        Raises:
+            _SshError: If the key file is missing, the instance has no addresses, or no
+                address yields a working connection.
+
+        Returns:
+            The SSH connection to the first address that passed the test command.
+        """
+        key_path = OpenstackCloud._get_key_path(instance.name)
+
+        if not key_path.exists():
+            raise _SshError(f"Missing keyfile for server: {instance.name}, key path: {key_path}")
+        if not instance.addresses:
+            raise _SshError(f"No addresses found for OpenStack server {instance.name}")
+
+        for ip in instance.addresses:
+            try:
+                connection = SshConnection(
+                    host=ip,
+                    user="ubuntu",
+                    connect_kwargs={"key_filename": str(key_path)},
+                    connect_timeout=_SSH_TIMEOUT,
+                )
+                # The f-string is required: without it the literal "{_TEST_STRING}" is echoed
+                # and the marker check below can never succeed.
+                result = connection.run(
+                    f"echo {_TEST_STRING}", warn=True, timeout=_SSH_TIMEOUT
+                )
+            except (NoValidConnectionsError, TimeoutError, paramiko.ssh_exception.SSHException):
+                # Log the loop variable: `connection` is unbound if its constructor raised.
+                logger.warning(
+                    "Unable to SSH into %s with address %s",
+                    instance.name,
+                    ip,
+                    exc_info=True,
+                )
+                continue
+            if not result.ok:
+                logger.warning(
+                    "SSH test connection failed, server: %s, address: %s", instance.name, ip
+                )
+                continue
+            if _TEST_STRING in result.stdout:
+                return connection
+        raise _SshError(
+            f"No connectable SSH addresses found, server: {instance.name}, "
+            f"addresses: {instance.addresses}"
+        )
+
+    def get_instances(
+        self, name: str | None = None  # pylint: disable=unused-argument
+    ) -> list[OpenstackInstance]:
+        """Get the OpenStack instances handled by this object.
+
+        Servers are selected by the name prefix of this object. For every distinct server
+        name, the server is looked up again through _get_and_ensure_unique_server so that
+        one instance per name is returned.
+
+        Args:
+            name: Unused. It is optional so that callers may omit it, and is still accepted
+                for backward compatibility.
+
+        Returns:
+            One instance per distinct server name.
+        """
+        logger.info("Getting all openstack servers managed by the charm")
+
+        with _create_connection(cloud_config=self.cloud_config) as conn:
+            server_names = {server.name for server in self._get_openstack_instances(conn)}
+            unique_servers = (
+                OpenstackCloud._get_and_ensure_unique_server(conn, server_name)
+                for server_name in server_names
+            )
+            # The lookup is annotated as possibly returning None; skip such results.
+            return [OpenstackInstance(server) for server in unique_servers if server is not None]
+
+    def _cleanup_key_files(
+        self, conn: OpenstackConnection, exclude_instances: Iterable[str]
+    ) -> None:
+        """Delete all SSH key files except those of the specified instances.
+
+        Only files in the key directory whose name starts with the prefix of this object and
+        ends with ".key" are considered. For every removed file, deletion of the OpenStack
+        keypair of the same name is attempted as well; a failure there is only logged.
+
+        Args:
+            conn: The Openstack connection instance.
+            exclude_instances: The keys of these instances will not be deleted.
+        """
+        logger.info("Cleaning up SSH key files")
+        # Key files are named "<instance>.key"; compare by file name within the key directory.
+        exclude_filenames = {f"{instance}.key" for instance in exclude_instances}
+
+        total = 0
+        deleted = 0
+        # Path() accepts both str and Path, so this works however the constant is defined.
+        for path in Path(_SSH_KEY_PATH).iterdir():
+            # Find key file from this application.
+            if not (
+                path.is_file()
+                and path.name.startswith(self.prefix)
+                and path.name.endswith(".key")
+            ):
+                continue
+            total += 1
+            if path.name in exclude_filenames:
+                continue
+
+            # The keypair is named like the file without its ".key" suffix.
+            keypair_name = path.stem
+            try:
+                conn.delete_keypair(keypair_name)
+            except openstack.exceptions.SDKException:
+                logger.warning(
+                    "Unable to delete OpenStack keypair associated with deleted key file %s ",
+                    path.name,
+                )
+
+            path.unlink()
+            deleted += 1
+        logger.info("Found %s key files, clean up %s key files", total, deleted)
+
+    def _clean_up_openstack_keypairs(
+        self, conn: OpenstackConnection, exclude_instances: Iterable[str]
+    ) -> None:
+        """Delete all OpenStack keypairs except those of the specified instances.
+
+        Only keypairs whose name starts with the prefix of this object are considered. A
+        failure to delete a keypair is only logged.
+
+        Args:
+            conn: The Openstack connection instance.
+            exclude_instances: The keys of these instances will not be deleted.
+        """
+        logger.info("Cleaning up openstack keypairs")
+        # Materialise once: a one-shot iterable would be exhausted by the first membership test.
+        excluded_names = set(exclude_instances)
+        for key in conn.list_keypairs():
+            if not key.name:
+                continue
+            # The `name` attribute is of resource.Body type.
+            keypair_name = str(key.name)
+            if not keypair_name.startswith(self.prefix) or keypair_name in excluded_names:
+                continue
+
+            try:
+                conn.delete_keypair(key.name)
+            except openstack.exceptions.SDKException:
+                logger.warning("Unable to delete OpenStack keypair %s", key.name)
+
+    def _get_instance_name(self, name: str) -> str:
+        """Build the full server name by prepending the prefix of this object.
+
+        Args:
+            name: The instance name without the prefix.
+
+        Returns:
+            The name in the form "<prefix>-<name>".
+        """
+        return "{}-{}".format(self.prefix, name)
+
+    def _get_openstack_instances(self, conn: OpenstackConnection) -> list[OpenstackServer]:
+        """List the OpenStack servers whose name starts with "<prefix>-".
+
+        Args:
+            conn: The connection object to access OpenStack cloud.
+
+        Returns:
+            List of OpenStack instances.
+        """
+        wanted_prefix = f"{self.prefix}-"
+        matching: list[OpenstackServer] = []
+        for candidate in cast(list[OpenstackServer], conn.list_servers()):
+            if candidate.name.startswith(wanted_prefix):
+                matching.append(candidate)
+        return matching
+
+    @staticmethod
+    def _get_and_ensure_unique_server(
+        conn: OpenstackConnection, name: str
+    ) -> OpenstackServer | None:
+        """Get the latest server of the name and ensure it is unique.
+
+        If multiple servers with the same name are found, the server with the latest creation
+        time is returned and the other servers are deleted.
+
+        Args:
+            conn: The connection object to access OpenStack cloud.
+            name: The full name of the server.
+
+        Returns:
+            The latest server of the name, or None if no server is found.
+        """
+        servers: list[OpenstackServer] = conn.search_servers(name)
+        if not servers:
+            return None
+
+        # NOTE(review): assumes created_at looks like "2024-07-27T03:09:42Z" — confirm against
+        # the deployed cloud.
+        latest_server = max(
+            servers,
+            key=lambda server: datetime.datetime.strptime(
+                server.created_at, "%Y-%m-%dT%H:%M:%SZ"
+            ),
+        )
+        for server in servers:
+            if server is latest_server:
+                continue
+            # Delete through the connection; Server.delete() needs a session argument.
+            conn.delete_server(name_or_id=server.id)
+
+        return latest_server
+
+    @staticmethod
+    def _get_key_path(name: str) -> Path:
+        """Get the file path for storing the private SSH key of a runner.
+
+        Args:
+            name: The name of the runner.
+
+        Returns:
+            Path reserved for the key file of the runner.
+        """
+        # Path() accepts both str and Path; joining two plain strings with "/" would raise.
+        return Path(_SSH_KEY_PATH) / f"{name}.key"
+
+    @staticmethod
+    def _setup_key_pair(conn: OpenstackConnection, name: str) -> OpenstackKeypair:
+        """Create an OpenStack keypair and store its private key in a key file.
+
+        An existing key file of the same name is removed first. The new key file is owned by
+        the ubuntu user and made read-only for its owner.
+
+        Args:
+            conn: The connection object to access OpenStack cloud.
+            name: The name of the keypair, also used to derive the key file path.
+
+        Returns:
+            The created keypair.
+        """
+        key_path = OpenstackCloud._get_key_path(name)
+
+        # exists must be called: the bare bound method is always truthy.
+        if key_path.exists():
+            logger.warning("Existing private key file for %s found, removing it.", name)
+            key_path.unlink(missing_ok=True)
+
+        keypair = conn.create_keypair(name=name)
+        key_path.write_text(keypair.private_key)
+        shutil.chown(key_path, user="ubuntu", group="ubuntu")
+        key_path.chmod(0o400)
+        return keypair
+
+    @staticmethod
+    def _delete_key_pair(conn: OpenstackConnection, name: str) -> None:
+        """Delete an OpenStack keypair and its key file.
+
+        A failure to delete the keypair on OpenStack is only logged; the key file is removed
+        in every case.
+
+        Args:
+            conn: The connection object to access OpenStack cloud.
+            name: The name of the keypair, also used to derive the key file path.
+        """
+        try:
+            # Keypair have unique names, access by ID is not needed.
+            if not conn.delete_keypair(name):
+                logger.warning("Unable to delete keypair for %s", name)
+        except openstack.exceptions.SDKException:
+            # exc_info attaches the caught exception; stack_info would only log the call stack.
+            logger.warning("Unable to delete keypair for %s", name, exc_info=True)
+
+        key_path = OpenstackCloud._get_key_path(name)
+        key_path.unlink(missing_ok=True)
+
+    @staticmethod
+    def _ensure_security_group(conn: OpenstackConnection) -> OpenstackSecurityGroup:
+        """Ensure runner security group exists.
+
+        The group is created if it is not found. The ICMP ingress, SSH (tcp 22) ingress and
+        tmate SSH (tcp 10022) egress rules are created unless a matching rule is found on an
+        existing group.
+
+        Args:
+            conn: The connection object to access OpenStack cloud.
+
+        Returns:
+            The security group with the rules for runners.
+        """
+        rule_exists_icmp = False
+        rule_exists_ssh = False
+        rule_exists_tmate_ssh = False
+
+        security_group_list = conn.list_security_groups(filters={"name": _SECURITY_GROUP_NAME})
+        # Pick the first security_group returned.
+        security_group = next(iter(security_group_list), None)
+        if security_group is None:
+            logger.info("Security group %s not found, creating it", _SECURITY_GROUP_NAME)
+            security_group = conn.create_security_group(
+                name=_SECURITY_GROUP_NAME,
+                description="For servers managed by the github-runner charm.",
+            )
+        else:
+            for rule in security_group.security_group_rules:
+                # Item access is used for every field so that plain dict rules work too.
+                protocol = rule["protocol"]
+                if protocol == "icmp":
+                    logger.debug(
+                        "Found ICMP rule in existing security group %s of ID %s",
+                        _SECURITY_GROUP_NAME,
+                        security_group.id,
+                    )
+                    rule_exists_icmp = True
+                if protocol != "tcp":
+                    continue
+                if rule["port_range_min"] == rule["port_range_max"] == 22:
+                    logger.debug(
+                        "Found SSH rule in existing security group %s of ID %s",
+                        _SECURITY_GROUP_NAME,
+                        security_group.id,
+                    )
+                    rule_exists_ssh = True
+                if rule["port_range_min"] == rule["port_range_max"] == 10022:
+                    logger.debug(
+                        "Found tmate SSH rule in existing security group %s of ID %s",
+                        _SECURITY_GROUP_NAME,
+                        security_group.id,
+                    )
+                    rule_exists_tmate_ssh = True
+
+        if not rule_exists_icmp:
+            conn.create_security_group_rule(
+                secgroup_name_or_id=_SECURITY_GROUP_NAME,
+                protocol="icmp",
+                direction="ingress",
+                ethertype="IPv4",
+            )
+        if not rule_exists_ssh:
+            conn.create_security_group_rule(
+                secgroup_name_or_id=_SECURITY_GROUP_NAME,
+                port_range_min="22",
+                port_range_max="22",
+                protocol="tcp",
+                direction="ingress",
+                ethertype="IPv4",
+            )
+        if not rule_exists_tmate_ssh:
+            conn.create_security_group_rule(
+                secgroup_name_or_id=_SECURITY_GROUP_NAME,
+                port_range_min="10022",
+                port_range_max="10022",
+                protocol="tcp",
+                direction="egress",
+                ethertype="IPv4",
+            )
+        return security_group
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index 7fcaa2f6f..77e42c3ef 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -59,6 +59,7 @@
from metrics import runner as runner_metrics
from metrics import storage as metrics_storage
from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
+from openstack_cloud.openstack_cloud import _create_connection
from repo_policy_compliance_client import RepoPolicyComplianceClient
from runner_manager import IssuedMetricEventsStats
from runner_manager_type import OpenstackRunnerManagerConfig
@@ -149,40 +150,6 @@ class _CloudInitUserData:
proxies: Optional[ProxyConfig] = None
-@contextmanager
-def _create_connection(cloud_config: dict[str, dict]) -> Iterator[openstack.connection.Connection]:
- """Create a connection context managed object, to be used within with statements.
-
- This method should be called with a valid cloud_config. See _validate_cloud_config.
- Also, this method assumes that the clouds.yaml exists on ~/.config/openstack/clouds.yaml.
- See charm_state.py _write_openstack_config_to_disk.
-
- Args:
- cloud_config: The configuration in clouds.yaml format to apply.
-
- Raises:
- OpenStackError: if the credentials provided is not authorized.
-
- Yields:
- An openstack.connection.Connection object.
- """
- clouds = list(cloud_config["clouds"].keys())
- if len(clouds) > 1:
- logger.warning("Multiple clouds defined in clouds.yaml. Using the first one to connect.")
- cloud_name = clouds[0]
-
- # api documents that keystoneauth1.exceptions.MissingRequiredOptions can be raised but
- # I could not reproduce it. Therefore, no catch here for such exception.
- try:
- with openstack.connect(cloud=cloud_name) as conn:
- conn.authorize()
- yield conn
- # pylint thinks this isn't an exception, but does inherit from Exception class.
- except openstack.exceptions.HttpException as exc: # pylint: disable=bad-exception-cause
- logger.exception("OpenStack API call failure")
- raise OpenStackError("Failed OpenStack API call") from exc
-
-
# Disable too many arguments, as they are needed to create the dataclass.
def create_instance_config( # pylint: disable=too-many-arguments
app_name: str,
diff --git a/tests/conftest.py b/tests/conftest.py
index 7bb35c4f3..7ae97d4a6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -139,3 +139,16 @@ def pytest_addoption(parser: Parser):
help="The Openstack region to authenticate to.",
default=None,
)
+ # OpenStack integration tests
+ parser.addoption(
+ "--openstack-test-image",
+ action="store",
+ help="The image for testing openstack interfaces. Any ubuntu image should work.",
+ default=None,
+ )
+ parser.addoption(
+ "--openstack-test-flavor",
+ action="store",
+ help="The flavor for testing openstack interfaces. The resource should be enough to boot the test image.",
+ default=None,
+ )
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 4d54c8f89..46f7e61c6 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -277,6 +277,19 @@ def flavor_name_fixture(pytestconfig: pytest.Config) -> str:
assert flavor_name, "Please specify the --openstack-flavor-name command line option"
return flavor_name
+@pytest.fixture(scope="module", name="openstack_test_image")
+def openstack_test_image_fixture(pytestconfig: pytest.Config) -> str:
+    """Provide the image given by --openstack-test-image for testing openstack interfaces."""
+    image_option = pytestconfig.getoption("--openstack-test-image")
+    assert image_option, "Please specify the --openstack-test-image command line option"
+    return image_option
+
+@pytest.fixture(scope="module", name="openstack_test_flavor")
+def openstack_test_flavor_fixture(pytestconfig: pytest.Config) -> str:
+    """Flavor for testing openstack interfaces.
+
+    The function has its own name so that it does not rebind the module attribute of the
+    image fixture function defined in the same module.
+    """
+    test_flavor = pytestconfig.getoption("--openstack-test-flavor")
+    assert test_flavor, "Please specify the --openstack-test-flavor command line option"
+    return test_flavor
@pytest.fixture(scope="module", name="openstack_connection")
def openstack_connection_fixture(
diff --git a/tests/integration/test_e2e.py b/tests/integration/test_e2e.py
index b3fb311ed..bed193216 100644
--- a/tests/integration/test_e2e.py
+++ b/tests/integration/test_e2e.py
@@ -44,7 +44,7 @@ async def test_e2e_workflow(
"""
arrange: An app connected to an OpenStack cloud with no runners.
act: Run e2e test workflow.
- assert:
+ assert: No exception thrown.
"""
virt_type: str
if instance_type == InstanceType.OPENSTACK:
diff --git a/tests/integration/test_openstack_cloud.py b/tests/integration/test_openstack_cloud.py
new file mode 100644
index 000000000..10ae98d09
--- /dev/null
+++ b/tests/integration/test_openstack_cloud.py
@@ -0,0 +1,102 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+"""Test for OpenstackCloud class integration with OpenStack."""
+
+from secrets import token_hex
+from typing import AsyncIterator
+
+import pytest
+import pytest_asyncio
+from openstack.connection import Connection as OpenstackConnection
+
+from openstack_cloud.openstack_cloud import OpenstackCloud
+
+
+@pytest_asyncio.fixture(scope="function", name="base_openstack_cloud")
+async def base_openstack_cloud_fixture(private_endpoint_clouds_config: dict[str, dict]) -> OpenstackCloud:
+    """Set up an OpenstackCloud object with a connection to OpenStack.
+
+    Every object gets its own random prefix of the form "test-<8 hex characters>".
+    """
+    return OpenstackCloud(private_endpoint_clouds_config, f"test-{token_hex(4)}")
+
+@pytest_asyncio.fixture(scope="function", name="openstack_cloud")
+async def openstack_cloud_fixture(base_openstack_cloud: OpenstackCloud) -> OpenstackCloud:
+    """Ensures the OpenstackCloud object has no openstack servers.
+
+    Returns:
+        The same OpenstackCloud object, after deleting every server it reports.
+    """
+    instances = base_openstack_cloud.get_instances()
+    for instance in instances:
+        # instance.name carries the prefix, while delete_instance expects the name without it.
+        short_name = instance.name.removeprefix(f"{base_openstack_cloud.prefix}-")
+        base_openstack_cloud.delete_instance(name=short_name)
+    # Hand the object to the tests; without this they would receive None.
+    return base_openstack_cloud
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_get_no_instances(base_openstack_cloud: OpenstackCloud) -> None:
+    """
+    arrange: No instance on OpenStack.
+    act: Get instances on OpenStack.
+    assert: An empty list returned.
+
+    Uses base_openstack_cloud as openstack_cloud_fixture relies on this test.
+    """
+    assert not base_openstack_cloud.get_instances()
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_launch_instance_and_delete(base_openstack_cloud: OpenstackCloud, openstack_connection: OpenstackConnection, openstack_test_image: str, openstack_test_flavor: str) -> None:
+    """
+    arrange: No instance on OpenStack.
+    act:
+        1. Create an openstack instance.
+        2. Delete openstack instance.
+    assert:
+        1. Instance returned.
+        2. No instance exists.
+
+    Uses base_openstack_cloud as openstack_cloud_fixture relies on this test.
+    """
+    instances = base_openstack_cloud.get_instances()
+    assert not instances, "Test arrange failure: found existing openstack instance."
+
+    # Short random name; OpenstackCloud prepends its own prefix to it.
+    instance_name = f"{token_hex(2)}"
+
+    # 1.
+    # NOTE(review): OpenstackCloud.launch_instance declares a required `network` parameter
+    # that is not passed here — confirm which network fixture should supply it.
+    instance = base_openstack_cloud.launch_instance(name=instance_name, image=openstack_test_image, flavor=openstack_test_flavor, userdata="")
+
+    assert instance is not None
+    assert instance.name is not None
+    assert instance.id is not None
+
+    # Cross-check with a separate connection that a server containing the name exists.
+    servers = openstack_connection.list_servers()
+    for server in servers:
+        if instance_name in server.name:
+            break
+    else:
+        # for/else: reached only when the loop finished without finding a match.
+        assert False, f"OpenStack server with {instance_name} in the name not found"
+
+    # 2.
+    base_openstack_cloud.delete_instance(name=instance_name)
+    instances = base_openstack_cloud.get_instances()
+    assert not instances, "Test failure: openstack instance should be deleted."
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_instance_ssh_connection(openstack_cloud: OpenstackCloud, openstack_connection: OpenstackConnection, openstack_test_image: str, openstack_test_flavor: str) -> None:
+    """
+    arrange: One instance on OpenStack.
+    act: Get SSH connection of instance and execute command.
+    assert: Test SSH command executed successfully.
+
+    This tests whether the network rules (security group) are in place.
+    """
+    # Random payload, so the assertion cannot pass on unrelated output.
+    rand_chars = f"{token_hex(10)}"
+    instance_name = f"{token_hex(2)}"
+    # NOTE(review): OpenstackCloud.launch_instance declares a required `network` parameter
+    # that is not passed here — confirm which network fixture should supply it.
+    instance = openstack_cloud.launch_instance(name=instance_name, image=openstack_test_image, flavor=openstack_test_flavor, userdata="")
+
+    ssh_conn = openstack_cloud.get_ssh_connection(instance)
+    result = ssh_conn.run(f"echo {rand_chars}")
+
+    assert result.ok
+    assert rand_chars in result.stdout
From 3e6b570990c34236b898bed0fd38cc42ed693694 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sat, 27 Jul 2024 11:12:46 +0800
Subject: [PATCH 002/278] Add openstack cloud test
---
.github/workflows/integration_test.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 6d1499b27..349bc302c 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -33,7 +33,7 @@ jobs:
pre-run-script: scripts/setup-lxd.sh
provider: lxd
test-tox-env: integration-juju3.2
- modules: '["test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_repo", "test_charm_runner", "test_reactive"]'
+ modules: '["test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_repo", "test_charm_runner", "test_reactive", "test_openstack_cloud"]'
extra-arguments: "-m openstack"
self-hosted-runner: true
self-hosted-runner-label: stg-private-endpoint
From dc136f13596993b523a43fc508fda518d1424814 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sat, 27 Jul 2024 12:30:56 +0800
Subject: [PATCH 003/278] Fix wrong name
---
tests/integration/test_openstack_cloud.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/integration/test_openstack_cloud.py b/tests/integration/test_openstack_cloud.py
index 10ae98d09..db8a314b7 100644
--- a/tests/integration/test_openstack_cloud.py
+++ b/tests/integration/test_openstack_cloud.py
@@ -14,9 +14,9 @@
@pytest_asyncio.fixture(scope="function", name="base_openstack_cloud")
-async def base_openstack_cloud_fixture(private_endpoint_clouds_config: dict[str, dict]) -> OpenstackCloud:
+async def base_openstack_cloud_fixture(private_endpoint_config: dict[str, dict]) -> OpenstackCloud:
"""Setup a OpenstackCloud object with connection to openstack."""
- return OpenstackCloud(private_endpoint_clouds_config, f"test-{token_hex(4)}")
+ return OpenstackCloud(private_endpoint_config, f"test-{token_hex(4)}")
@pytest_asyncio.fixture(scope="function", name="openstack_cloud")
async def openstack_cloud_fixture(base_openstack_cloud: OpenstackCloud) -> OpenstackCloud:
From 0a9bb4a06a50a08c321ddbbf1adc04da4f3624fd Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sat, 27 Jul 2024 16:13:26 +0800
Subject: [PATCH 004/278] Fix typo and formatting
---
src-docs/openstack_cloud.openstack_cloud.md | 18 ++--
src/openstack_cloud/openstack_cloud.py | 95 ++++++++++++---------
tests/integration/conftest.py | 7 +-
tests/integration/test_openstack_cloud.py | 40 ++++++---
4 files changed, 99 insertions(+), 61 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 6cb15ee29..d930869bc 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -11,12 +11,12 @@
---
-
+
## class `OpenstackInstance`
OpenstackInstance(server: openstack.compute.v2.server.Server)
-
+
### method `__init__`
@@ -34,14 +34,14 @@ __init__(server: Server)
---
-
+
## class `OpenstackCloud`
-
+
### method `__init__`
@@ -55,14 +55,14 @@ Create a OpenstackCloud instance.
**Args:**
- - `cloud_config`: The openstack clouds.yaml in dict format. The first cloud in the yaml is used. prefix:
+ - `cloud_config`: The openstack clouds.yaml in dict format. The first cloud in the yaml is used. prefix:
---
-
+
### method `delete_instance`
@@ -76,7 +76,7 @@ delete_instance(name: str)
---
-
+
### method `get_instances`
@@ -90,7 +90,7 @@ get_instances(name: str) → list[OpenstackInstance]
---
-
+
### method `get_ssh_connection`
@@ -104,7 +104,7 @@ get_ssh_connection(instance: OpenstackInstance) → Connection
---
-
+
### method `launch_instance`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index f8c601d6f..226af3f9d 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -1,25 +1,26 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
+import datetime
+import logging
+import shutil
from contextlib import contextmanager
from dataclasses import dataclass
-import datetime
from functools import reduce
-import logging
from pathlib import Path
-import shutil
from typing import Iterable, Iterator, cast
+
import openstack
-from openstack.compute.v2.server import Server as OpenstackServer
-from openstack.compute.v2.keypair import Keypair as OpenstackKeypair
-from openstack.network.v2.security_group import SecurityGroup as OpenstackSecurityGroup
-from openstack.connection import Connection as OpenstackConnection
import openstack.exceptions
-from fabric import Connection as SshConnection
import paramiko
+from fabric import Connection as SshConnection
+from openstack.compute.v2.keypair import Keypair as OpenstackKeypair
+from openstack.compute.v2.server import Server as OpenstackServer
+from openstack.connection import Connection as OpenstackConnection
+from openstack.network.v2.security_group import SecurityGroup as OpenstackSecurityGroup
from paramiko.ssh_exception import NoValidConnectionsError
-from errors import OpenStackError
+from errors import OpenStackError
logger = logging.getLogger(__name__)
@@ -27,19 +28,21 @@
_SECURITY_GROUP_NAME = "github-runner-v1"
_CREATE_SERVER_TIMEOUT = 5 * 60
-_SSH_TIMEOUT= 30
+_SSH_TIMEOUT = 30
_SSH_KEY_PATH = "/home/ubuntu/.ssh"
_TEST_STRING = "test_string"
+
class _SshError(Exception):
"""Represents an error while interacting with SSH."""
+
@dataclass
class OpenstackInstance:
id: str
name: str
addresses: list[str]
-
+
def __init__(self, server: OpenstackServer):
self.id = server.id
self.name = server.name
@@ -83,33 +86,36 @@ def _create_connection(cloud_config: dict[str, dict]) -> Iterator[OpenstackConne
logger.exception("OpenStack API call failure")
raise OpenStackError("Failed OpenStack API call") from exc
+
class OpenstackCloud:
-
+
def __init__(self, cloud_config: dict[str, dict], prefix: str):
"""Create a OpenstackCloud instance.
-
+
Args:
- cloud_config: The openstack clouds.yaml in dict format. The first cloud in the yaml is
+ cloud_config: The openstack clouds.yaml in dict format. The first cloud in the yaml is
used.
prefix:
"""
self.cloud_config = cloud_config
self.prefix = prefix
-
- def launch_instance(self, name: str, image: str, flavor: str, network: str, userdata: str) -> OpenstackInstance:
+
+ def launch_instance(
+ self, name: str, image: str, flavor: str, network: str, userdata: str
+ ) -> OpenstackInstance:
full_name = self._get_instance_name(name)
logger.info("Creating openstack server with %s", full_name)
with _create_connection(cloud_config=self.cloud_config) as conn:
security_group = OpenstackCloud._ensure_security_group(conn)
keypair = OpenstackCloud._setup_key_pair(conn, full_name)
-
+
server = conn.create_server(
- name = full_name,
- image = image,
+ name=full_name,
+ image=image,
key_name=keypair.name,
- flavor= flavor,
- network= network,
+ flavor=flavor,
+ network=network,
security_groups=[security_group.id],
userdata=userdata,
auto_ip=False,
@@ -126,7 +132,7 @@ def delete_instance(self, name: str):
server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
server.delete()
OpenstackCloud._delete_key_pair(conn, full_name)
-
+
def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
key_path = OpenstackCloud._get_key_path(instance.name)
@@ -166,7 +172,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
def get_instances(self, name: str) -> list[OpenstackInstance]:
logger.info("Getting all openstack servers managed by the charm")
-
+
with _create_connection(cloud_config=self.cloud_config) as conn:
servers = self._get_openstack_instances(conn)
server_names = set(server.name for server in servers)
@@ -257,21 +263,28 @@ def _get_openstack_instances(self, conn: OpenstackConnection) -> list[OpenstackS
for server in cast(list[OpenstackServer], conn.list_servers())
if server.name.startswith(f"{self.prefix}-")
]
-
+
@staticmethod
- def _get_and_ensure_unique_server(conn: OpenstackConnection, name: str) -> OpenstackServer | None:
+ def _get_and_ensure_unique_server(
+ conn: OpenstackConnection, name: str
+ ) -> OpenstackServer | None:
"""Get the latest server of the name and ensure it is unique.
- If multiple servers with the same name is found, the latest server in creation time is
+ If multiple servers with the same name is found, the latest server in creation time is
returned. Other servers is deleted.
"""
servers: list[OpenstackServer] = conn.search_servers(name)
- latest_server = reduce(lambda a, b: a if datetime.strptime(a.created_at) < datetime.strptime(b.create_at) else b, servers)
+ latest_server = reduce(
+ lambda a, b: (
+ a if datetime.strptime(a.created_at) < datetime.strptime(b.create_at) else b
+ ),
+ servers,
+ )
outdated_servers = filter(lambda x: x != latest_server, servers)
for server in outdated_servers:
server.delete()
-
+
return latest_server
@staticmethod
@@ -289,17 +302,17 @@ def _get_key_path(name: str) -> Path:
@staticmethod
def _setup_key_pair(conn: OpenstackConnection, name: str) -> OpenstackKeypair:
key_path = OpenstackCloud._get_key_path(name)
-
+
if key_path.exists:
logger.warning("Existing private key file for %s found, removing it.", name)
key_path.unlink(missing_ok=True)
-
+
keypair = conn.create_keypair(name=name)
key_path.write_text(keypair.private_key)
shutil.chown(key_path, user="ubuntu", group="ubuntu")
key_path.chmod(0o400)
return keypair
-
+
@staticmethod
def _delete_key_pair(conn: OpenstackConnection, name: str) -> None:
try:
@@ -307,8 +320,8 @@ def _delete_key_pair(conn: OpenstackConnection, name: str) -> None:
if not conn.delete_keypair(name):
logger.warning("Unable to delete keypair for %s", name)
except openstack.exceptions.SDKException:
- logger.warning("Unable to delete keypair for %s", name, stack_info=True)
-
+ logger.warning("Unable to delete keypair for %s", name, stack_info=True)
+
key_path = OpenstackCloud._get_key_path(name)
key_path.unlink(missing_ok=True)
@@ -318,14 +331,14 @@ def _ensure_security_group(conn: OpenstackConnection) -> OpenstackSecurityGroup:
Args:
conn: The connection object to access OpenStack cloud.
-
+
Returns:
The security group with the rules for runners.
"""
rule_exists_icmp = False
rule_exists_ssh = False
rule_exists_tmate_ssh = False
-
+
security_group_list = conn.list_security_groups(filters={"name": _SECURITY_GROUP_NAME})
# Pick the first security_group returned.
security_group = next(iter(security_group_list), None)
@@ -340,7 +353,9 @@ def _ensure_security_group(conn: OpenstackConnection) -> OpenstackSecurityGroup:
for rule in existing_rules:
if rule.protocol == "icmp":
logger.debug(
- "Found ICMP rule in existing security group %s of ID %s", _SECURITY_GROUP_NAME, security_group.id
+ "Found ICMP rule in existing security group %s of ID %s",
+ _SECURITY_GROUP_NAME,
+ security_group.id,
)
rule_exists_icmp = True
if (
@@ -348,7 +363,9 @@ def _ensure_security_group(conn: OpenstackConnection) -> OpenstackSecurityGroup:
and rule["port_range_min"] == rule["port_range_max"] == 22
):
logger.debug(
- "Found SSH rule in existing security group %s of ID %s", _SECURITY_GROUP_NAME, security_group.id
+ "Found SSH rule in existing security group %s of ID %s",
+ _SECURITY_GROUP_NAME,
+ security_group.id,
)
rule_exists_ssh = True
if (
@@ -356,7 +373,9 @@ def _ensure_security_group(conn: OpenstackConnection) -> OpenstackSecurityGroup:
and rule["port_range_min"] == rule["port_range_max"] == 10022
):
logger.debug(
- "Found tmate SSH rule in existing security group %s of ID %s", _SECURITY_GROUP_NAME, security_group.id
+ "Found tmate SSH rule in existing security group %s of ID %s",
+ _SECURITY_GROUP_NAME,
+ security_group.id,
)
rule_exists_tmate_ssh = True
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 46f7e61c6..25f4f1ee3 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -277,20 +277,23 @@ def flavor_name_fixture(pytestconfig: pytest.Config) -> str:
assert flavor_name, "Please specify the --openstack-flavor-name command line option"
return flavor_name
+
@pytest.fixture(scope="module", name="openstack_test_image")
def openstack_test_image_fixture(pytestconfig: pytest.Config) -> str:
"""Image for testing openstack interfaces."""
test_image = pytestconfig.getoption("--openstack-test-image")
assert test_image, "Please specify the --openstack-test-image command line option"
return test_image
-
+
+
@pytest.fixture(scope="module", name="openstack_test_flavor")
-def openstack_test_image_fixture(pytestconfig: pytest.Config) -> str:
+def openstack_test_flavor_fixture(pytestconfig: pytest.Config) -> str:
"""Flavor for testing openstack interfaces."""
test_flavor = pytestconfig.getoption("--openstack-test-flavor")
assert test_flavor, "Please specify the --openstack-test-flavor command line option"
return test_flavor
+
@pytest.fixture(scope="module", name="openstack_connection")
def openstack_connection_fixture(
clouds_yaml_contents: str, app_name: str
diff --git a/tests/integration/test_openstack_cloud.py b/tests/integration/test_openstack_cloud.py
index db8a314b7..72d760020 100644
--- a/tests/integration/test_openstack_cloud.py
+++ b/tests/integration/test_openstack_cloud.py
@@ -18,6 +18,7 @@ async def base_openstack_cloud_fixture(private_endpoint_config: dict[str, dict])
"""Setup a OpenstackCloud object with connection to openstack."""
return OpenstackCloud(private_endpoint_config, f"test-{token_hex(4)}")
+
@pytest_asyncio.fixture(scope="function", name="openstack_cloud")
async def openstack_cloud_fixture(base_openstack_cloud: OpenstackCloud) -> OpenstackCloud:
"""Ensures the OpenstackCloud object has no openstack servers."""
@@ -34,22 +35,28 @@ async def test_get_no_instances(base_openstack_cloud: OpenstackCloud) -> None:
arrange: No instance on OpenStack.
act: Get instances on OpenStack.
assert: An empty list returned.
-
+
Uses base_openstack_cloud as openstack_cloud_fixture relies on this test.
"""
instances = base_openstack_cloud.get_instances()
assert not instances
+
@pytest.mark.openstack
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
-async def test_launch_instance_and_delete(base_openstack_cloud: OpenstackCloud, openstack_connection: OpenstackConnection, openstack_test_image: str, openstack_test_flavor: str) -> None:
+async def test_launch_instance_and_delete(
+ base_openstack_cloud: OpenstackCloud,
+ openstack_connection: OpenstackConnection,
+ openstack_test_image: str,
+ openstack_test_flavor: str,
+) -> None:
"""
arrange: No instance on OpenStack.
- act:
+ act:
1. Create an openstack instance.
2. Delete openstack instance.
- assert:
+ assert:
1. Instance returned.
2. No instance exists.
@@ -57,16 +64,18 @@ async def test_launch_instance_and_delete(base_openstack_cloud: OpenstackCloud,
"""
instances = base_openstack_cloud.get_instances()
assert not instances, "Test arrange failure: found existing openstack instance."
-
+
instance_name = f"{token_hex(2)}"
# 1.
- instance = base_openstack_cloud.launch_instance(name=instance_name, image=openstack_test_image, flavor=openstack_test_flavor, userdata="")
+ instance = base_openstack_cloud.launch_instance(
+ name=instance_name, image=openstack_test_image, flavor=openstack_test_flavor, userdata=""
+ )
assert instance is not None
assert instance.name is not None
assert instance.id is not None
-
+
servers = openstack_connection.list_servers()
for server in servers:
if instance_name in server.name:
@@ -78,25 +87,32 @@ async def test_launch_instance_and_delete(base_openstack_cloud: OpenstackCloud,
base_openstack_cloud.delete_instance(name=instance_name)
instances = base_openstack_cloud.get_instances()
assert not instances, "Test failure: openstack instance should be deleted."
-
+
@pytest.mark.openstack
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
-async def test_instance_ssh_connection(openstack_cloud: OpenstackCloud, openstack_connection: OpenstackConnection, openstack_test_image: str, openstack_test_flavor: str) -> None:
+async def test_instance_ssh_connection(
+ openstack_cloud: OpenstackCloud,
+ openstack_connection: OpenstackConnection,
+ openstack_test_image: str,
+ openstack_test_flavor: str,
+) -> None:
"""
arrange: One instance on OpenStack.
act: Get SSH connection of instance and execute command.
assert: Test SSH command executed successfully.
-
+
This tests whether the network rules (security group) are in place.
"""
rand_chars = f"{token_hex(10)}"
instance_name = f"{token_hex(2)}"
- instance = openstack_cloud.launch_instance(name=instance_name, image=openstack_test_image, flavor=openstack_test_flavor, userdata="")
+ instance = openstack_cloud.launch_instance(
+ name=instance_name, image=openstack_test_image, flavor=openstack_test_flavor, userdata=""
+ )
ssh_conn = openstack_cloud.get_ssh_connection(instance)
result = ssh_conn.run(f"echo {rand_chars}")
- assert result.ok
+ assert result.ok
assert rand_chars in result.stdout
From 01c6f525c6167e2e9297c74c6a2b6de45386d897 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sun, 28 Jul 2024 13:54:52 +0800
Subject: [PATCH 005/278] Fix wrong func sig
---
.github/workflows/integration_test.yaml | 4 +++-
src-docs/openstack_cloud.openstack_cloud.md | 2 +-
src/openstack_cloud/openstack_cloud.py | 2 +-
3 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 349bc302c..a85614f3e 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -33,7 +33,9 @@ jobs:
pre-run-script: scripts/setup-lxd.sh
provider: lxd
test-tox-env: integration-juju3.2
- modules: '["test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_repo", "test_charm_runner", "test_reactive", "test_openstack_cloud"]'
+ # TODO: debug only remove
+ # modules: '["test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_repo", "test_charm_runner", "test_reactive", "test_openstack_cloud"]'
+ modules: '["test_openstack_cloud"]'
extra-arguments: "-m openstack"
self-hosted-runner: true
self-hosted-runner-label: stg-private-endpoint
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index d930869bc..7e3e41cee 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -81,7 +81,7 @@ delete_instance(name: str)
### method `get_instances`
```python
-get_instances(name: str) → list[OpenstackInstance]
+get_instances() → list[OpenstackInstance]
```
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 226af3f9d..e20ccf7fb 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -170,7 +170,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
f"addresses: {instance.addresses}"
)
- def get_instances(self, name: str) -> list[OpenstackInstance]:
+ def get_instances(self) -> list[OpenstackInstance]:
logger.info("Getting all openstack servers managed by the charm")
with _create_connection(cloud_config=self.cloud_config) as conn:
From 3969600b90ecc5f8b935e8aec39f5435b1f7b8bb Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sun, 28 Jul 2024 15:20:00 +0800
Subject: [PATCH 006/278] Fix yaml format
---
tests/integration/test_openstack_cloud.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tests/integration/test_openstack_cloud.py b/tests/integration/test_openstack_cloud.py
index 72d760020..63f2df4c8 100644
--- a/tests/integration/test_openstack_cloud.py
+++ b/tests/integration/test_openstack_cloud.py
@@ -9,14 +9,16 @@
import pytest
import pytest_asyncio
from openstack.connection import Connection as OpenstackConnection
+import yaml
from openstack_cloud.openstack_cloud import OpenstackCloud
@pytest_asyncio.fixture(scope="function", name="base_openstack_cloud")
-async def base_openstack_cloud_fixture(private_endpoint_config: dict[str, dict]) -> OpenstackCloud:
+async def base_openstack_cloud_fixture(private_endpoint_clouds_yaml: str) -> OpenstackCloud:
"""Setup a OpenstackCloud object with connection to openstack."""
- return OpenstackCloud(private_endpoint_config, f"test-{token_hex(4)}")
+ clouds_yaml = yaml.load(private_endpoint_clouds_yaml)
+ return OpenstackCloud(clouds_yaml, f"test-{token_hex(4)}")
@pytest_asyncio.fixture(scope="function", name="openstack_cloud")
From 9bf7443a24391e22ebdcb63be6f1c164b410c4bf Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sun, 28 Jul 2024 17:48:11 +0800
Subject: [PATCH 007/278] Fix yaml loading
---
tests/integration/test_openstack_cloud.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_openstack_cloud.py b/tests/integration/test_openstack_cloud.py
index 63f2df4c8..45c19e65c 100644
--- a/tests/integration/test_openstack_cloud.py
+++ b/tests/integration/test_openstack_cloud.py
@@ -17,7 +17,7 @@
@pytest_asyncio.fixture(scope="function", name="base_openstack_cloud")
async def base_openstack_cloud_fixture(private_endpoint_clouds_yaml: str) -> OpenstackCloud:
"""Setup a OpenstackCloud object with connection to openstack."""
- clouds_yaml = yaml.load(private_endpoint_clouds_yaml)
+ clouds_yaml = yaml.safe_load(private_endpoint_clouds_yaml)
return OpenstackCloud(clouds_yaml, f"test-{token_hex(4)}")
From 1bc3b2b2e4147839830f5eb71673a85fc10a855a Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 29 Jul 2024 10:30:06 +0800
Subject: [PATCH 008/278] Fix openstack connection function
---
src-docs/openstack_cloud.md | 3 --
src-docs/openstack_cloud.openstack_cloud.md | 22 ++++++-----
src-docs/openstack_cloud.openstack_manager.md | 16 ++++----
src/openstack_cloud/openstack_cloud.py | 39 ++++++++++---------
src/openstack_cloud/openstack_manager.py | 35 ++++++++++++++++-
tests/integration/test_openstack_cloud.py | 4 +-
6 files changed, 77 insertions(+), 42 deletions(-)
diff --git a/src-docs/openstack_cloud.md b/src-docs/openstack_cloud.md
index 2bd698583..4d82f5359 100644
--- a/src-docs/openstack_cloud.md
+++ b/src-docs/openstack_cloud.md
@@ -7,9 +7,6 @@ Module for managing Openstack cloud.
**Global Variables**
---------------
-- **openstack_cloud**: # Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
- **openstack_manager**: # Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 7e3e41cee..6feb5ad2f 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -11,12 +11,12 @@
---
-
+
## class `OpenstackInstance`
OpenstackInstance(server: openstack.compute.v2.server.Server)
-
+
### method `__init__`
@@ -34,19 +34,19 @@ __init__(server: Server)
---
-
+
## class `OpenstackCloud`
-
+
### method `__init__`
```python
-__init__(cloud_config: dict[str, dict], prefix: str)
+__init__(clouds_config: dict[str, dict], cloud: str, prefix: str)
```
Create a OpenstackCloud instance.
@@ -55,14 +55,16 @@ Create a OpenstackCloud instance.
**Args:**
- - `cloud_config`: The openstack clouds.yaml in dict format. The first cloud in the yaml is used. prefix:
+ - `clouds_config`: The openstack clouds.yaml in dict format.
+ - `cloud`: The name of cloud to use in the clouds.yaml.
+ - `prefix`: Prefix attached to names of resource managed by this instance. Used for identifying which resource belongs to this instance.
---
-
+
### method `delete_instance`
@@ -76,7 +78,7 @@ delete_instance(name: str)
---
-
+
### method `get_instances`
@@ -90,7 +92,7 @@ get_instances() → list[OpenstackInstance]
---
-
+
### method `get_ssh_connection`
@@ -104,7 +106,7 @@ get_ssh_connection(instance: OpenstackInstance) → Connection
---
-
+
### method `launch_instance`
diff --git a/src-docs/openstack_cloud.openstack_manager.md b/src-docs/openstack_cloud.openstack_manager.md
index e810e05d0..93cff6908 100644
--- a/src-docs/openstack_cloud.openstack_manager.md
+++ b/src-docs/openstack_cloud.openstack_manager.md
@@ -18,7 +18,7 @@ Module for handling interactions with OpenStack.
---
-
+
## function `create_instance_config`
@@ -54,7 +54,7 @@ Create an instance config from charm data.
---
-
+
## class `InstanceConfig`
The configuration values for creating a single runner instance.
@@ -93,7 +93,7 @@ __init__(
---
-
+
## class `GithubRunnerRemoveError`
Represents an error removing registered runner from Github.
@@ -104,7 +104,7 @@ Represents an error removing registered runner from Github.
---
-
+
## class `OpenstackRunnerManager`
Runner manager for OpenStack-based instances.
@@ -117,7 +117,7 @@ Runner manager for OpenStack-based instances.
- `unit_num`: The juju unit number.
- `instance_name`: Prefix of the name for the set of runners.
-
+
### method `__init__`
@@ -146,7 +146,7 @@ Construct OpenstackRunnerManager object.
---
-
+
### method `flush`
@@ -163,7 +163,7 @@ Flush Openstack servers.
---
-
+
### method `get_github_runner_info`
@@ -180,7 +180,7 @@ Get information on GitHub for the runners.
---
-
+
### method `reconcile`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index e20ccf7fb..90e1d3073 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -19,11 +19,14 @@
from openstack.connection import Connection as OpenstackConnection
from openstack.network.v2.security_group import SecurityGroup as OpenstackSecurityGroup
from paramiko.ssh_exception import NoValidConnectionsError
+import yaml
from errors import OpenStackError
logger = logging.getLogger(__name__)
+_CLOUDS_YAML_PATH = Path(Path.home() / ".config/openstack/clouds.yaml")
+
# Update the version when the security group rules are not backward compatible.
_SECURITY_GROUP_NAME = "github-runner-v1"
@@ -54,15 +57,14 @@ def __init__(self, server: OpenstackServer):
@contextmanager
-def _create_connection(cloud_config: dict[str, dict]) -> Iterator[OpenstackConnection]:
+def _get_openstack_connection(clouds_config: dict[str, dict], cloud: str) -> Iterator[OpenstackConnection]:
"""Create a connection context managed object, to be used within with statements.
-
- This method should be called with a valid cloud_config. See _validate_cloud_config.
- Also, this method assumes that the clouds.yaml exists on ~/.config/openstack/clouds.yaml.
- See charm_state.py _write_openstack_config_to_disk.
+
+ The file of _CLOUDS_YAML_PATH should only be modified by this function.
Args:
cloud_config: The configuration in clouds.yaml format to apply.
+ cloud: The name of cloud to use in the clouds.yaml.
Raises:
OpenStackError: if the credentials provided is not authorized.
@@ -70,15 +72,14 @@ def _create_connection(cloud_config: dict[str, dict]) -> Iterator[OpenstackConne
Yields:
An openstack.connection.Connection object.
"""
- clouds = list(cloud_config["clouds"].keys())
- if len(clouds) > 1:
- logger.warning("Multiple clouds defined in clouds.yaml. Using the first one to connect.")
- cloud_name = clouds[0]
+ if not _CLOUDS_YAML_PATH.exists():
+ _CLOUDS_YAML_PATH.parent.mkdir(parents=True, exist_ok=True)
+ _CLOUDS_YAML_PATH.write_text(data=yaml.dump(clouds_config), encoding="utf-8")
# api documents that keystoneauth1.exceptions.MissingRequiredOptions can be raised but
# I could not reproduce it. Therefore, no catch here for such exception.
try:
- with openstack.connect(cloud=cloud_name) as conn:
+ with openstack.connect(cloud=cloud) as conn:
conn.authorize()
yield conn
# pylint thinks this isn't an exception, but does inherit from Exception class.
@@ -89,15 +90,17 @@ def _create_connection(cloud_config: dict[str, dict]) -> Iterator[OpenstackConne
class OpenstackCloud:
- def __init__(self, cloud_config: dict[str, dict], prefix: str):
+ def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
"""Create a OpenstackCloud instance.
Args:
- cloud_config: The openstack clouds.yaml in dict format. The first cloud in the yaml is
- used.
- prefix:
+ clouds_config: The openstack clouds.yaml in dict format.
+ cloud: The name of cloud to use in the clouds.yaml.
+ prefix: Prefix attached to names of resource managed by this instance. Used for
+ identifying which resource belongs to this instance.
"""
- self.cloud_config = cloud_config
+ self.clouds_config = clouds_config
+ self.cloud = cloud
self.prefix = prefix
def launch_instance(
@@ -106,7 +109,7 @@ def launch_instance(
full_name = self._get_instance_name(name)
logger.info("Creating openstack server with %s", full_name)
- with _create_connection(cloud_config=self.cloud_config) as conn:
+ with _get_openstack_connection(cloud_config=self.clouds_config, cloud=self.cloud) as conn:
security_group = OpenstackCloud._ensure_security_group(conn)
keypair = OpenstackCloud._setup_key_pair(conn, full_name)
@@ -128,7 +131,7 @@ def delete_instance(self, name: str):
full_name = self._get_instance_name(full_name)
logger.info("Deleting openstack server with %s", full_name)
- with _create_connection(cloud_config=self.cloud_config) as conn:
+ with _get_openstack_connection(cloud_config=self.clouds_config) as conn:
server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
server.delete()
OpenstackCloud._delete_key_pair(conn, full_name)
@@ -173,7 +176,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
def get_instances(self) -> list[OpenstackInstance]:
logger.info("Getting all openstack servers managed by the charm")
- with _create_connection(cloud_config=self.cloud_config) as conn:
+ with _get_openstack_connection(cloud_config=self.clouds_config) as conn:
servers = self._get_openstack_instances(conn)
server_names = set(server.name for server in servers)
return [
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index 77e42c3ef..de17d412f 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -59,7 +59,6 @@
from metrics import runner as runner_metrics
from metrics import storage as metrics_storage
from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
-from openstack_cloud.openstack_cloud import _create_connection
from repo_policy_compliance_client import RepoPolicyComplianceClient
from runner_manager import IssuedMetricEventsStats
from runner_manager_type import OpenstackRunnerManagerConfig
@@ -1493,3 +1492,37 @@ def flush(self) -> int:
remove_token=remove_token,
)
return len(runners_to_delete)
+
+
+@contextmanager
+def _create_connection(cloud_config: dict[str, dict]) -> Iterator[OpenstackConnection]:
+ """Create a connection context managed object, to be used within with statements.
+
+ This method should be called with a valid cloud_config. See _validate_cloud_config.
+ Also, this method assumes that the clouds.yaml exists on ~/.config/openstack/clouds.yaml.
+ See charm_state.py _write_openstack_config_to_disk.
+
+ Args:
+ cloud_config: The configuration in clouds.yaml format to apply.
+
+ Raises:
+ OpenStackError: if the credentials provided is not authorized.
+
+ Yields:
+ An openstack.connection.Connection object.
+ """
+ clouds = list(cloud_config["clouds"].keys())
+ if len(clouds) > 1:
+ logger.warning("Multiple clouds defined in clouds.yaml. Using the first one to connect.")
+ cloud_name = clouds[0]
+
+ # api documents that keystoneauth1.exceptions.MissingRequiredOptions can be raised but
+ # I could not reproduce it. Therefore, no catch here for such exception.
+ try:
+ with openstack.connect(cloud=cloud_name) as conn:
+ conn.authorize()
+ yield conn
+ # pylint thinks this isn't an exception, but does inherit from Exception class.
+ except openstack.exceptions.HttpException as exc: # pylint: disable=bad-exception-cause
+ logger.exception("OpenStack API call failure")
+ raise OpenStackError("Failed OpenStack API call") from exc
\ No newline at end of file
diff --git a/tests/integration/test_openstack_cloud.py b/tests/integration/test_openstack_cloud.py
index 45c19e65c..8f908a331 100644
--- a/tests/integration/test_openstack_cloud.py
+++ b/tests/integration/test_openstack_cloud.py
@@ -8,8 +8,8 @@
import pytest
import pytest_asyncio
-from openstack.connection import Connection as OpenstackConnection
import yaml
+from openstack.connection import Connection as OpenstackConnection
from openstack_cloud.openstack_cloud import OpenstackCloud
@@ -18,7 +18,7 @@
async def base_openstack_cloud_fixture(private_endpoint_clouds_yaml: str) -> OpenstackCloud:
"""Setup a OpenstackCloud object with connection to openstack."""
clouds_yaml = yaml.safe_load(private_endpoint_clouds_yaml)
- return OpenstackCloud(clouds_yaml, f"test-{token_hex(4)}")
+ return OpenstackCloud(clouds_yaml, "testcloud", f"test-{token_hex(4)}")
@pytest_asyncio.fixture(scope="function", name="openstack_cloud")
From c89725a5401df3c88607541b141749a075c18f80 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 30 Jul 2024 09:41:04 +0800
Subject: [PATCH 009/278] Fix _get_openstack_connection call
---
src/openstack_cloud/openstack_cloud.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 90e1d3073..9bfb9d6c6 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -99,8 +99,8 @@ def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
prefix: Prefix attached to names of resource managed by this instance. Used for
identifying which resource belongs to this instance.
"""
- self.clouds_config = clouds_config
- self.cloud = cloud
+ self._clouds_config = clouds_config
+ self._cloud = cloud
self.prefix = prefix
def launch_instance(
@@ -109,7 +109,7 @@ def launch_instance(
full_name = self._get_instance_name(name)
logger.info("Creating openstack server with %s", full_name)
- with _get_openstack_connection(cloud_config=self.clouds_config, cloud=self.cloud) as conn:
+ with _get_openstack_connection(clouds_config=self._clouds_config, cloud=self._cloud) as conn:
security_group = OpenstackCloud._ensure_security_group(conn)
keypair = OpenstackCloud._setup_key_pair(conn, full_name)
@@ -131,7 +131,7 @@ def delete_instance(self, name: str):
full_name = self._get_instance_name(full_name)
logger.info("Deleting openstack server with %s", full_name)
- with _get_openstack_connection(cloud_config=self.clouds_config) as conn:
+ with _get_openstack_connection(clouds_config=self._clouds_config, cloud=self._cloud) as conn:
server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
server.delete()
OpenstackCloud._delete_key_pair(conn, full_name)
@@ -176,7 +176,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
def get_instances(self) -> list[OpenstackInstance]:
logger.info("Getting all openstack servers managed by the charm")
- with _get_openstack_connection(cloud_config=self.clouds_config) as conn:
+ with _get_openstack_connection(clouds_config=self._clouds_config, cloud=self._cloud) as conn:
servers = self._get_openstack_instances(conn)
server_names = set(server.name for server in servers)
return [
From 14f621c3e79a103531e2f70d256d347b54b01008 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 30 Jul 2024 10:36:04 +0800
Subject: [PATCH 010/278] Fix integration tests
---
tests/integration/test_openstack_cloud.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tests/integration/test_openstack_cloud.py b/tests/integration/test_openstack_cloud.py
index 8f908a331..91a45b52a 100644
--- a/tests/integration/test_openstack_cloud.py
+++ b/tests/integration/test_openstack_cloud.py
@@ -27,6 +27,7 @@ async def openstack_cloud_fixture(base_openstack_cloud: OpenstackCloud) -> Opens
instances = base_openstack_cloud.get_instances()
for instance in instances:
base_openstack_cloud.delete_instance(name=instance.name)
+ return base_openstack_cloud
@pytest.mark.openstack
@@ -52,6 +53,7 @@ async def test_launch_instance_and_delete(
openstack_connection: OpenstackConnection,
openstack_test_image: str,
openstack_test_flavor: str,
+ network_name: str,
) -> None:
"""
arrange: No instance on OpenStack.
@@ -71,7 +73,7 @@ async def test_launch_instance_and_delete(
# 1.
instance = base_openstack_cloud.launch_instance(
- name=instance_name, image=openstack_test_image, flavor=openstack_test_flavor, userdata=""
+ name=instance_name, image=openstack_test_image, flavor=openstack_test_flavor, network=network_name,userdata=""
)
assert instance is not None
@@ -96,7 +98,6 @@ async def test_launch_instance_and_delete(
@pytest.mark.abort_on_fail
async def test_instance_ssh_connection(
openstack_cloud: OpenstackCloud,
- openstack_connection: OpenstackConnection,
openstack_test_image: str,
openstack_test_flavor: str,
) -> None:
From 74e415fc19ed4c705ac9e5f163bbbccf56f70d54 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 30 Jul 2024 13:19:56 +0800
Subject: [PATCH 011/278] Fix dict access
---
src-docs/openstack_cloud.openstack_cloud.md | 14 +++++------
src/openstack_cloud/openstack_cloud.py | 28 +++++++++++++--------
src/openstack_cloud/openstack_manager.py | 2 +-
tests/integration/test_openstack_cloud.py | 13 ++++++++--
4 files changed, 37 insertions(+), 20 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 6feb5ad2f..0831662e7 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -34,14 +34,14 @@ __init__(server: Server)
---
-
+
## class `OpenstackCloud`
-
+
### method `__init__`
@@ -57,14 +57,14 @@ Create a OpenstackCloud instance.
- `clouds_config`: The openstack clouds.yaml in dict format.
- `cloud`: The name of cloud to use in the clouds.yaml.
- - `prefix`: Prefix attached to names of resource managed by this instance. Used for identifying which resource belongs to this instance.
+ - `prefix`: Prefix attached to names of resource managed by this instance. Used for identifying which resource belongs to this instance.
---
-
+
### method `delete_instance`
@@ -78,7 +78,7 @@ delete_instance(name: str)
---
-
+
### method `get_instances`
@@ -92,7 +92,7 @@ get_instances() → list[OpenstackInstance]
---
-
+
### method `get_ssh_connection`
@@ -106,7 +106,7 @@ get_ssh_connection(instance: OpenstackInstance) → Connection
---
-
+
### method `launch_instance`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 9bfb9d6c6..14d205b7b 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -13,13 +13,13 @@
import openstack
import openstack.exceptions
import paramiko
+import yaml
from fabric import Connection as SshConnection
from openstack.compute.v2.keypair import Keypair as OpenstackKeypair
from openstack.compute.v2.server import Server as OpenstackServer
from openstack.connection import Connection as OpenstackConnection
from openstack.network.v2.security_group import SecurityGroup as OpenstackSecurityGroup
from paramiko.ssh_exception import NoValidConnectionsError
-import yaml
from errors import OpenStackError
@@ -57,9 +57,11 @@ def __init__(self, server: OpenstackServer):
@contextmanager
-def _get_openstack_connection(clouds_config: dict[str, dict], cloud: str) -> Iterator[OpenstackConnection]:
+def _get_openstack_connection(
+ clouds_config: dict[str, dict], cloud: str
+) -> Iterator[OpenstackConnection]:
"""Create a connection context managed object, to be used within with statements.
-
+
The file of _CLOUDS_YAML_PATH should only be modified by this function.
Args:
@@ -96,7 +98,7 @@ def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
Args:
clouds_config: The openstack clouds.yaml in dict format.
cloud: The name of cloud to use in the clouds.yaml.
- prefix: Prefix attached to names of resource managed by this instance. Used for
+ prefix: Prefix attached to names of resource managed by this instance. Used for
identifying which resource belongs to this instance.
"""
self._clouds_config = clouds_config
@@ -109,7 +111,9 @@ def launch_instance(
full_name = self._get_instance_name(name)
logger.info("Creating openstack server with %s", full_name)
- with _get_openstack_connection(clouds_config=self._clouds_config, cloud=self._cloud) as conn:
+ with _get_openstack_connection(
+ clouds_config=self._clouds_config, cloud=self._cloud
+ ) as conn:
security_group = OpenstackCloud._ensure_security_group(conn)
keypair = OpenstackCloud._setup_key_pair(conn, full_name)
@@ -131,7 +135,9 @@ def delete_instance(self, name: str):
full_name = self._get_instance_name(full_name)
logger.info("Deleting openstack server with %s", full_name)
- with _get_openstack_connection(clouds_config=self._clouds_config, cloud=self._cloud) as conn:
+ with _get_openstack_connection(
+ clouds_config=self._clouds_config, cloud=self._cloud
+ ) as conn:
server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
server.delete()
OpenstackCloud._delete_key_pair(conn, full_name)
@@ -176,7 +182,9 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
def get_instances(self) -> list[OpenstackInstance]:
logger.info("Getting all openstack servers managed by the charm")
- with _get_openstack_connection(clouds_config=self._clouds_config, cloud=self._cloud) as conn:
+ with _get_openstack_connection(
+ clouds_config=self._clouds_config, cloud=self._cloud
+ ) as conn:
servers = self._get_openstack_instances(conn)
server_names = set(server.name for server in servers)
return [
@@ -354,7 +362,7 @@ def _ensure_security_group(conn: OpenstackConnection) -> OpenstackSecurityGroup:
else:
existing_rules = security_group.security_group_rules
for rule in existing_rules:
- if rule.protocol == "icmp":
+ if rule["protocol"] == "icmp":
logger.debug(
"Found ICMP rule in existing security group %s of ID %s",
_SECURITY_GROUP_NAME,
@@ -362,7 +370,7 @@ def _ensure_security_group(conn: OpenstackConnection) -> OpenstackSecurityGroup:
)
rule_exists_icmp = True
if (
- rule.protocol == "tcp"
+ rule["protocol"] == "tcp"
and rule["port_range_min"] == rule["port_range_max"] == 22
):
logger.debug(
@@ -372,7 +380,7 @@ def _ensure_security_group(conn: OpenstackConnection) -> OpenstackSecurityGroup:
)
rule_exists_ssh = True
if (
- rule.protocol == "tcp"
+ rule["protocol"] == "tcp"
and rule["port_range_min"] == rule["port_range_max"] == 10022
):
logger.debug(
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index de17d412f..f5fb1f0f1 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -1525,4 +1525,4 @@ def _create_connection(cloud_config: dict[str, dict]) -> Iterator[OpenstackConne
# pylint thinks this isn't an exception, but does inherit from Exception class.
except openstack.exceptions.HttpException as exc: # pylint: disable=bad-exception-cause
logger.exception("OpenStack API call failure")
- raise OpenStackError("Failed OpenStack API call") from exc
\ No newline at end of file
+ raise OpenStackError("Failed OpenStack API call") from exc
diff --git a/tests/integration/test_openstack_cloud.py b/tests/integration/test_openstack_cloud.py
index 91a45b52a..321fbe1fa 100644
--- a/tests/integration/test_openstack_cloud.py
+++ b/tests/integration/test_openstack_cloud.py
@@ -73,7 +73,11 @@ async def test_launch_instance_and_delete(
# 1.
instance = base_openstack_cloud.launch_instance(
- name=instance_name, image=openstack_test_image, flavor=openstack_test_flavor, network=network_name,userdata=""
+ name=instance_name,
+ image=openstack_test_image,
+ flavor=openstack_test_flavor,
+ network=network_name,
+ userdata="",
)
assert instance is not None
@@ -100,6 +104,7 @@ async def test_instance_ssh_connection(
openstack_cloud: OpenstackCloud,
openstack_test_image: str,
openstack_test_flavor: str,
+ network_name: str,
) -> None:
"""
arrange: One instance on OpenStack.
@@ -111,7 +116,11 @@ async def test_instance_ssh_connection(
rand_chars = f"{token_hex(10)}"
instance_name = f"{token_hex(2)}"
instance = openstack_cloud.launch_instance(
- name=instance_name, image=openstack_test_image, flavor=openstack_test_flavor, userdata=""
+ name=instance_name,
+ image=openstack_test_image,
+ flavor=openstack_test_flavor,
+ network=network_name,
+ userdata="",
)
ssh_conn = openstack_cloud.get_ssh_connection(instance)
From a7252ab52bd233bdec87ac63caf54f2b1568a5f4 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 30 Jul 2024 13:40:10 +0800
Subject: [PATCH 012/278] Fix SSH path
---
src/openstack_cloud/openstack_cloud.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 14d205b7b..bb7c3940a 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -32,7 +32,7 @@
_CREATE_SERVER_TIMEOUT = 5 * 60
_SSH_TIMEOUT = 30
-_SSH_KEY_PATH = "/home/ubuntu/.ssh"
+_SSH_KEY_PATH = Path("/home/ubuntu/.ssh")
_TEST_STRING = "test_string"
From 0ef1d0a263084a210f07fd7ebb5906ffc8951ac8 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 31 Jul 2024 11:16:20 +0800
Subject: [PATCH 013/278] Fix wrong variable name
---
src-docs/openstack_cloud.openstack_cloud.md | 14 ++++++++++++++
src/openstack_cloud/openstack_cloud.py | 6 +++---
2 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 0831662e7..9aa364477 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -76,6 +76,20 @@ delete_instance(name: str)
+---
+
+
+
+### method `get_instance_name`
+
+```python
+get_instance_name(name: str) → str
+```
+
+
+
+
+
---
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index bb7c3940a..d41fb722a 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -108,7 +108,7 @@ def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
def launch_instance(
self, name: str, image: str, flavor: str, network: str, userdata: str
) -> OpenstackInstance:
- full_name = self._get_instance_name(name)
+ full_name = self.get_instance_name(name)
logger.info("Creating openstack server with %s", full_name)
with _get_openstack_connection(
@@ -132,7 +132,7 @@ def launch_instance(
return OpenstackInstance(server)
def delete_instance(self, name: str):
- full_name = self._get_instance_name(full_name)
+ full_name = self.get_instance_name(name)
logger.info("Deleting openstack server with %s", full_name)
with _get_openstack_connection(
@@ -257,7 +257,7 @@ def _clean_up_openstack_keypairs(
key.name,
)
- def _get_instance_name(self, name: str) -> str:
+ def get_instance_name(self, name: str) -> str:
return f"{self.prefix}-{name}"
def _get_openstack_instances(self, conn: OpenstackConnection) -> list[OpenstackServer]:
From a31f31085e31079188215b178b80a38671fdc854 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 31 Jul 2024 14:27:37 +0800
Subject: [PATCH 014/278] Fix server deletion
---
src/openstack_cloud/openstack_cloud.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index d41fb722a..90b7191ad 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -139,7 +139,7 @@ def delete_instance(self, name: str):
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
- server.delete()
+ conn.delete_server(name_or_id=server.id)
OpenstackCloud._delete_key_pair(conn, full_name)
def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
From acfa8c9a65ecc546b3dbfe4ad9dd37aa70dfaef9 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 31 Jul 2024 16:04:41 +0800
Subject: [PATCH 015/278] Add error handling of create openstack instance
---
src-docs/openstack_cloud.openstack_cloud.md | 8 ++--
src/openstack_cloud/openstack_cloud.py | 41 ++++++++++++++-------
2 files changed, 32 insertions(+), 17 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 9aa364477..8956f7c8c 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -64,7 +64,7 @@ Create a OpenstackCloud instance.
---
-
+
### method `delete_instance`
@@ -78,7 +78,7 @@ delete_instance(name: str)
---
-
+
### method `get_instance_name`
@@ -92,7 +92,7 @@ get_instance_name(name: str) → str
---
-
+
### method `get_instances`
@@ -106,7 +106,7 @@ get_instances() → list[OpenstackInstance]
---
-
+
### method `get_ssh_connection`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 90b7191ad..9070d6dac 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -117,18 +117,33 @@ def launch_instance(
security_group = OpenstackCloud._ensure_security_group(conn)
keypair = OpenstackCloud._setup_key_pair(conn, full_name)
- server = conn.create_server(
- name=full_name,
- image=image,
- key_name=keypair.name,
- flavor=flavor,
- network=network,
- security_groups=[security_group.id],
- userdata=userdata,
- auto_ip=False,
- timeout=_CREATE_SERVER_TIMEOUT,
- wait=True,
- )
+            try:
+                server = conn.create_server(
+                    name=full_name,
+                    image=image,
+                    key_name=keypair.name,
+                    flavor=flavor,
+                    network=network,
+                    security_groups=[security_group.id],
+                    userdata=userdata,
+                    auto_ip=False,
+                    timeout=_CREATE_SERVER_TIMEOUT,
+                    wait=True,
+                )
+            except openstack.exceptions.ResourceTimeout as err:
+                logger.exception("Timeout creating openstack server %s", full_name)
+                logger.info("Attempting clean up of openstack server %s that timeout during creation", full_name)
+                try:
+                    conn.delete_server(name_or_id=full_name, wait=True)
+                except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout):  # No `as err`: it would unbind the outer `err` re-raised from below.
+                    logger.exception("Failed to cleanup openstack server %s that timeout during creation", full_name)
+                self._delete_key_pair(conn, full_name)  # The keypair was created under full_name.
+                raise OpenStackError(f"Timeout creating openstack server {full_name}") from err
+            except openstack.exceptions.SDKException as err:
+                logger.exception("Failed to create openstack server %s", full_name)
+                self._delete_key_pair(conn, full_name)  # The keypair was created under full_name.
+                raise OpenStackError(f"Failed to create openstack server {full_name}") from err
+
return OpenstackInstance(server)
def delete_instance(self, name: str):
@@ -330,7 +345,7 @@ def _delete_key_pair(conn: OpenstackConnection, name: str) -> None:
# Keypair have unique names, access by ID is not needed.
if not conn.delete_keypair(name):
logger.warning("Unable to delete keypair for %s", name)
- except openstack.exceptions.SDKException:
+ except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout) as err:
logger.warning("Unable to delete keypair for %s", name, stack_info=True)
key_path = OpenstackCloud._get_key_path(name)
From 3f39a3e195064080c453b0cc1a76b3512df0cbd0 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 1 Aug 2024 15:22:32 +0800
Subject: [PATCH 016/278] Initial openstack runner manager refactor
---
src-docs/openstack_cloud.openstack_cloud.md | 36 +-
...penstack_cloud.openstack_runner_manager.md | 138 +++++++
src/manager/cloud_runner_manager.py | 75 ++++
src/manager/runner_manager.py | 20 ++
src/openstack_cloud/openstack_cloud.py | 36 +-
.../openstack_runner_manager.py | 339 ++++++++++++++++++
6 files changed, 625 insertions(+), 19 deletions(-)
create mode 100644 src-docs/openstack_cloud.openstack_runner_manager.md
create mode 100644 src/manager/cloud_runner_manager.py
create mode 100644 src/manager/runner_manager.py
create mode 100644 src/openstack_cloud/openstack_runner_manager.py
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 8956f7c8c..126a4413e 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -14,14 +14,14 @@
## class `OpenstackInstance`
-OpenstackInstance(server: openstack.compute.v2.server.Server)
+OpenstackInstance(server: openstack.compute.v2.server.Server, prefix: str)
-
+
### method `__init__`
```python
-__init__(server: Server)
+__init__(server: Server, prefix: str)
```
@@ -34,14 +34,14 @@ __init__(server: Server)
---
-
+
## class `OpenstackCloud`
-
+
### method `__init__`
@@ -64,12 +64,12 @@ Create a OpenstackCloud instance.
---
-
+
### method `delete_instance`
```python
-delete_instance(name: str)
+delete_instance(name: str) → None
```
@@ -78,7 +78,21 @@ delete_instance(name: str)
---
-
+
+
+### method `get_instance`
+
+```python
+get_instance(name: str) → OpenstackInstance
+```
+
+
+
+
+
+---
+
+
### method `get_instance_name`
@@ -92,7 +106,7 @@ get_instance_name(name: str) → str
---
-
+
### method `get_instances`
@@ -106,7 +120,7 @@ get_instances() → list[OpenstackInstance]
---
-
+
### method `get_ssh_connection`
@@ -120,7 +134,7 @@ get_ssh_connection(instance: OpenstackInstance) → Connection
---
-
+
### method `launch_instance`
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
new file mode 100644
index 000000000..02a1e2e4f
--- /dev/null
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -0,0 +1,138 @@
+
+
+
+
+# module `openstack_cloud.openstack_runner_manager`
+
+
+
+
+**Global Variables**
+---------------
+- **BUILD_OPENSTACK_IMAGE_SCRIPT_FILENAME**
+- **MAX_METRICS_FILE_SIZE**
+- **RUNNER_STARTUP_PROCESS**
+- **RUNNER_LISTENER_PROCESS**
+- **RUNNER_WORKER_PROCESS**
+- **CREATE_SERVER_TIMEOUT**
+
+
+---
+
+
+
+## class `OpenstackRunnerManagerConfig`
+OpenstackRunnerManagerConfig(image: str, flavor: str, network: str, github_path: charm_state.GithubOrg | charm_state.GithubRepo, labels: list[str], proxy_config: charm_state.ProxyConfig | None, dockerhub_mirror: str | None, ssh_debug_connections: list[charm_state.SSHDebugConnection], repo_policy_url: str, repo_policy_token: str, clouds_config: dict[str, dict], cloud: str)
+
+
+
+### method `__init__`
+
+```python
+__init__(
+ image: str,
+ flavor: str,
+ network: str,
+ github_path: GithubOrg | GithubRepo,
+ labels: list[str],
+ proxy_config: ProxyConfig | None,
+ dockerhub_mirror: str | None,
+ ssh_debug_connections: list[SSHDebugConnection],
+ repo_policy_url: str,
+ repo_policy_token: str,
+ clouds_config: dict[str, dict],
+ cloud: str
+) → None
+```
+
+
+
+
+
+
+
+
+
+---
+
+
+
+## class `OpenstackRunnerManager`
+
+
+
+
+
+
+### method `__init__`
+
+```python
+__init__(runner_flavor: str, config: OpenstackRunnerManagerConfig) → None
+```
+
+
+
+
+
+
+
+
+---
+
+
+
+### method `create_runner`
+
+```python
+create_runner(registration_token: str) → str
+```
+
+
+
+
+
+---
+
+
+
+### method `delete_runners`
+
+```python
+delete_runners(id: str, remove_token: str) → None
+```
+
+
+
+
+
+---
+
+
+
+### method `get_runner`
+
+```python
+get_runner(id: str) → RunnerInstance | None
+```
+
+
+
+
+
+---
+
+
+
+### method `get_runners`
+
+```python
+get_runners(
+ cloud_runner_status: list[CloudRunnerStatus]
+) → Tuple[RunnerInstance]
+```
+
+
+
+
+
+
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
new file mode 100644
index 000000000..39b2b6c94
--- /dev/null
+++ b/src/manager/cloud_runner_manager.py
@@ -0,0 +1,75 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+from abc import ABC
+from dataclasses import dataclass
+from enum import Enum
+from typing import Tuple
+
+RunnerId = str
+
+_OPENSTACK_STATUS_SHUTOFF = "SHUTOFF"
+_OPENSTACK_STATUS_ERROR = "ERROR"
+_OPENSTACK_STATUS_ACTIVE = "ACTIVE"
+_OPENSTACK_STATUS_BUILDING = "BUILDING"
+
+class CloudRunnerStatus(str, Enum):
+    created = "created"
+    active = "active"
+    deleted = "deleted"
+    error = "error"
+    stopped = "stopped"
+    unknown = "unknown"
+    unexpected = "unexpected"
+
+    @staticmethod
+    def from_openstack_status(status: str) -> "CloudRunnerStatus":
+        """Create from openstack server status.
+
+        The openstack server status are documented here:
+        https://docs.openstack.org/api-guide/compute/server_concepts.html
+
+        Args:
+            status: Openstack server status.
+
+        Returns:
+            The CloudRunnerStatus.
+        """
+        match status:
+            case "BUILD":
+                return CloudRunnerStatus.created
+            case "REBUILD":
+                return CloudRunnerStatus.created
+            case "ACTIVE":
+                return CloudRunnerStatus.active
+            case "ERROR":
+                return CloudRunnerStatus.error
+            case "SHUTOFF" | "STOPPED":  # OpenStack reports powered-off servers as SHUTOFF.
+                return CloudRunnerStatus.stopped
+            case "DELETED":
+                return CloudRunnerStatus.deleted
+            case "UNKNOWN":
+                return CloudRunnerStatus.unknown
+            case _:
+                return CloudRunnerStatus.unexpected
+
+@dataclass
+class RunnerInstance:
+ name: str
+ id: str
+ status: CloudRunnerStatus
+
+@dataclass
+class RunnerMetrics:
+ pass
+
+class CloudRunnerManager(ABC):
+    def create_runner(self, registration_token: str) -> RunnerId: ...
+
+    def get_runner(self, id: RunnerId) -> RunnerInstance | None: ...
+
+    def get_runners(
+        self, cloud_runner_status: list[CloudRunnerStatus]
+    ) -> Tuple[RunnerInstance, ...]: ...
+
+    def delete_runners(self, id: RunnerId, remove_token: str) -> None: ...
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
new file mode 100644
index 000000000..cb5ce7b05
--- /dev/null
+++ b/src/manager/runner_manager.py
@@ -0,0 +1,20 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+from dataclasses import dataclass
+from enum import Enum
+
+from manager.cloud_runner_manager import CloudRunnerStatus, RunnerId
+
+
+class GithubRunnerStatus(str, Enum):
+ busy = "busy"
+ idle = "idle"
+ offline = "offline"
+
+@dataclass
+class RunnerInstance:
+ github_name: str
+ id: RunnerId
+ github_status: GithubRunnerStatus
+ cloud_status: CloudRunnerStatus
\ No newline at end of file
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 9070d6dac..6ec16f11f 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -42,20 +42,28 @@ class _SshError(Exception):
@dataclass
class OpenstackInstance:
- id: str
+ server_id: str
+ server_name: str
name: str
addresses: list[str]
+ status: str
- def __init__(self, server: OpenstackServer):
- self.id = server.id
- self.name = server.name
+ def __init__(self, server: OpenstackServer, prefix: str):
+ self.server_id = server.id
+ self.server_name = server.name
+ self.status = server.status
self.addresses = [
address["addr"]
for network_addresses in server.addresses.values()
for address in network_addresses
]
+        if not self.server_name.startswith(f"{prefix}-"):
+            # Should never happen. Server names are built as "<prefix>-<name>" by get_instance_name.
+            raise ValueError(f"Found openstack server {server.name} managed under prefix {prefix}, contact devs")
+        self.name = self.server_name[len(prefix) + 1 :]
+
@contextmanager
def _get_openstack_connection(
clouds_config: dict[str, dict], cloud: str
@@ -145,17 +153,29 @@ def launch_instance(
raise OpenStackError(f"Failed to create openstack server {full_name}") from err
return OpenstackInstance(server)
+
+    def get_instance(self, name: str) -> OpenstackInstance:
+        """Get the openstack server named with this instance's prefix and `name`."""
+        full_name = self.get_instance_name(name)
+        logger.info("Getting openstack server with %s", full_name)
+        with _get_openstack_connection(
+            clouds_config=self._clouds_config, cloud=self._cloud
+        ) as conn:
+            return OpenstackInstance(OpenstackCloud._get_and_ensure_unique_server(conn, full_name), self.prefix)
- def delete_instance(self, name: str):
+ def delete_instance(self, name: str) -> None:
full_name = self.get_instance_name(name)
logger.info("Deleting openstack server with %s", full_name)
with _get_openstack_connection(
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
- server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
- conn.delete_server(name_or_id=server.id)
- OpenstackCloud._delete_key_pair(conn, full_name)
+ try:
+ server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
+ conn.delete_server(name_or_id=server.id)
+ OpenstackCloud._delete_key_pair(conn, full_name)
+ except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout) as err:
+ raise OpenStackError(f"Failed to remove openstack runner {full_name}") from err
def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
key_path = OpenstackCloud._get_key_path(instance.name)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
new file mode 100644
index 000000000..57249314a
--- /dev/null
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -0,0 +1,339 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+import logging
+from pathlib import Path
+import secrets
+from dataclasses import dataclass
+import time
+from typing import Tuple
+
+import jinja2
+from fabric import Connection as SshConnection
+import paramiko
+import paramiko.ssh_exception
+
+from charm_state import GithubOrg, GithubPath, ProxyConfig, SSHDebugConnection
+from errors import CreateMetricsStorageError, GetMetricsStorageError, IssueMetricEventError, OpenStackError, RunnerCreateError, RunnerRemoveError
+from manager.cloud_runner_manager import (
+ CloudRunnerManager,
+ CloudRunnerStatus,
+ RunnerId,
+ RunnerInstance,
+ RunnerMetrics,
+)
+from openstack_cloud.openstack_cloud import OpenstackCloud
+from openstack_cloud.openstack_manager import GithubRunnerRemoveError
+from repo_policy_compliance_client import RepoPolicyComplianceClient
+from metrics import events as metric_events
+from metrics import github as github_metrics
+from metrics import runner as runner_metrics
+from metrics import storage as metrics_storage
+
+logger = logging.getLogger(__name__)
+
+BUILD_OPENSTACK_IMAGE_SCRIPT_FILENAME = "scripts/build-openstack-image.sh"
+_SSH_KEY_PATH = Path("/home/ubuntu/.ssh")
+_CONFIG_SCRIPT_PATH = Path("/home/ubuntu/actions-runner/config.sh")
+
+RUNNER_APPLICATION = Path("/home/ubuntu/actions-runner")
+METRICS_EXCHANGE_PATH = Path("/home/ubuntu/metrics-exchange")
+PRE_JOB_SCRIPT = RUNNER_APPLICATION / "pre-job.sh"
+MAX_METRICS_FILE_SIZE = 1024
+
+RUNNER_STARTUP_PROCESS = "/home/ubuntu/actions-runner/run.sh"
+RUNNER_LISTENER_PROCESS = "Runner.Listener"
+RUNNER_WORKER_PROCESS = "Runner.Worker"
+CREATE_SERVER_TIMEOUT = 5 * 60
+
+class _SshError(Exception):
+ """Represents an error while interacting with SSH."""
+
+class _PullFileError(Exception):
+ """Represents an error while pulling a file from the runner instance."""
+
+@dataclass
+class OpenstackRunnerManagerConfig:
+ image: str
+ flavor: str
+ network: str
+ github_path: GithubPath
+ labels: list[str]
+ proxy_config: ProxyConfig | None
+ dockerhub_mirror: str | None
+ ssh_debug_connections: list[SSHDebugConnection]
+ repo_policy_url: str
+ repo_policy_token: str
+ clouds_config: dict[str, dict]
+ cloud: str
+
+
+class OpenstackRunnerManager(CloudRunnerManager):
+
+ def __init__(
+ self, runner_flavor: str, config: OpenstackRunnerManagerConfig
+ ) -> None:
+ self.runner_flavor = runner_flavor
+ self.config = config
+ self._openstack_cloud = OpenstackCloud(clouds_config=self.config.clouds_config, cloud=self.config.cloud, prefix=self.runner_flavor)
+
+    def create_runner(self, registration_token: str) -> RunnerId:
+        start_timestamp = time.time()
+        runner_id = OpenstackRunnerManager._generate_runner_id()
+        instance_name = self._openstack_cloud.get_instance_name(name=runner_id)
+        userdata = self._generate_userdata(instance_name=instance_name, registration_token=registration_token)
+        try:
+            self._openstack_cloud.launch_instance(
+                name=runner_id,
+                image=self.config.image,
+                flavor=self.config.flavor,
+                network=self.config.network,
+                userdata=userdata,
+            )
+        except OpenStackError as err:
+            raise RunnerCreateError(f"Failed to create {instance_name} openstack runner") from err
+        end_timestamp = time.time()
+        OpenstackRunnerManager._issue_runner_installed_metric(
+            name=instance_name,
+            flavor=self.runner_flavor,
+            install_start_timestamp=start_timestamp,
+            install_end_timestamp=end_timestamp,
+        )
+        return runner_id
+
+    def get_runner(self, id: RunnerId) -> RunnerInstance | None:
+        """Get the runner with the given id, or None if no matching instance is found."""
+        name = self._openstack_cloud.get_instance_name(id)
+        for instance in self._openstack_cloud.get_instances():
+            if instance.server_name == name:  # `instance.name` has the prefix stripped; compare full names.
+                return RunnerInstance(name=name, id=id, status=CloudRunnerStatus.from_openstack_status(instance.status))
+        return None
+
+    def get_runners(self, cloud_runner_status: list[CloudRunnerStatus]) -> Tuple[RunnerInstance, ...]:
+        """Get the runners whose cloud status is in `cloud_runner_status`."""
+        runners = (RunnerInstance(name=instance.server_name, id=instance.name, status=CloudRunnerStatus.from_openstack_status(instance.status))
+                   for instance in self._openstack_cloud.get_instances())
+        return tuple(runner for runner in runners if runner.status in cloud_runner_status)
+
+ def delete_runners(self, id: RunnerId, remove_token: str) -> None:
+ instance = self._openstack_cloud.get_instance(id)
+ ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
+ self._pull_runner_metrics(instance.name, ssh_conn)
+ try:
+ OpenstackRunnerManager._run_github_runner_removal_script(instance.name,ssh_conn, remove_token)
+ except GithubRunnerRemoveError:
+ logger.warning("Unable to run github runner removal script for %s", instance.name, stack_info=True)
+
+ try:
+ self._openstack_cloud.delete_instance(id)
+ except OpenStackError:
+ logger.exception("Unable to delete openstack instance for runner %s", instance.name)
+
+ def _generate_userdata(self, instance_name: str, registration_token: str) -> str:
+ jinja = jinja2.Environment(
+ loader=jinja2.FileSystemLoader("templates"), autoescape=True
+ )
+
+ env_contents = jinja.get_template("env.j2").render(
+ pre_job_script=str(PRE_JOB_SCRIPT),
+ dockerhub_mirror=self.config.dockerhub_mirror or "",
+ ssh_debug_info=(secrets.choice(self.config.ssh_debug_connections) if self.config.ssh_debug_connections else None),
+ # Proxies are handled by aproxy.
+ proxies={},
+ )
+
+ pre_job_contents_dict = {
+ "issue_metrics": True,
+ "metrics_exchange_path": str(METRICS_EXCHANGE_PATH),
+ "do_repo_policy_check": False,
+ }
+ repo_policy = self._get_repo_policy_compliance_client()
+ if repo_policy is not None:
+ pre_job_contents_dict.update(
+ {
+ "repo_policy_base_url": repo_policy.base_url,
+ "repo_policy_one_time_token": repo_policy.get_one_time_token(),
+ "do_repo_policy_check": True,
+ }
+ )
+
+ pre_job_contents = jinja.get_template("pre-job.j2").render(pre_job_contents_dict)
+
+ runner_group = None
+ if isinstance(self.config.github_path, GithubOrg):
+ runner_group = self.config.github_path.group
+ aproxy_address = self.config.proxy_config.aproxy_address if self.config.proxy_config is not None else None
+ return jinja.get_template("openstack_userdata.sh.j2").render(
+ github_url=f"https://github.com/{self.config.github_path.path()}",
+ runner_group=runner_group,
+ token=registration_token,
+ instance_labels=",".join(self.config.labels),
+ instance_name=instance_name,
+ env_contents=env_contents,
+ pre_job_contents=pre_job_contents,
+ metrics_exchange_path=str(METRICS_EXCHANGE_PATH),
+ aproxy_address=aproxy_address,
+ dockerhub_mirror=self.config.dockerhub_mirror,
+ )
+
+ def _get_repo_policy_compliance_client(self) -> RepoPolicyComplianceClient | None:
+ if self.config.repo_policy_url and self.config.repo_policy_token:
+ return RepoPolicyComplianceClient(self.config.repo_policy_url, self.config.repo_policy_token)
+ return None
+
+ @staticmethod
+ def _generate_runner_id() -> RunnerId:
+ return secrets.token_hex(12)
+
+    @staticmethod
+    def _issue_runner_installed_metric(
+        name: str,
+        flavor: str,
+        install_start_timestamp: float,
+        install_end_timestamp: float,
+    ) -> None:
+        """Issue the RunnerInstalled metric and store the install end timestamp; failures are only logged."""
+        try:
+            metric_events.issue_event(
+                event=metric_events.RunnerInstalled(
+                    timestamp=install_start_timestamp,
+                    flavor=flavor,
+                    duration=install_end_timestamp - install_start_timestamp,
+                )
+            )
+        except IssueMetricEventError:
+            logger.exception("Failed to issue RunnerInstalled metric")
+        try:
+            storage = metrics_storage.create(name)
+        except CreateMetricsStorageError:
+            logger.exception(
+                "Failed to create metrics storage for runner %s, "
+                "will not be able to issue all metrics.",
+                name,
+            )
+        else:
+            try:
+                (storage.path / runner_metrics.RUNNER_INSTALLED_TS_FILE_NAME).write_text(
+                    str(install_end_timestamp), encoding="utf-8"
+                )
+            except FileNotFoundError:
+                logger.exception(
+                    "Failed to write runner-installed.timestamp into metrics storage "
+                    "for runner %s, will not be able to issue all metrics.",
+                    name,
+                )
+
+ @staticmethod
+ def _pull_runner_metrics(name: str, ssh_conn: SshConnection) -> None:
+ try:
+ storage = metrics_storage.get(name)
+ except GetMetricsStorageError:
+ logger.exception(
+ "Failed to get shared metrics storage for runner %s, "
+ "will not be able to issue all metrics.",
+ name,
+ )
+ return
+
+ try:
+ OpenstackRunnerManager._ssh_pull_file(
+ ssh_conn=ssh_conn,
+ remote_path=str(METRICS_EXCHANGE_PATH / "pre-job-metrics.json"),
+ local_path=str(storage.path / "pre-job-metrics.json"),
+ max_size=MAX_METRICS_FILE_SIZE,
+ )
+ OpenstackRunnerManager._ssh_pull_file(
+ ssh_conn=ssh_conn,
+ remote_path=str(METRICS_EXCHANGE_PATH / "post-job-metrics.json"),
+ local_path=str(storage.path / "post-job-metrics.json"),
+ max_size=MAX_METRICS_FILE_SIZE,
+ )
+ except _PullFileError as exc:
+ logger.warning(
+ "Failed to pull metrics for %s: %s . Will not be able to issue all metrics",
+ name,
+ exc,
+ )
+
+
+    @staticmethod
+    def _ssh_pull_file(ssh_conn: SshConnection, remote_path: str, local_path: str, max_size: int) -> None:
+        """Pull file from the runner instance.
+
+        Args:
+            ssh_conn: The SSH connection instance.
+            remote_path: The file path on the runner instance.
+            local_path: The local path to store the file.
+            max_size: If the file is larger than this, it will not be pulled.
+
+        Raises:
+            _PullFileError: Unable to pull the file from the runner instance.
+            _SshError: Issue with SSH connection.
+        """
+        try:
+            result = ssh_conn.run(f"stat -c %s {remote_path}", warn=True)
+        except (TimeoutError, paramiko.ssh_exception.NoValidConnectionsError, paramiko.ssh_exception.SSHException) as exc:
+            raise _SshError(f"Unable to SSH into {ssh_conn.host}") from exc
+        if not result.ok:
+            logger.warning(
+                (
+                    "Unable to get file size of %s on instance %s, "
+                    "exit code: %s, stdout: %s, stderr: %s"
+                ),
+                remote_path,
+                ssh_conn.host,
+                result.return_code,
+                result.stdout,
+                result.stderr,
+            )
+            raise _PullFileError(f"Unable to get file size of {remote_path}")
+
+        stdout = result.stdout
+        try:
+            stdout = stdout.strip()  # str.strip returns a new string; keep it for parsing and the message.
+            size = int(stdout)
+            if size > max_size:
+                raise _PullFileError(f"File size of {remote_path} too large {size} > {max_size}")
+        except ValueError as exc:
+            raise _PullFileError(f"Invalid file size for {remote_path}: {stdout}") from exc
+
+        try:
+            ssh_conn.get(remote=remote_path, local=local_path)
+        except (TimeoutError, paramiko.ssh_exception.NoValidConnectionsError, paramiko.ssh_exception.SSHException) as exc:
+            raise _SshError(f"Unable to SSH into {ssh_conn.host}") from exc
+        except OSError as exc:
+            raise _PullFileError(f"Unable to retrieve file {remote_path}") from exc
+
+ @staticmethod
+ def _run_github_runner_removal_script(instance_name: str, ssh_conn: SshConnection, remove_token: str) -> None:
+ """Run Github runner removal script.
+
+ Args:
+ ssh_conn: The SSH connection to the runner instance.
+ remove_token: The GitHub instance removal token.
+
+ Raises:
+ GithubRunnerRemoveError: Unable to remove runner from GitHub.
+ """
+ try:
+ result = ssh_conn.run(
+ f"{_CONFIG_SCRIPT_PATH} remove --token {remove_token}",
+ warn=True,
+ )
+ if result.ok:
+ return
+
+ logger.warning(
+ (
+ "Unable to run removal script on instance %s, "
+ "exit code: %s, stdout: %s, stderr: %s"
+ ),
+ instance_name,
+ result.return_code,
+ result.stdout,
+ result.stderr,
+ )
+ raise GithubRunnerRemoveError(f"Failed to remove runner {instance_name} from Github.")
+ except (TimeoutError, paramiko.ssh_exception.NoValidConnectionsError, paramiko.ssh_exception.SSHException) as exc:
+ raise GithubRunnerRemoveError(f"Failed to remove runner {instance_name} from Github.") from exc
+
\ No newline at end of file
From 941ae941d71232221d97b2cb5936b8078270025d Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 2 Aug 2024 22:38:23 +0800
Subject: [PATCH 017/278] Initial runner manager implementation
---
src-docs/errors.md | 11 +
src-docs/openstack_cloud.openstack_cloud.md | 36 ++-
src/errors.py | 4 +
src/manager/cloud_runner_manager.py | 66 ++---
src/manager/github_runner_manager.py | 58 +++++
src/manager/runner_manager.py | 109 ++++++++-
src/openstack_cloud/openstack_cloud.py | 55 +++--
.../openstack_runner_manager.py | 227 ++++++++++++++----
8 files changed, 443 insertions(+), 123 deletions(-)
create mode 100644 src/manager/github_runner_manager.py
diff --git a/src-docs/errors.md b/src-docs/errors.md
index c0f190a73..d091b72f9 100644
--- a/src-docs/errors.md
+++ b/src-docs/errors.md
@@ -403,3 +403,14 @@ Represents an unauthorized connection to OpenStack.
+---
+
+
+
+## class `SshError`
+Represents an error while interacting with SSH.
+
+
+
+
+
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 126a4413e..0b227b3ba 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -11,12 +11,12 @@
---
-
+
## class `OpenstackInstance`
OpenstackInstance(server: openstack.compute.v2.server.Server, prefix: str)
-
+
### method `__init__`
@@ -34,14 +34,14 @@ __init__(server: Server, prefix: str)
---
-
+
## class `OpenstackCloud`
-
+
### method `__init__`
@@ -64,7 +64,21 @@ Create a OpenstackCloud instance.
---
-
+
+
+### method `cleanup`
+
+```python
+cleanup() → None
+```
+
+
+
+
+
+---
+
+
### method `delete_instance`
@@ -78,7 +92,7 @@ delete_instance(name: str) → None
---
-
+
### method `get_instance`
@@ -92,7 +106,7 @@ get_instance(name: str) → OpenstackInstance
---
-
+
### method `get_instance_name`
@@ -106,12 +120,12 @@ get_instance_name(name: str) → str
---
-
+
### method `get_instances`
```python
-get_instances() → list[OpenstackInstance]
+get_instances() → tuple[OpenstackInstance]
```
@@ -120,7 +134,7 @@ get_instances() → list[OpenstackInstance]
---
-
+
### method `get_ssh_connection`
@@ -134,7 +148,7 @@ get_ssh_connection(instance: OpenstackInstance) → Connection
---
-
+
### method `launch_instance`
diff --git a/src/errors.py b/src/errors.py
index 55d84e8e1..0dab2a54a 100644
--- a/src/errors.py
+++ b/src/errors.py
@@ -166,3 +166,7 @@ class OpenStackInvalidConfigError(OpenStackError):
class OpenStackUnauthorizedError(OpenStackError):
"""Represents an unauthorized connection to OpenStack."""
+
+
+class SshError(Exception):
+ """Represents an error while interacting with SSH."""
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 39b2b6c94..621a26179 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -4,7 +4,7 @@
from abc import ABC
from dataclasses import dataclass
from enum import Enum
-from typing import Tuple
+from typing import Sequence, Tuple
RunnerId = str
@@ -13,63 +13,65 @@
_OPENSTACK_STATUS_ACTIVE = "ACTIVE"
_OPENSTACK_STATUS_BUILDING = "BUILDING"
-class CloudRunnerStatus(str, Enum):
- created = "created"
- active = "active"
- deleted = "deleted"
- error = "error"
- stopped = "stopped"
- unknown = "unknown"
- unexpected = "unexpected"
-
-
- def from_openstack_status(status: str) -> "CloudRunnerStatus":
+
+class CloudRunnerState(str, Enum):
+ CREATED = "created"
+ ACTIVE = "active"
+ DELETED = "deleted"
+ ERROR = "error"
+ STOPPED = "stopped"
+ UNKNOWN = "unknown"
+ UNEXPECTED = "unexpected"
+
+ def __init__(openstack_server_status: str) -> None:
"""Create from openstack server status.
-
+
The openstack server status are documented here:
https://docs.openstack.org/api-guide/compute/server_concepts.html
-
+
Args:
status: Openstack server status.
-
- Returns:
- The CloudRunnerStatus.
"""
- match status:
+ match openstack_server_status:
case "BUILD":
- return CloudRunnerStatus.created
+ return CloudRunnerState.CREATED
case "REBUILD":
- return CloudRunnerStatus.created
+ return CloudRunnerState.CREATED
case "ACTIVE":
- return CloudRunnerStatus.active
+ return CloudRunnerState.ACTIVE
case "ERROR":
- return CloudRunnerStatus.error
+ return CloudRunnerState.ERROR
case "STOPPED":
- return CloudRunnerStatus.stopped
+ return CloudRunnerState.STOPPED
case "DELETED":
- return CloudRunnerStatus.deleted
+ return CloudRunnerState.DELETED
case "UNKNOWN":
- return CloudRunnerStatus.unknown
+ return CloudRunnerState.UNKNOWN
case _:
- return CloudRunnerStatus.unexpected
+ return CloudRunnerState.UNEXPECTED
+
@dataclass
-class RunnerInstance:
+class CloudRunnerInstance:
name: str
id: str
- status: CloudRunnerStatus
+ status: CloudRunnerState
+
@dataclass
class RunnerMetrics:
pass
+
class CloudRunnerManager(ABC):
def create_runner(self, registration_token: str) -> RunnerId: ...
- def get_runner(self, id: RunnerId) -> RunnerInstance: ...
+ def get_runner(self, id: RunnerId) -> CloudRunnerInstance: ...
def get_runners(
- self, cloud_runner_status: list[CloudRunnerStatus]
- ) -> Tuple[RunnerInstance]: ...
+ self, cloud_runner_status: Sequence[CloudRunnerState]
+ ) -> Tuple[CloudRunnerInstance]: ...
+
+ def delete_runner(self, id: RunnerId, remove_token: str) -> None: ...
- def delete_runners(self, id: RunnerId, remove_token: str) -> None: ...
+ def cleanup_runner(self, remove_token: str) -> None: ...
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
new file mode 100644
index 000000000..41bfbc20a
--- /dev/null
+++ b/src/manager/github_runner_manager.py
@@ -0,0 +1,58 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+from enum import Enum, auto
+from typing import Sequence
+
+from charm_state import GithubPath
+from github_client import GithubClient
+from github_type import GitHubRunnerStatus, SelfHostedRunner
+
+
+class GithubRunnerState(str, Enum):
+ BUSY = "busy"
+ IDLE = "idle"
+ OFFLINE = "offline"
+ UNKNOWN = "unknown"
+
+ def __init__(self, runner: SelfHostedRunner) -> "GithubRunnerState":
+ state = GithubRunnerState.OFFLINE
+ if runner.status == GitHubRunnerStatus.ONLINE:
+ if runner.busy:
+ state = GithubRunnerState.BUSY
+ if not runner.busy:
+ state = GithubRunnerState.IDLE
+ return state
+
+
+class GithubRunnerManager:
+
+ def __init__(self, prefix: str, token: str, path: GithubPath):
+ self._prefix = prefix
+ self._path = path
+ self._github = GithubClient(token)
+
+ def get_runners(self, states: Sequence[GithubRunnerState]) -> tuple[SelfHostedRunner]:
+ runner_list = self._github.get_runner_github_info()
+ return tuple(
+ runner
+ for runner in runner_list
+ if GithubRunnerManager._filter_runner_state(runner, states)
+ )
+
+ def delete_runners(self, states: Sequence[GithubRunnerState]) -> None:
+ runner_list = self.get_runners(states)
+ for runner in runner_list:
+ self._github.delete_runner(self._path, runner.id)
+
+ def get_registration_token(self) -> str:
+ return self._github.get_runner_registration_token(self._path)
+
+ def get_removal_token(self) -> str:
+ return self._github.get_runner_remove_token(self._path)
+
+ @staticmethod
+ def _filter_runner_state(
+ runner: SelfHostedRunner, states: Sequence[GithubRunnerState]
+ ) -> bool:
+ return GithubRunnerState(runner) in states
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index cb5ce7b05..c12fdf526 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -2,19 +2,110 @@
# See LICENSE file for licensing details.
from dataclasses import dataclass
-from enum import Enum
+from enum import Enum, auto
+from typing import Sequence
-from manager.cloud_runner_manager import CloudRunnerStatus, RunnerId
+from charm_state import GithubPath
+from github_type import SelfHostedRunner
+from manager.cloud_runner_manager import (
+ CloudRunnerInstance,
+ CloudRunnerManager,
+ CloudRunnerState,
+ RunnerId,
+)
+from manager.github_runner_manager import GithubRunnerManager, GithubRunnerState
-class GithubRunnerStatus(str, Enum):
- busy = "busy"
- idle = "idle"
- offline = "offline"
+class FlushMode(Enum):
+ """Strategy for flushing runners.
+
+ Attributes:
+ FLUSH_IDLE: Flush idle runners.
+ FLUSH_BUSY: Flush busy runners.
+ """
+
+ FLUSH_IDLE = auto()
+ FLUSH_BUSY = auto()
+
@dataclass
class RunnerInstance:
- github_name: str
+ name: str
id: RunnerId
- github_status: GithubRunnerStatus
- cloud_status: CloudRunnerStatus
\ No newline at end of file
+ github_state: GithubRunnerState
+ cloud_state: CloudRunnerState
+
+ def __init__(
+ self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedRunner
+ ) -> "RunnerInstance":
+ self.name = github_info.name
+ self.id = cloud_instance.id
+ self.github_state = GithubRunnerState(SelfHostedRunner)
+ self.cloud_state = cloud_instance.status
+
+
+@dataclass
+class RunnerManagerConfig:
+ prefix: str
+ token: str
+ path: GithubPath
+
+
+class RunnerManager:
+
+ def __init__(self, cloud_runner_manager: CloudRunnerManager, config: RunnerManagerConfig):
+ self._config = config
+ self._cloud = cloud_runner_manager
+ self._github = GithubRunnerManager(
+ self._config.prefix, self._config.path, self._config.path
+ )
+
+ def create_runners(self, num: int) -> list[RunnerId]:
+ registration_token = self._github.get_registration_token()
+
+ runner_ids = []
+ for _ in range(num):
+ runner_ids.append(self._cloud.create_runner(registration_token=registration_token))
+
+ return runner_ids
+
+ def get_runners(
+ self,
+ github_runner_state: Sequence[GithubRunnerState] = None,
+ cloud_runner_state: Sequence[CloudRunnerState] = None,
+ ) -> tuple[RunnerInstance]:
+ """Get information on runner filter by state.
+
+ Args:
+ github_runner_state: Filter for the runners with these github states. If None all
+ states will be included.
+ cloud_runner_state: Filter for the runners with these cloud states. If None all states
+ will be included.
+
+ Returns:
+ Information on the runners.
+ """
+ cloud_infos = self._cloud.get_runners(cloud_runner_status=cloud_runner_state)
+ github_infos = self._github.get_runners(github_runner_state)
+ cloud_infos_map = {info.name: info for info in cloud_infos}
+ github_infos_map = {info.name: info for info in github_infos}
+ return tuple(
+ RunnerInstance(cloud_infos_map[name], github_infos_map[name])
+ for name in cloud_infos_map.keys() & github_infos_map.keys()
+ )
+
+ def delete_runners(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> None:
+ states = [GithubRunnerState.IDLE]
+ if flush_mode == FlushMode.FLUSH_BUSY:
+ states.append(GithubRunnerState.BUSY)
+
+ runners_list = self.get_runners(github_runner_state=states)
+ remove_token = self._github.get_removal_token()
+
+ for runner in runners_list:
+ self._cloud.delete_runners(id=runner.id, remove_token=remove_token)
+
+ def cleanup(self) -> None:
+ self._github.delete_runners([GithubRunnerState.OFFLINE, GithubRunnerState.UNKNOWN])
+ remove_token = self._github.get_removal_token()
+ self._cloud.cleanup_runner(remove_token)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 6ec16f11f..662014044 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -21,7 +21,7 @@
from openstack.network.v2.security_group import SecurityGroup as OpenstackSecurityGroup
from paramiko.ssh_exception import NoValidConnectionsError
-from errors import OpenStackError
+from errors import OpenStackError, SshError
logger = logging.getLogger(__name__)
@@ -36,10 +36,6 @@
_TEST_STRING = "test_string"
-class _SshError(Exception):
- """Represents an error while interacting with SSH."""
-
-
@dataclass
class OpenstackInstance:
server_id: str
@@ -60,10 +56,12 @@ def __init__(self, server: OpenstackServer, prefix: str):
if not self.name.startswith(prefix):
# Should never happen.
- raise ValueError(f"Found openstack server {server.name} managed under prefix {prefix}, contact devs")
- self.name = self.server_name[len(prefix):]
+ raise ValueError(
+ f"Found openstack server {server.name} managed under prefix {prefix}, contact devs"
+ )
+ self.name = self.server_name[len(prefix) :]
+
-
@contextmanager
def _get_openstack_connection(
clouds_config: dict[str, dict], cloud: str
@@ -140,11 +138,20 @@ def launch_instance(
)
except openstack.exceptions.ResourceTimeout as err:
logger.exception("Timeout creating openstack server %s", full_name)
- logger.info("Attempting clean up of openstack server %s that timeout during creation", full_name)
+ logger.info(
+ "Attempting clean up of openstack server %s that timeout during creation",
+ full_name,
+ )
try:
conn.delete_server(name_or_id=full_name, wait=True)
- except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout) as err:
- logger.exception("Failed to cleanup openstack server %s that timeout during creation", full_name)
+ except (
+ openstack.exceptions.SDKException,
+ openstack.exceptions.ResourceTimeout,
+ ) as err:
+ logger.exception(
+ "Failed to cleanup openstack server %s that timeout during creation",
+ full_name,
+ )
self._delete_key_pair(conn, name)
raise OpenStackError(f"Timeout creating openstack server {full_name}") from err
except openstack.exceptions.SDKException as err:
@@ -153,7 +160,7 @@ def launch_instance(
raise OpenStackError(f"Failed to create openstack server {full_name}") from err
return OpenstackInstance(server)
-
+
def get_instance(self, name: str) -> OpenstackInstance:
full_name = self.get_instance_name(name)
logger.info("Getting openstack server with %s", full_name)
@@ -174,16 +181,19 @@ def delete_instance(self, name: str) -> None:
server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
conn.delete_server(name_or_id=server.id)
OpenstackCloud._delete_key_pair(conn, full_name)
- except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout) as err:
+ except (
+ openstack.exceptions.SDKException,
+ openstack.exceptions.ResourceTimeout,
+ ) as err:
raise OpenStackError(f"Failed to remove openstack runner {full_name}") from err
def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
key_path = OpenstackCloud._get_key_path(instance.name)
if not key_path.exists():
- raise _SshError(f"Missing keyfile for server: {instance.name}, key path: {key_path}")
+ raise SshError(f"Missing keyfile for server: {instance.name}, key path: {key_path}")
if not instance.addresses:
- raise _SshError(f"No addresses found for OpenStack server {instance.name}")
+ raise SshError(f"No addresses found for OpenStack server {instance.name}")
for ip in instance.addresses:
try:
@@ -209,12 +219,12 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
exc_info=True,
)
continue
- raise _SshError(
+ raise SshError(
f"No connectable SSH addresses found, server: {instance.name}, "
f"addresses: {instance.addresses}"
)
- def get_instances(self) -> list[OpenstackInstance]:
+ def get_instances(self) -> tuple[OpenstackInstance]:
logger.info("Getting all openstack servers managed by the charm")
with _get_openstack_connection(
@@ -227,6 +237,15 @@ def get_instances(self) -> list[OpenstackInstance]:
for name in server_names
]
+ def cleanup(self) -> None:
+ with _get_openstack_connection(
+ clouds_config=self._clouds_config, cloud=self._cloud
+ ) as conn:
+ server_list = self._get_openstack_instances(conn)
+ exclude_list = [server.name for server in server_list]
+ self._cleanup_key_files(conn, exclude_list)
+ self._clean_up_openstack_keypairs(conn, exclude_list)
+
def _cleanup_key_files(
self, conn: OpenstackConnection, exclude_instances: Iterable[str]
) -> None:
@@ -295,7 +314,7 @@ def _clean_up_openstack_keypairs(
def get_instance_name(self, name: str) -> str:
return f"{self.prefix}-{name}"
- def _get_openstack_instances(self, conn: OpenstackConnection) -> list[OpenstackServer]:
+ def _get_openstack_instances(self, conn: OpenstackConnection) -> tuple[OpenstackServer]:
"""Get the OpenStack servers managed by this unit.
Args:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 57249314a..95981063c 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -2,38 +2,49 @@
# See LICENSE file for licensing details.
import logging
-from pathlib import Path
import secrets
-from dataclasses import dataclass
import time
-from typing import Tuple
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Sequence, Tuple
+import invoke
import jinja2
-from fabric import Connection as SshConnection
import paramiko
import paramiko.ssh_exception
+from fabric import Connection as SshConnection
from charm_state import GithubOrg, GithubPath, ProxyConfig, SSHDebugConnection
-from errors import CreateMetricsStorageError, GetMetricsStorageError, IssueMetricEventError, OpenStackError, RunnerCreateError, RunnerRemoveError
+from errors import (
+ CreateMetricsStorageError,
+ GetMetricsStorageError,
+ IssueMetricEventError,
+ OpenStackError,
+ RunnerCreateError,
+ RunnerError,
+ RunnerRemoveError,
+ RunnerStartError,
+ SshError,
+)
from manager.cloud_runner_manager import (
+ CloudRunnerInstance,
CloudRunnerManager,
- CloudRunnerStatus,
+ CloudRunnerState,
RunnerId,
- RunnerInstance,
RunnerMetrics,
)
-from openstack_cloud.openstack_cloud import OpenstackCloud
-from openstack_cloud.openstack_manager import GithubRunnerRemoveError
-from repo_policy_compliance_client import RepoPolicyComplianceClient
from metrics import events as metric_events
from metrics import github as github_metrics
from metrics import runner as runner_metrics
from metrics import storage as metrics_storage
+from openstack_cloud.openstack_cloud import OpenstackCloud, OpenstackInstance
+from openstack_cloud.openstack_manager import GithubRunnerRemoveError
+from repo_policy_compliance_client import RepoPolicyComplianceClient
+from utilities import retry
logger = logging.getLogger(__name__)
BUILD_OPENSTACK_IMAGE_SCRIPT_FILENAME = "scripts/build-openstack-image.sh"
-_SSH_KEY_PATH = Path("/home/ubuntu/.ssh")
_CONFIG_SCRIPT_PATH = Path("/home/ubuntu/actions-runner/config.sh")
RUNNER_APPLICATION = Path("/home/ubuntu/actions-runner")
@@ -46,12 +57,11 @@
RUNNER_WORKER_PROCESS = "Runner.Worker"
CREATE_SERVER_TIMEOUT = 5 * 60
-class _SshError(Exception):
- """Represents an error while interacting with SSH."""
class _PullFileError(Exception):
"""Represents an error while pulling a file from the runner instance."""
+
@dataclass
class OpenstackRunnerManagerConfig:
image: str
@@ -70,20 +80,24 @@ class OpenstackRunnerManagerConfig:
class OpenstackRunnerManager(CloudRunnerManager):
- def __init__(
- self, runner_flavor: str, config: OpenstackRunnerManagerConfig
- ) -> None:
+ def __init__(self, runner_flavor: str, config: OpenstackRunnerManagerConfig) -> None:
self.runner_flavor = runner_flavor
self.config = config
- self._openstack_cloud = OpenstackCloud(clouds_config=self.config.clouds_config, cloud=self.config.cloud, prefix=self.runner_flavor)
+ self._openstack_cloud = OpenstackCloud(
+ clouds_config=self.config.clouds_config,
+ cloud=self.config.cloud,
+ prefix=self.runner_flavor,
+ )
def create_runner(self, registration_token: str) -> RunnerId:
start_timestamp = time.time()
id = OpenstackRunnerManager._generate_runner_id()
instance_name = self._openstack_cloud.get_instance_name(name=id)
- userdata = self._generate_userdata(instance_name=instance_name,registration_token=registration_token)
+ userdata = self._generate_userdata(
+ instance_name=instance_name, registration_token=registration_token
+ )
try:
- self._openstack_cloud.launch_instance(
+ instance = self._openstack_cloud.launch_instance(
name=id,
image=self.config.image,
flavor=self.config.flavor,
@@ -92,6 +106,16 @@ def create_runner(self, registration_token: str) -> RunnerId:
)
except OpenStackError as err:
raise RunnerCreateError("Failed to create {instance_name} openstack runner") from err
+
+ try:
+ ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
+ except SshError as err:
+ raise RunnerCreateError(
+ "Failed to SSH connect to {instance_name} openstack runner"
+ ) from err
+
+ OpenstackRunnerManager._wait_runner_startup(ssh_conn, instance_name)
+
end_timestamp = time.time()
OpenstackRunnerManager._issue_runner_installed_metric(
name=instance_name,
@@ -101,43 +125,78 @@ def create_runner(self, registration_token: str) -> RunnerId:
)
return id
- def get_runner(self, id: RunnerId) -> RunnerInstance | None:
+ def get_runner(self, id: RunnerId) -> CloudRunnerInstance | None:
name = self._openstack_cloud.get_instance_name(id)
instances_list = self._openstack_cloud.get_instances()
for instance in instances_list:
if instance.name == name:
- return RunnerInstance(name=name, id=id, status=CloudRunnerStatus.from_openstack_status(instance.status))
+ return CloudRunnerInstance(
+ name=name,
+ id=id,
+ status=CloudRunnerState(instance.status),
+ )
return None
- def get_runners(self, cloud_runner_status: list[CloudRunnerStatus]) -> Tuple[RunnerInstance]:
+ def get_runners(
+ self, cloud_runner_status: Sequence[CloudRunnerState]
+ ) -> Tuple[CloudRunnerInstance]:
instances_list = self._openstack_cloud.get_instances()
- instances_list = [RunnerInstance(name=instance.name, id=self._openstack_cloud.convert_name(instance.name), status=CloudRunnerStatus.from_openstack_status(instance.status))
- for instance in instances_list]
+ instances_list = [
+ CloudRunnerInstance(
+ name=instance.name,
+ id=self._openstack_cloud.convert_name(instance.name),
+ status=CloudRunnerState(instance.status),
+ )
+ for instance in instances_list
+ ]
return [instance for instance in instances_list if instance.status in cloud_runner_status]
- def delete_runners(self, id: RunnerId, remove_token: str) -> None:
+ def delete_runner(self, id: RunnerId, remove_token: str) -> None:
instance = self._openstack_cloud.get_instance(id)
+ self._delete_runner(instance, remove_token)
+
+ def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
self._pull_runner_metrics(instance.name, ssh_conn)
try:
- OpenstackRunnerManager._run_github_runner_removal_script(instance.name,ssh_conn, remove_token)
+ OpenstackRunnerManager._run_github_runner_removal_script(
+ instance.name, ssh_conn, remove_token
+ )
except GithubRunnerRemoveError:
- logger.warning("Unable to run github runner removal script for %s", instance.name, stack_info=True)
+ logger.warning(
+ "Unable to run github runner removal script for %s", instance.name, stack_info=True
+ )
try:
self._openstack_cloud.delete_instance(id)
except OpenStackError:
logger.exception("Unable to delete openstack instance for runner %s", instance.name)
+ def cleanup(self, remove_token: str) -> None:
+ runner_list = self._openstack_cloud.get_instances()
+
+ for runner in runner_list:
+ state = CloudRunnerState(runner.status)
+ if state in (
+ CloudRunnerState.DELETED,
+ CloudRunnerState.ERROR,
+ CloudRunnerState.STOPPED,
+ ) or self._health_check(runner):
+ self._delete_runner(runner, remove_token)
+
+ self._openstack_cloud.cleanup()
+
def _generate_userdata(self, instance_name: str, registration_token: str) -> str:
- jinja = jinja2.Environment(
- loader=jinja2.FileSystemLoader("templates"), autoescape=True
- )
-
+ jinja = jinja2.Environment(loader=jinja2.FileSystemLoader("templates"), autoescape=True)
+
env_contents = jinja.get_template("env.j2").render(
pre_job_script=str(PRE_JOB_SCRIPT),
dockerhub_mirror=self.config.dockerhub_mirror or "",
- ssh_debug_info=(secrets.choice(self.config.ssh_debug_connections) if self.config.ssh_debug_connections else None),
+ ssh_debug_info=(
+ secrets.choice(self.config.ssh_debug_connections)
+ if self.config.ssh_debug_connections
+ else None
+ ),
# Proxies are handled by aproxy.
proxies={},
)
@@ -158,11 +217,15 @@ def _generate_userdata(self, instance_name: str, registration_token: str) -> str
)
pre_job_contents = jinja.get_template("pre-job.j2").render(pre_job_contents_dict)
-
+
runner_group = None
if isinstance(self.config.github_path, GithubOrg):
runner_group = self.config.github_path.group
- aproxy_address = self.config.proxy_config.aproxy_address if self.config.proxy_config is not None else None
+ aproxy_address = (
+ self.config.proxy_config.aproxy_address
+ if self.config.proxy_config is not None
+ else None
+ )
return jinja.get_template("openstack_userdata.sh.j2").render(
github_url=f"https://github.com/{self.config.github_path.path()}",
runner_group=runner_group,
@@ -175,12 +238,54 @@ def _generate_userdata(self, instance_name: str, registration_token: str) -> str
aproxy_address=aproxy_address,
dockerhub_mirror=self.config.dockerhub_mirror,
)
-
+
def _get_repo_policy_compliance_client(self) -> RepoPolicyComplianceClient | None:
if self.config.repo_policy_url and self.config.repo_policy_token:
- return RepoPolicyComplianceClient(self.config.repo_policy_url, self.config.repo_policy_token)
+ return RepoPolicyComplianceClient(
+ self.config.repo_policy_url, self.config.repo_policy_token
+ )
return None
+ def _health_check(self, instance: OpenstackInstance) -> bool:
+ try:
+ ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
+ except SshError:
+ logger.exception("SSH connection failure with %s", instance.name)
+ return False
+ try:
+ OpenstackRunnerManager._run_health_check(ssh_conn, instance.name)
+ except RunnerError:
+ logger.exception("Health check failure for %s", instance.name)
+ return False
+ logger.info("Health check success for %s", instance.name)
+ return True
+
+ @retry(tries=3, delay=60, local_logger=logger)
+ @staticmethod
+ def _run_health_check(ssh_conn: SshConnection, name: str):
+ result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
+ if not result.ok:
+ logger.warning("SSH run of `ps aux` failed on %s", name)
+ raise RunnerError(f"Unable to SSH run `ps aux` on {name}")
+ if (
+ RUNNER_WORKER_PROCESS not in result.stdout
+ and RUNNER_LISTENER_PROCESS not in result.stdout
+ ):
+ logger.warning("Runner process not found on %s", name)
+ raise RunnerError(f"Runner process not found on {name}")
+
+ @retry(tries=10, delay=60, local_logger=logger)
+ @staticmethod
+ def _wait_runner_startup(ssh_conn: SshConnection, name: str) -> None:
+ result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
+ if not result.ok:
+ logger.warning("SSH run of `ps aux` failed on %s", name)
+ raise RunnerStartError(f"Unable to SSH run `ps aux` on {name}")
+ if RUNNER_STARTUP_PROCESS not in result.stdout:
+ logger.warning("Runner startup process not found on %s", name)
+ return RunnerStartError(f"Runner startup process not found on {name}")
+ logger.info("Runner startup process found to be healthy on %s", name)
+
@staticmethod
def _generate_runner_id() -> RunnerId:
return secrets.token_hex(12)
@@ -202,7 +307,7 @@ def _issue_runner_installed_metric(
)
except IssueMetricEventError:
logger.exception("Failed to issue RunnerInstalled metric")
-
+
try:
storage = metrics_storage.create(name)
except CreateMetricsStorageError:
@@ -223,7 +328,7 @@ def _issue_runner_installed_metric(
name,
)
- @staticmethod
+ @staticmethod
def _pull_runner_metrics(name: str, ssh_conn: SshConnection) -> None:
try:
storage = metrics_storage.get(name)
@@ -255,9 +360,10 @@ def _pull_runner_metrics(name: str, ssh_conn: SshConnection) -> None:
exc,
)
-
@staticmethod
- def _ssh_pull_file(ssh_conn: SshConnection, remote_path:str, local_path: str, max_size: int) -> None:
+ def _ssh_pull_file(
+ ssh_conn: SshConnection, remote_path: str, local_path: str, max_size: int
+ ) -> None:
"""Pull file from the runner instance.
Args:
@@ -272,8 +378,12 @@ def _ssh_pull_file(ssh_conn: SshConnection, remote_path:str, local_path: str, ma
"""
try:
result = ssh_conn.run(f"stat -c %s {remote_path}", warn=True)
- except (TimeoutError, paramiko.ssh_exception.NoValidConnectionsError, paramiko.ssh_exception.SSHException) as exc:
- raise _SshError(f"Unable to SSH into {ssh_conn.host}") from exc
+ except (
+ TimeoutError,
+ paramiko.ssh_exception.NoValidConnectionsError,
+ paramiko.ssh_exception.SSHException,
+ ) as exc:
+ raise SshError(f"Unable to SSH into {ssh_conn.host}") from exc
if not result.ok:
logger.warning(
(
@@ -293,19 +403,25 @@ def _ssh_pull_file(ssh_conn: SshConnection, remote_path:str, local_path: str, ma
stdout.strip()
size = int(stdout)
if size > max_size:
- raise _PullFileError( f"File size of {remote_path} too large {size} > {max_size}")
+ raise _PullFileError(f"File size of {remote_path} too large {size} > {max_size}")
except ValueError as exc:
raise _PullFileError(f"Invalid file size for {remote_path}: stdout") from exc
-
+
try:
ssh_conn.get(remote=remote_path, local=local_path)
- except (TimeoutError, paramiko.ssh_exception.NoValidConnectionsError, paramiko.ssh_exception.SSHException) as exc:
- raise _SshError(f"Unable to SSH into {ssh_conn.host}") from exc
+ except (
+ TimeoutError,
+ paramiko.ssh_exception.NoValidConnectionsError,
+ paramiko.ssh_exception.SSHException,
+ ) as exc:
+ raise SshError(f"Unable to SSH into {ssh_conn.host}") from exc
except OSError as exc:
- raise _PullFileError(F"Unable to retrieve file {remote_path}") from exc
-
+ raise _PullFileError(f"Unable to retrieve file {remote_path}") from exc
+
@staticmethod
- def _run_github_runner_removal_script(instance_name: str, ssh_conn: SshConnection, remove_token: str) -> None:
+ def _run_github_runner_removal_script(
+ instance_name: str, ssh_conn: SshConnection, remove_token: str
+ ) -> None:
"""Run Github runner removal script.
Args:
@@ -319,7 +435,7 @@ def _run_github_runner_removal_script(instance_name: str, ssh_conn: SshConnectio
result = ssh_conn.run(
f"{_CONFIG_SCRIPT_PATH} remove --token {remove_token}",
warn=True,
- )
+ )
if result.ok:
return
@@ -334,6 +450,11 @@ def _run_github_runner_removal_script(instance_name: str, ssh_conn: SshConnectio
result.stderr,
)
raise GithubRunnerRemoveError(f"Failed to remove runner {instance_name} from Github.")
- except (TimeoutError, paramiko.ssh_exception.NoValidConnectionsError, paramiko.ssh_exception.SSHException) as exc:
- raise GithubRunnerRemoveError(f"Failed to remove runner {instance_name} from Github.") from exc
-
\ No newline at end of file
+ except (
+ TimeoutError,
+ paramiko.ssh_exception.NoValidConnectionsError,
+ paramiko.ssh_exception.SSHException,
+ ) as exc:
+ raise GithubRunnerRemoveError(
+ f"Failed to remove runner {instance_name} from Github."
+ ) from exc
From 00ec04e346eb40fc77ad00c291bee7bb42fec4d7 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sat, 3 Aug 2024 16:11:01 +0800
Subject: [PATCH 018/278] Add initial integration test
---
...penstack_cloud.openstack_runner_manager.md | 66 ++++++++----
src/manager/cloud_runner_manager.py | 7 +-
src/manager/github_runner_manager.py | 10 +-
src/manager/runner_manager.py | 5 +-
.../openstack_runner_manager.py | 29 ++---
tests/integration/conftest.py | 2 +-
.../test_runner_manager_openstack.py | 100 ++++++++++++++++++
7 files changed, 177 insertions(+), 42 deletions(-)
create mode 100644 tests/integration/test_runner_manager_openstack.py
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 02a1e2e4f..537b4b776 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -19,10 +19,10 @@
---
-
+
## class `OpenstackRunnerManagerConfig`
-OpenstackRunnerManagerConfig(image: str, flavor: str, network: str, github_path: charm_state.GithubOrg | charm_state.GithubRepo, labels: list[str], proxy_config: charm_state.ProxyConfig | None, dockerhub_mirror: str | None, ssh_debug_connections: list[charm_state.SSHDebugConnection], repo_policy_url: str, repo_policy_token: str, clouds_config: dict[str, dict], cloud: str)
+OpenstackRunnerManagerConfig(clouds_config: dict[str, dict], cloud: str, image: str, flavor: str, network: str, github_path: charm_state.GithubOrg | charm_state.GithubRepo, labels: list[str], proxy_config: charm_state.ProxyConfig | None, dockerhub_mirror: str | None, ssh_debug_connections: list[charm_state.SSHDebugConnection] | None, repo_policy_url: str | None, repo_policy_token: str | None)
@@ -30,6 +30,8 @@ OpenstackRunnerManagerConfig(image: str, flavor: str, network: str, github_path:
```python
__init__(
+ clouds_config: dict[str, dict],
+ cloud: str,
image: str,
flavor: str,
network: str,
@@ -37,11 +39,9 @@ __init__(
labels: list[str],
proxy_config: ProxyConfig | None,
dockerhub_mirror: str | None,
- ssh_debug_connections: list[SSHDebugConnection],
- repo_policy_url: str,
- repo_policy_token: str,
- clouds_config: dict[str, dict],
- cloud: str
+ ssh_debug_connections: list[SSHDebugConnection] | None,
+ repo_policy_url: str | None,
+ repo_policy_token: str | None
) → None
```
@@ -55,19 +55,19 @@ __init__(
---
-
+
## class `OpenstackRunnerManager`
-
+
### method `__init__`
```python
-__init__(runner_flavor: str, config: OpenstackRunnerManagerConfig) → None
+__init__(prefix: str, config: OpenstackRunnerManagerConfig) → None
```
@@ -79,7 +79,21 @@ __init__(runner_flavor: str, config: OpenstackRunnerManagerConfig) → None
---
-
+
+
+### method `cleanup`
+
+```python
+cleanup(remove_token: str) → None
+```
+
+
+
+
+
+---
+
+
### method `create_runner`
@@ -93,12 +107,26 @@ create_runner(registration_token: str) → str
---
-
+
+
+### method `delete_runner`
+
+```python
+delete_runner(id: str, remove_token: str) → None
+```
+
+
+
+
+
+---
+
+
-### method `delete_runners`
+### method `get_name_prefix`
```python
-delete_runners(id: str, remove_token: str) → None
+get_name_prefix() → str
```
@@ -107,12 +135,12 @@ delete_runners(id: str, remove_token: str) → None
---
-
+
### method `get_runner`
```python
-get_runner(id: str) → RunnerInstance | None
+get_runner(id: str) → CloudRunnerInstance | None
```
@@ -121,14 +149,14 @@ get_runner(id: str) → RunnerInstance | None
---
-
+
### method `get_runners`
```python
get_runners(
- cloud_runner_status: list[CloudRunnerStatus]
-) → Tuple[RunnerInstance]
+ cloud_runner_status: Sequence[CloudRunnerState]
+) → Tuple[CloudRunnerInstance]
```
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 621a26179..b342c7254 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -23,7 +23,8 @@ class CloudRunnerState(str, Enum):
UNKNOWN = "unknown"
UNEXPECTED = "unexpected"
- def __init__(openstack_server_status: str) -> None:
+ @staticmethod
+ def from_openstack_server_status(openstack_server_status: str) -> None:
"""Create from openstack server status.
The openstack server status are documented here:
@@ -55,7 +56,7 @@ def __init__(openstack_server_status: str) -> None:
class CloudRunnerInstance:
name: str
id: str
- status: CloudRunnerState
+ state: CloudRunnerState
@dataclass
@@ -64,6 +65,8 @@ class RunnerMetrics:
class CloudRunnerManager(ABC):
+ def get_name_prefix(self) -> str: ...
+
def create_runner(self, registration_token: str) -> RunnerId: ...
def get_runner(self, id: RunnerId) -> CloudRunnerInstance: ...
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 41bfbc20a..3e8972e10 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -15,7 +15,8 @@ class GithubRunnerState(str, Enum):
OFFLINE = "offline"
UNKNOWN = "unknown"
- def __init__(self, runner: SelfHostedRunner) -> "GithubRunnerState":
+ @staticmethod
+ def from_runner(runner: SelfHostedRunner) -> "GithubRunnerState":
state = GithubRunnerState.OFFLINE
if runner.status == GitHubRunnerStatus.ONLINE:
if runner.busy:
@@ -33,11 +34,12 @@ def __init__(self, prefix: str, token: str, path: GithubPath):
self._github = GithubClient(token)
def get_runners(self, states: Sequence[GithubRunnerState]) -> tuple[SelfHostedRunner]:
- runner_list = self._github.get_runner_github_info()
+ runner_list = self._github.get_runner_github_info(self._path)
return tuple(
runner
for runner in runner_list
- if GithubRunnerManager._filter_runner_state(runner, states)
+ if runner.name.startswith(self._prefix)
+ and GithubRunnerManager._filter_runner_state(runner, states)
)
def delete_runners(self, states: Sequence[GithubRunnerState]) -> None:
@@ -55,4 +57,4 @@ def get_removal_token(self) -> str:
def _filter_runner_state(
runner: SelfHostedRunner, states: Sequence[GithubRunnerState]
) -> bool:
- return GithubRunnerState(runner) in states
+ return GithubRunnerState.from_runner(runner) in states
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index c12fdf526..3970587e4 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -41,12 +41,11 @@ def __init__(
self.name = github_info.name
self.id = cloud_instance.id
self.github_state = GithubRunnerState(SelfHostedRunner)
- self.cloud_state = cloud_instance.status
+ self.cloud_state = cloud_instance.state
@dataclass
class RunnerManagerConfig:
- prefix: str
token: str
path: GithubPath
@@ -57,7 +56,7 @@ def __init__(self, cloud_runner_manager: CloudRunnerManager, config: RunnerManag
self._config = config
self._cloud = cloud_runner_manager
self._github = GithubRunnerManager(
- self._config.prefix, self._config.path, self._config.path
+ prefix=self._cloud.get_name_prefix(), token=self._config.token, path=self._config.path
)
def create_runners(self, num: int) -> list[RunnerId]:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 95981063c..307e72f5d 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -64,6 +64,8 @@ class _PullFileError(Exception):
@dataclass
class OpenstackRunnerManagerConfig:
+ clouds_config: dict[str, dict]
+ cloud: str
image: str
flavor: str
network: str
@@ -71,24 +73,25 @@ class OpenstackRunnerManagerConfig:
labels: list[str]
proxy_config: ProxyConfig | None
dockerhub_mirror: str | None
- ssh_debug_connections: list[SSHDebugConnection]
- repo_policy_url: str
- repo_policy_token: str
- clouds_config: dict[str, dict]
- cloud: str
+ ssh_debug_connections: list[SSHDebugConnection] | None
+ repo_policy_url: str | None
+ repo_policy_token: str | None
class OpenstackRunnerManager(CloudRunnerManager):
- def __init__(self, runner_flavor: str, config: OpenstackRunnerManagerConfig) -> None:
- self.runner_flavor = runner_flavor
+ def __init__(self, prefix: str, config: OpenstackRunnerManagerConfig) -> None:
+ self.prefix = prefix
self.config = config
self._openstack_cloud = OpenstackCloud(
clouds_config=self.config.clouds_config,
cloud=self.config.cloud,
- prefix=self.runner_flavor,
+ prefix=self.prefix,
)
+ def get_name_prefix(self) -> str:
+ return self.prefix
+
def create_runner(self, registration_token: str) -> RunnerId:
start_timestamp = time.time()
id = OpenstackRunnerManager._generate_runner_id()
@@ -119,7 +122,7 @@ def create_runner(self, registration_token: str) -> RunnerId:
end_timestamp = time.time()
OpenstackRunnerManager._issue_runner_installed_metric(
name=instance_name,
- flavor=self.runner_flavor,
+ flavor=self.prefix,
install_start_timestamp=start_timestamp,
install_end_timestamp=end_timestamp,
)
@@ -133,7 +136,7 @@ def get_runner(self, id: RunnerId) -> CloudRunnerInstance | None:
return CloudRunnerInstance(
name=name,
id=id,
- status=CloudRunnerState(instance.status),
+ state=CloudRunnerState.from_openstack_server_status(instance.status),
)
return None
@@ -145,11 +148,11 @@ def get_runners(
CloudRunnerInstance(
name=instance.name,
id=self._openstack_cloud.convert_name(instance.name),
- status=CloudRunnerState(instance.status),
+ state=CloudRunnerState.from_openstack_server_status(instance.status),
)
for instance in instances_list
]
- return [instance for instance in instances_list if instance.status in cloud_runner_status]
+ return [instance for instance in instances_list if instance.state in cloud_runner_status]
def delete_runner(self, id: RunnerId, remove_token: str) -> None:
instance = self._openstack_cloud.get_instance(id)
@@ -176,7 +179,7 @@ def cleanup(self, remove_token: str) -> None:
runner_list = self._openstack_cloud.get_instances()
for runner in runner_list:
- state = CloudRunnerState(runner.status)
+ state = (CloudRunnerState(runner.status),)
if state in (
CloudRunnerState.DELETED,
CloudRunnerState.ERROR,
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 25f4f1ee3..c44d374fe 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -85,7 +85,7 @@ def existing_app(pytestconfig: pytest.Config) -> Optional[str]:
def app_name(existing_app: Optional[str]) -> str:
"""Randomized application name."""
# Randomized app name to avoid collision when runner is connecting to GitHub.
- return existing_app or f"integration-id{secrets.token_hex(2)}"
+ return existing_app or f"test-{secrets.token_hex(4)}"
@pytest.fixture(scope="module", name="openstack_clouds_yaml")
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
new file mode 100644
index 000000000..dfd7ee6cd
--- /dev/null
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -0,0 +1,100 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+"""Testing the RunnerManager class with OpenStackRunnerManager as CloudManager."""
+
+
+import pytest
+import pytest_asyncio
+import yaml
+from openstack.connection import Connection as OpenstackConnection
+
+from charm_state import GithubPath, ProxyConfig, parse_github_path
+from manager.runner_manager import RunnerManager, RunnerManagerConfig
+from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
+from openstack_cloud.openstack_runner_manager import (
+ OpenstackRunnerManager,
+ OpenstackRunnerManagerConfig,
+)
+from tests.integration.helpers.openstack import PrivateEndpointConfigs
+
+
+@pytest.fixture(scope="module", name="github_path")
+def github_path_fixture(path: str) -> GithubPath:
+ return parse_github_path(path, "Default")
+
+
+@pytest.fixture(scope="module", name="proxy_config")
+def openstack_proxy_config_fixture(
+ openstack_http_proxy: str, openstack_https_proxy: str, openstack_no_proxy: str
+) -> ProxyConfig:
+ use_aproxy = False
+ if openstack_http_proxy or openstack_https_proxy:
+ use_aproxy = True
+ openstack_http_proxy = openstack_http_proxy if openstack_http_proxy else None
+ openstack_https_proxy = openstack_https_proxy if openstack_https_proxy else None
+ return ProxyConfig(
+ http=openstack_http_proxy,
+ https=openstack_https_proxy,
+ no_proxy=openstack_no_proxy,
+ use_aproxy=use_aproxy,
+ )
+
+
+@pytest_asyncio.fixture(scope="module", name="openstack_runner_manager")
+async def openstack_runner_manager_fixture(
+ app_name: str,
+ private_endpoint_clouds_yaml: str,
+ openstack_test_image: str,
+ flavor_name: str,
+ network_name: str,
+ github_path: GithubPath,
+ proxy_config: ProxyConfig,
+ openstack_connection: OpenstackConnection,
+) -> OpenstackRunnerManager:
+ """Create OpenstackRunnerManager instance.
+
+ The prefix arg of OpenstackRunnerManager is set to app_name to let openstack_connection_fixture perform the cleanup of openstack resources.
+ """
+ # TODO: Think about how to deal with this when testing locally.
+ # This will modify a file under home directory.
+ _CLOUDS_YAML_PATH.unlink()
+ clouds_config = yaml.safe_load(private_endpoint_clouds_yaml)
+
+ config = OpenstackRunnerManagerConfig(
+ clouds_config=clouds_config,
+ cloud="testcloud",
+ image=openstack_test_image,
+ flavor=flavor_name,
+ network=network_name,
+ github_path=github_path,
+ labels=["openstack_test"],
+ proxy_config=proxy_config,
+ dockerhub_mirror=None,
+ ssh_debug_connections=None,
+ repo_policy_url=None,
+ repo_policy_token=None,
+ )
+ return OpenstackRunnerManager(app_name, config)
+
+
+@pytest_asyncio.fixture(scope="module", name="runner_manager")
+async def runner_manager_fixture(
+ openstack_runner_manager: OpenstackRunnerManager, token: str, github_path: GithubPath
+) -> RunnerManager:
+ config = RunnerManagerConfig(token, github_path)
+ return RunnerManager(openstack_runner_manager, config)
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_get_no_runner(runner_manager: RunnerManager) -> None:
+ """
+ Arrange: No runners on the
+ Act:
+ Assert:
+ """
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert not runner_list
From 9ea0abe449df6c2bee4335105650f1380a69ae7b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sun, 4 Aug 2024 14:15:57 +0800
Subject: [PATCH 019/278] Add create runner test
---
src/openstack_cloud/openstack_cloud.py | 8 ++---
.../openstack_runner_manager.py | 2 +-
.../test_runner_manager_openstack.py | 36 +++++++++++++++----
3 files changed, 35 insertions(+), 11 deletions(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 662014044..29a084664 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -54,7 +54,7 @@ def __init__(self, server: OpenstackServer, prefix: str):
for address in network_addresses
]
- if not self.name.startswith(prefix):
+ if not self.server_name.startswith(prefix):
# Should never happen.
raise ValueError(
f"Found openstack server {server.name} managed under prefix {prefix}, contact devs"
@@ -159,7 +159,7 @@ def launch_instance(
self._delete_key_pair(conn, name)
raise OpenStackError(f"Failed to create openstack server {full_name}") from err
- return OpenstackInstance(server)
+ return OpenstackInstance(server, self.prefix)
def get_instance(self, name: str) -> OpenstackInstance:
full_name = self.get_instance_name(name)
@@ -368,13 +368,13 @@ def _get_key_path(name: str) -> Path:
def _setup_key_pair(conn: OpenstackConnection, name: str) -> OpenstackKeypair:
key_path = OpenstackCloud._get_key_path(name)
- if key_path.exists:
+ if key_path.exists():
logger.warning("Existing private key file for %s found, removing it.", name)
key_path.unlink(missing_ok=True)
keypair = conn.create_keypair(name=name)
+ key_path.parent.mkdir(parents=True, exist_ok=True)
key_path.write_text(keypair.private_key)
- shutil.chown(key_path, user="ubuntu", group="ubuntu")
key_path.chmod(0o400)
return keypair
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 307e72f5d..7e7757d55 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -229,7 +229,7 @@ def _generate_userdata(self, instance_name: str, registration_token: str) -> str
if self.config.proxy_config is not None
else None
)
- return jinja.get_template("openstack_userdata.sh.j2").render(
+ return jinja.get_template("openstack-userdata.sh.j2").render(
github_url=f"https://github.com/{self.config.github_path.path()}",
runner_group=runner_group,
token=registration_token,
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index dfd7ee6cd..b1f820a1a 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -10,6 +10,8 @@
from openstack.connection import Connection as OpenstackConnection
from charm_state import GithubPath, ProxyConfig, parse_github_path
+from manager.cloud_runner_manager import CloudRunnerState
+from manager.github_runner_manager import GithubRunnerState
from manager.runner_manager import RunnerManager, RunnerManagerConfig
from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
from openstack_cloud.openstack_runner_manager import (
@@ -56,9 +58,7 @@ async def openstack_runner_manager_fixture(
The prefix arg of OpenstackRunnerManager is set to app_name to let openstack_connection_fixture perform the cleanup of openstack resources.
"""
- # TODO: Think about how to deal with this when testing locally.
- # This will modify a file under home directory.
- _CLOUDS_YAML_PATH.unlink()
+ _CLOUDS_YAML_PATH.unlink(missing_ok=True)
clouds_config = yaml.safe_load(private_endpoint_clouds_yaml)
config = OpenstackRunnerManagerConfig(
@@ -91,10 +91,34 @@ async def runner_manager_fixture(
@pytest.mark.abort_on_fail
async def test_get_no_runner(runner_manager: RunnerManager) -> None:
"""
- Arrange: No runners on the
- Act:
- Assert:
+ Arrange: RunnerManager instance with no runners.
+ Act: Get runners.
+ Assert: Empty tuple returned.
"""
runner_list = runner_manager.get_runners()
assert isinstance(runner_list, tuple)
assert not runner_list
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_create_runner(runner_manager: RunnerManager) -> None:
+ """
+ Arrange: RunnerManager instance with no runners.
+ Act: Create one runner.
+ Assert: An active idle runner.
+ """
+ runner_id_list = runner_manager.create_runners(1)
+ assert isinstance(runner_id_list, tuple)
+ assert len(runner_id_list) == 1
+ runner_id = runner_id[0]
+
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 1
+ runner = runner_list[0]
+ assert runner.id == runner_id
+ assert runner.cloud_state == CloudRunnerState.ACTIVE
+ assert runner.github_state == GithubRunnerState.IDLE
+
From d47be6f44b47637cdc3311c7469bbc54f880007c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sun, 4 Aug 2024 14:18:03 +0800
Subject: [PATCH 020/278] Update integration test to debug new tests
---
.github/workflows/integration_test.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index a85614f3e..d71b576fe 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -35,7 +35,7 @@ jobs:
test-tox-env: integration-juju3.2
# TODO: debug only remove
# modules: '["test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_repo", "test_charm_runner", "test_reactive", "test_openstack_cloud"]'
- modules: '["test_openstack_cloud"]'
+ modules: '["test_runner_manager_openstack"]'
extra-arguments: "-m openstack"
self-hosted-runner: true
self-hosted-runner-label: stg-private-endpoint
From f8c6c6229fc2d12990e299fad3e3f38ba98262a2 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 09:24:22 +0800
Subject: [PATCH 021/278] Enable debugging
---
.github/workflows/integration_test.yaml | 2 ++
1 file changed, 2 insertions(+)
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index d71b576fe..72afcddfc 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -39,3 +39,5 @@ jobs:
extra-arguments: "-m openstack"
self-hosted-runner: true
self-hosted-runner-label: stg-private-endpoint
+ tmate-debug: true
+ tmate-timeout: 300
From fc9d9976bc553864874c9122039eeee79f7f2758 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 10:03:37 +0800
Subject: [PATCH 022/278] Test env
---
.github/workflows/e2e_test.yaml | 4 +++-
.github/workflows/integration_test.yaml | 10 ++++------
.github/workflows/manual_test_env.yaml | 25 +++++++++++++++++++++++++
.github/workflows/test.yaml | 4 +++-
src/manager/cloud_runner_manager.py | 6 ------
5 files changed, 35 insertions(+), 14 deletions(-)
create mode 100644 .github/workflows/manual_test_env.yaml
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index 5933451ee..bb1dada46 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,7 +1,9 @@
name: End-to-End tests
on:
- pull_request:
+ # TODO: Uncomment
+ #pull_request:
+ workflow_dispatch:
jobs:
# test option values defined at test/conftest.py are passed on via repository secret
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 72afcddfc..1edd98aca 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,7 +1,9 @@
name: integration-tests
on:
- pull_request:
+ # TODO: Uncomment
+ #pull_request:
+ workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -33,11 +35,7 @@ jobs:
pre-run-script: scripts/setup-lxd.sh
provider: lxd
test-tox-env: integration-juju3.2
- # TODO: debug only remove
- # modules: '["test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_repo", "test_charm_runner", "test_reactive", "test_openstack_cloud"]'
- modules: '["test_runner_manager_openstack"]'
+ modules: '["test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_repo", "test_charm_runner", "test_reactive", "test_openstack_cloud"]'
extra-arguments: "-m openstack"
self-hosted-runner: true
self-hosted-runner-label: stg-private-endpoint
- tmate-debug: true
- tmate-timeout: 300
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
new file mode 100644
index 000000000..5bd1a6254
--- /dev/null
+++ b/.github/workflows/manual_test_env.yaml
@@ -0,0 +1,25 @@
+name: Manual test env
+
+on:
+ pull_request:
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ openstack-integration-tests-private-endpoint:
+ name: Integration test using private-endpoint
+ uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
+ secrets: inherit
+ with:
+ juju-channel: 3.2/stable
+ pre-run-script: scripts/setup-lxd.sh
+ provider: lxd
+ test-tox-env: integration-juju3.2
+ modules: '["test_runner_manager_openstack"]'
+ extra-arguments: "-m openstack"
+ self-hosted-runner: true
+ self-hosted-runner-label: stg-private-endpoint
+ tmate-debug: true
+ tmate-timeout: 300
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 99e540d31..34803b2fb 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,7 +1,9 @@
name: Tests
on:
- pull_request:
+ # TODO: Uncomment
+ #pull_request:
+ workflow_dispatch:
jobs:
unit-tests:
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index b342c7254..1f4c8b507 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -8,12 +8,6 @@
RunnerId = str
-_OPENSTACK_STATUS_SHUTOFF = "SHUTOFF"
-_OPENSTACK_STATUS_ERROR = "ERROR"
-_OPENSTACK_STATUS_ACTIVE = "ACTIVE"
-_OPENSTACK_STATUS_BUILDING = "BUILDING"
-
-
class CloudRunnerState(str, Enum):
CREATED = "created"
ACTIVE = "active"
From fa2947292083e83b709762b75d2b8bd557ea27ea Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 10:07:19 +0800
Subject: [PATCH 023/278] Add debug
---
src-docs/openstack_cloud.openstack_runner_manager.md | 8 ++++----
src/openstack_cloud/openstack_runner_manager.py | 4 ++++
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 537b4b776..dbbc067a6 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -79,7 +79,7 @@ __init__(prefix: str, config: OpenstackRunnerManagerConfig) → None
---
-
+
### method `cleanup`
@@ -107,7 +107,7 @@ create_runner(registration_token: str) → str
---
-
+
### method `delete_runner`
@@ -135,7 +135,7 @@ get_name_prefix() → str
---
-
+
### method `get_runner`
@@ -149,7 +149,7 @@ get_runner(id: str) → CloudRunnerInstance | None
---
-
+
### method `get_runners`
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 7e7757d55..7bbca45f0 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -109,6 +109,10 @@ def create_runner(self, registration_token: str) -> RunnerId:
)
except OpenStackError as err:
raise RunnerCreateError("Failed to create {instance_name} openstack runner") from err
+
+ # TODO: Test only
+ import pytest
+ pytest.set_trace()
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
From 9c74ad97dc9bcadf187b22db223d589eb142f699 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 11:01:07 +0800
Subject: [PATCH 024/278] Fix confusing naming issue.
---
src-docs/openstack_cloud.openstack_cloud.md | 6 ++--
...penstack_cloud.openstack_runner_manager.md | 8 ++---
src/openstack_cloud/openstack_cloud.py | 32 +++++++++----------
.../openstack_runner_manager.py | 28 +++++++---------
tests/integration/test_openstack_cloud.py | 11 ++++---
5 files changed, 41 insertions(+), 44 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 0b227b3ba..6f8fb579b 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -83,7 +83,7 @@ cleanup() → None
### method `delete_instance`
```python
-delete_instance(name: str) → None
+delete_instance(instance_id: str) → None
```
@@ -97,7 +97,7 @@ delete_instance(name: str) → None
### method `get_instance`
```python
-get_instance(name: str) → OpenstackInstance
+get_instance(instance_id: str) → OpenstackInstance
```
@@ -154,7 +154,7 @@ get_ssh_connection(instance: OpenstackInstance) → Connection
```python
launch_instance(
- name: str,
+ instance_id: str,
image: str,
flavor: str,
network: str,
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index dbbc067a6..537b4b776 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -79,7 +79,7 @@ __init__(prefix: str, config: OpenstackRunnerManagerConfig) → None
---
-
+
### method `cleanup`
@@ -107,7 +107,7 @@ create_runner(registration_token: str) → str
---
-
+
### method `delete_runner`
@@ -135,7 +135,7 @@ get_name_prefix() → str
---
-
+
### method `get_runner`
@@ -149,7 +149,7 @@ get_runner(id: str) → CloudRunnerInstance | None
---
-
+
### method `get_runners`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 29a084664..ab1efdd07 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -40,7 +40,7 @@
class OpenstackInstance:
server_id: str
server_name: str
- name: str
+ instance_id: str
addresses: list[str]
status: str
@@ -59,7 +59,7 @@ def __init__(self, server: OpenstackServer, prefix: str):
raise ValueError(
f"Found openstack server {server.name} managed under prefix {prefix}, contact devs"
)
- self.name = self.server_name[len(prefix) :]
+ self.instance_id = self.server_name[len(prefix) :]
@contextmanager
@@ -112,9 +112,9 @@ def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
self.prefix = prefix
def launch_instance(
- self, name: str, image: str, flavor: str, network: str, userdata: str
+ self, instance_id: str, image: str, flavor: str, network: str, userdata: str
) -> OpenstackInstance:
- full_name = self.get_instance_name(name)
+ full_name = self.get_instance_name(instance_id)
logger.info("Creating openstack server with %s", full_name)
with _get_openstack_connection(
@@ -152,17 +152,17 @@ def launch_instance(
"Failed to cleanup openstack server %s that timeout during creation",
full_name,
)
- self._delete_key_pair(conn, name)
+ self._delete_key_pair(conn, instance_id)
raise OpenStackError(f"Timeout creating openstack server {full_name}") from err
except openstack.exceptions.SDKException as err:
logger.exception("Failed to create openstack server %s", full_name)
- self._delete_key_pair(conn, name)
+ self._delete_key_pair(conn, instance_id)
raise OpenStackError(f"Failed to create openstack server {full_name}") from err
return OpenstackInstance(server, self.prefix)
- def get_instance(self, name: str) -> OpenstackInstance:
- full_name = self.get_instance_name(name)
+ def get_instance(self, instance_id: str) -> OpenstackInstance:
+ full_name = self.get_instance_name(instance_id)
logger.info("Getting openstack server with %s", full_name)
with _get_openstack_connection(
@@ -170,8 +170,8 @@ def get_instance(self, name: str) -> OpenstackInstance:
) as conn:
return OpenstackInstance(OpenstackCloud._get_and_ensure_unique_server(conn, full_name))
- def delete_instance(self, name: str) -> None:
- full_name = self.get_instance_name(name)
+ def delete_instance(self, instance_id: str) -> None:
+ full_name = self.get_instance_name(instance_id)
logger.info("Deleting openstack server with %s", full_name)
with _get_openstack_connection(
@@ -188,12 +188,12 @@ def delete_instance(self, name: str) -> None:
raise OpenStackError(f"Failed to remove openstack runner {full_name}") from err
def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
- key_path = OpenstackCloud._get_key_path(instance.name)
+ key_path = OpenstackCloud._get_key_path(instance.server_name)
if not key_path.exists():
- raise SshError(f"Missing keyfile for server: {instance.name}, key path: {key_path}")
+ raise SshError(f"Missing keyfile for server: {instance.server_name}, key path: {key_path}")
if not instance.addresses:
- raise SshError(f"No addresses found for OpenStack server {instance.name}")
+ raise SshError(f"No addresses found for OpenStack server {instance.server_name}")
for ip in instance.addresses:
try:
@@ -206,7 +206,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
result = connection.run("echo {_TEST_STRING}", warn=True, timeout=_SSH_TIMEOUT)
if not result.ok:
logger.warning(
- "SSH test connection failed, server: %s, address: %s", instance.name, ip
+ "SSH test connection failed, server: %s, address: %s", instance.server_name, ip
)
continue
if _TEST_STRING in result.stdout:
@@ -214,13 +214,13 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
except (NoValidConnectionsError, TimeoutError, paramiko.ssh_exception.SSHException):
logger.warning(
"Unable to SSH into %s with address %s",
- instance.name,
+ instance.server_name,
connection.host,
exc_info=True,
)
continue
raise SshError(
- f"No connectable SSH addresses found, server: {instance.name}, "
+ f"No connectable SSH addresses found, server: {instance.server_name}, "
f"addresses: {instance.addresses}"
)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 7bbca45f0..b89214ffc 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -101,7 +101,7 @@ def create_runner(self, registration_token: str) -> RunnerId:
)
try:
instance = self._openstack_cloud.launch_instance(
- name=id,
+ instance_id=id,
image=self.config.image,
flavor=self.config.flavor,
network=self.config.network,
@@ -109,10 +109,6 @@ def create_runner(self, registration_token: str) -> RunnerId:
)
except OpenStackError as err:
raise RunnerCreateError("Failed to create {instance_name} openstack runner") from err
-
- # TODO: Test only
- import pytest
- pytest.set_trace()
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
@@ -136,7 +132,7 @@ def get_runner(self, id: RunnerId) -> CloudRunnerInstance | None:
name = self._openstack_cloud.get_instance_name(id)
instances_list = self._openstack_cloud.get_instances()
for instance in instances_list:
- if instance.name == name:
+ if instance.server_name == name:
return CloudRunnerInstance(
name=name,
id=id,
@@ -150,8 +146,8 @@ def get_runners(
instances_list = self._openstack_cloud.get_instances()
instances_list = [
CloudRunnerInstance(
- name=instance.name,
- id=self._openstack_cloud.convert_name(instance.name),
+ name=instance.server_name,
+ id=instance.instance_id,
state=CloudRunnerState.from_openstack_server_status(instance.status),
)
for instance in instances_list
@@ -164,20 +160,20 @@ def delete_runner(self, id: RunnerId, remove_token: str) -> None:
def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
- self._pull_runner_metrics(instance.name, ssh_conn)
+ self._pull_runner_metrics(instance.server_name, ssh_conn)
try:
OpenstackRunnerManager._run_github_runner_removal_script(
- instance.name, ssh_conn, remove_token
+ instance.server_name, ssh_conn, remove_token
)
except GithubRunnerRemoveError:
logger.warning(
- "Unable to run github runner removal script for %s", instance.name, stack_info=True
+ "Unable to run github runner removal script for %s", instance.server_name, stack_info=True
)
try:
self._openstack_cloud.delete_instance(id)
except OpenStackError:
- logger.exception("Unable to delete openstack instance for runner %s", instance.name)
+ logger.exception("Unable to delete openstack instance for runner %s", instance.server_name)
def cleanup(self, remove_token: str) -> None:
runner_list = self._openstack_cloud.get_instances()
@@ -257,14 +253,14 @@ def _health_check(self, instance: OpenstackInstance) -> bool:
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
except SshError:
- logger.exception("SSH connection failure with %s", instance.name)
+ logger.exception("SSH connection failure with %s", instance.server_name)
return False
try:
- OpenstackRunnerManager._run_health_check(ssh_conn, instance.name)
+ OpenstackRunnerManager._run_health_check(ssh_conn, instance.server_name)
except RunnerError:
- logger.exception("Health check failure for %s", instance.name)
+ logger.exception("Health check failure for %s", instance.server_name)
return False
- logger.info("Health check success for %s", instance.name)
+ logger.info("Health check success for %s", instance.server_name)
return True
@retry(tries=3, delay=60, local_logger=logger)
diff --git a/tests/integration/test_openstack_cloud.py b/tests/integration/test_openstack_cloud.py
index 321fbe1fa..926e545bb 100644
--- a/tests/integration/test_openstack_cloud.py
+++ b/tests/integration/test_openstack_cloud.py
@@ -26,7 +26,7 @@ async def openstack_cloud_fixture(base_openstack_cloud: OpenstackCloud) -> Opens
"""Ensures the OpenstackCloud object has no openstack servers."""
instances = base_openstack_cloud.get_instances()
for instance in instances:
- base_openstack_cloud.delete_instance(name=instance.name)
+ base_openstack_cloud.delete_instance(instance_id=instance.instance_id)
return base_openstack_cloud
@@ -73,7 +73,7 @@ async def test_launch_instance_and_delete(
# 1.
instance = base_openstack_cloud.launch_instance(
- name=instance_name,
+ instance_id=instance_name,
image=openstack_test_image,
flavor=openstack_test_flavor,
network=network_name,
@@ -81,7 +81,8 @@ async def test_launch_instance_and_delete(
)
assert instance is not None
- assert instance.name is not None
+ assert instance.instance_id is not None
+ assert instance.server_name is not None
assert instance.id is not None
servers = openstack_connection.list_servers()
@@ -92,7 +93,7 @@ async def test_launch_instance_and_delete(
assert False, f"OpenStack server with {instance_name} in the name not found"
# 2.
- base_openstack_cloud.delete_instance(name=instance_name)
+ base_openstack_cloud.delete_instance(instance_id=instance_name)
instances = base_openstack_cloud.get_instances()
assert not instances, "Test failure: openstack instance should be deleted."
@@ -116,7 +117,7 @@ async def test_instance_ssh_connection(
rand_chars = f"{token_hex(10)}"
instance_name = f"{token_hex(2)}"
instance = openstack_cloud.launch_instance(
- name=instance_name,
+ instance_id=instance_name,
image=openstack_test_image,
flavor=openstack_test_flavor,
network=network_name,
From b443be4e27bf5ba306ad4f4c6081a83604e45bcd Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 11:16:10 +0800
Subject: [PATCH 025/278] Pre-merge
---
src-docs/openstack_cloud.openstack_manager.md | 26 ++--
src/openstack_cloud/openstack_manager.py | 146 ++++++++++++------
2 files changed, 120 insertions(+), 52 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_manager.md b/src-docs/openstack_cloud.openstack_manager.md
index 93cff6908..0a39a9a37 100644
--- a/src-docs/openstack_cloud.openstack_manager.md
+++ b/src-docs/openstack_cloud.openstack_manager.md
@@ -18,7 +18,7 @@ Module for handling interactions with OpenStack.
---
-
+
## function `create_instance_config`
@@ -93,7 +93,7 @@ __init__(
---
-
+
## class `GithubRunnerRemoveError`
Represents an error removing registered runner from Github.
@@ -104,7 +104,7 @@ Represents an error removing registered runner from Github.
---
-
+
## class `OpenstackRunnerManager`
Runner manager for OpenStack-based instances.
@@ -117,7 +117,7 @@ Runner manager for OpenStack-based instances.
- `unit_num`: The juju unit number.
- `instance_name`: Prefix of the name for the set of runners.
-
+
### method `__init__`
@@ -146,24 +146,32 @@ Construct OpenstackRunnerManager object.
---
-
+
### method `flush`
```python
-flush() → int
+flush(mode: FlushMode = FlushMode.FLUSH_IDLE) → int
```
Flush Openstack servers.
+1. Kill the processes depending on flush mode. 2. Get unhealthy runners after process purging. 3. Delete unhealthy runners.
+
+
+
+**Args:**
+
+ - `mode`: The mode to determine which runner to flush.
+
**Returns:**
- The number of runners flushed.
+ The number of runners flushed.
---
-
+
### method `get_github_runner_info`
@@ -180,7 +188,7 @@ Get information on GitHub for the runners.
---
-
+
### method `reconcile`
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index f5fb1f0f1..f61d28b8d 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -20,6 +20,7 @@
import time
from contextlib import contextmanager
from dataclasses import dataclass
+from datetime import datetime
from multiprocessing import Pool
from pathlib import Path
from typing import Iterable, Iterator, Literal, Optional, cast
@@ -32,7 +33,6 @@
import openstack.image.v2.image
import paramiko
from fabric import Connection as SshConnection
-from invoke.runners import Result
from openstack.compute.v2.server import Server
from openstack.connection import Connection as OpenstackConnection
from openstack.exceptions import SDKException
@@ -61,7 +61,7 @@
from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
from repo_policy_compliance_client import RepoPolicyComplianceClient
from runner_manager import IssuedMetricEventsStats
-from runner_manager_type import OpenstackRunnerManagerConfig
+from runner_manager_type import FlushMode, OpenstackRunnerManagerConfig
from runner_type import GithubPath, RunnerByHealth, RunnerGithubInfo
from utilities import retry, set_env_var
@@ -149,6 +149,40 @@ class _CloudInitUserData:
proxies: Optional[ProxyConfig] = None
+@contextmanager
+def _create_connection(cloud_config: dict[str, dict]) -> Iterator[openstack.connection.Connection]:
+ """Create a connection context managed object, to be used within with statements.
+
+ This function should be called with a valid cloud_config. See _validate_cloud_config.
+ Also, this function assumes that the clouds.yaml exists on ~/.config/openstack/clouds.yaml.
+ See charm_state.py _write_openstack_config_to_disk.
+
+ Only the first cloud listed under cloud_config["clouds"] is used; a warning is logged
+ when more than one cloud is defined.
+
+ Args:
+ cloud_config: The configuration in clouds.yaml format to apply.
+
+ Raises:
+ OpenStackError: if the credentials provided are not authorized, or if an
+ openstack HttpException is raised while the yielded connection is in use.
+
+ Yields:
+ An authorized openstack.connection.Connection object.
+ """
+ clouds = list(cloud_config["clouds"].keys())
+ if len(clouds) > 1:
+ logger.warning("Multiple clouds defined in clouds.yaml. Using the first one to connect.")
+ cloud_name = clouds[0]
+
+ # api documents that keystoneauth1.exceptions.MissingRequiredOptions can be raised but
+ # I could not reproduce it. Therefore, no catch here for such exception.
+ try:
+ with openstack.connect(cloud=cloud_name) as conn:
+ conn.authorize()
+ # The yield sits inside the try, so HttpException raised by the caller's with-body
+ # is translated to OpenStackError as well.
+ yield conn
+ # pylint thinks this isn't an exception, but does inherit from Exception class.
+ except openstack.exceptions.HttpException as exc: # pylint: disable=bad-exception-cause
+ logger.exception("OpenStack API call failure")
+ raise OpenStackError("Failed OpenStack API call") from exc
+
+
# Disable too many arguments, as they are needed to create the dataclass.
def create_instance_config( # pylint: disable=too-many-arguments
app_name: str,
@@ -424,14 +458,19 @@ def _get_openstack_instances(self, conn: OpenstackConnection) -> list[Server]:
]
@staticmethod
- def _health_check(conn: OpenstackConnection, server_name: str, startup: bool = False) -> bool:
+ def _health_check(
+ conn: OpenstackConnection,
+ server_name: str,
+ startup: bool = False,
+ ) -> bool:
"""Health check a server instance.
A healthy server is defined as:
1. Openstack instance status is ACTIVE or BUILDING.
- 2. Runner.Worker exists (running a job).
- 3. Runner.Listener exists (waiting for job).
- 3. GitHub runner status is Idle or Active.
+ 2. Openstack instance status is in BUILDING less than CREATE_SERVER_TIMEOUT seconds.
+ 3. Runner.Worker exists (running a job).
+ 4. Runner.Listener exists (waiting for job).
+ 5. GitHub runner status is Idle or Active.
An undetermined server is marked as healthy when:
1. SSH fails - could be a transient network error.
@@ -453,6 +492,11 @@ def _health_check(conn: OpenstackConnection, server_name: str, startup: bool = F
return False
if server.status not in (_INSTANCE_STATUS_ACTIVE, _INSTANCE_STATUS_BUILDING):
return False
+        # created_at carries a literal "Z" (UTC) suffix. Parsing it with %z (which accepts
+        # "Z" since Python 3.7) yields a timezone-aware value, so "now" below is taken in
+        # UTC instead of the host's local time.
+        created_at = datetime.strptime(server.created_at, "%Y-%m-%dT%H:%M:%S%z")
+        current_time = datetime.now(created_at.tzinfo)
+        # Elapsed time is "now minus creation"; the reverse is always negative and would
+        # make every BUILDING server look younger than the timeout.
+        elapsed_seconds = (current_time - created_at).total_seconds()
+        if server.status == _INSTANCE_STATUS_BUILDING:
+            # A server stuck in BUILDING beyond the creation timeout is unhealthy.
+            return elapsed_seconds < CREATE_SERVER_TIMEOUT
return OpenstackRunnerManager._ssh_health_check(
conn=conn, server_name=server_name, startup=startup
)
@@ -494,8 +538,7 @@ def _ssh_health_check(conn: OpenstackConnection, server_name: str, startup: bool
if RUNNER_WORKER_PROCESS in result.stdout or RUNNER_LISTENER_PROCESS in result.stdout:
return True
- logger.error("[ALERT] Health check failed for server: %s", server_name)
- return True
+ return False
@staticmethod
@retry(tries=3, delay=5, max_delay=60, backoff=2, local_logger=logger)
@@ -1155,7 +1198,7 @@ def _run_github_removal_script(
) from exc
try:
- result: Result = ssh_conn.run(
+ result: invoke.runners.Result = ssh_conn.run(
f"{_CONFIG_SCRIPT_PATH} remove --token {remove_token}",
warn=True,
)
@@ -1475,54 +1518,71 @@ def _issue_reconciliation_metric(
except IssueMetricEventError:
logger.exception("Failed to issue Reconciliation metric")
- def flush(self) -> int:
+ def flush(self, mode: FlushMode = FlushMode.FLUSH_IDLE) -> int:
"""Flush Openstack servers.
+ 1. Kill the processes depending on flush mode.
+ 2. Get unhealthy runners after process purging.
+ 3. Delete unhealthy runners.
+
+ Args:
+ mode: The mode to determine which runner to flush.
+
Returns:
The number of runners flushed.
"""
logger.info("Flushing OpenStack all runners")
with _create_connection(self._cloud_config) as conn:
+ self._kill_runner_processes(conn=conn, mode=mode)
runner_by_health = self._get_openstack_runner_status(conn)
remove_token = self._github.get_runner_remove_token(path=self._config.path)
- runners_to_delete = (*runner_by_health.healthy, *runner_by_health.unhealthy)
self._remove_runners(
conn=conn,
- instance_names=runners_to_delete,
+ instance_names=runner_by_health.unhealthy,
remove_token=remove_token,
)
- return len(runners_to_delete)
+ return len(runner_by_health.unhealthy)
+ def _kill_runner_processes(self, conn: OpenstackConnection, mode: FlushMode) -> None:
+ """Kill runner processes on the instances, as selected by the flush mode.
-@contextmanager
-def _create_connection(cloud_config: dict[str, dict]) -> Iterator[OpenstackConnection]:
- """Create a connection context managed object, to be used within with statements.
-
- This method should be called with a valid cloud_config. See _validate_cloud_config.
- Also, this method assumes that the clouds.yaml exists on ~/.config/openstack/clouds.yaml.
- See charm_state.py _write_openstack_config_to_disk.
-
- Args:
- cloud_config: The configuration in clouds.yaml format to apply.
-
- Raises:
- OpenStackError: if the credentials provided is not authorized.
+ Runners that have not picked up a job have
+ 1. no Runner.Worker process
+ 2. no pre-run.sh job process
- Yields:
- An openstack.connection.Connection object.
- """
- clouds = list(cloud_config["clouds"].keys())
- if len(clouds) > 1:
- logger.warning("Multiple clouds defined in clouds.yaml. Using the first one to connect.")
- cloud_name = clouds[0]
+ Args:
+ conn: The connection object to access OpenStack cloud.
+ mode: The flush mode to determine which runner processes to kill.
- # api documents that keystoneauth1.exceptions.MissingRequiredOptions can be raised but
- # I could not reproduce it. Therefore, no catch here for such exception.
- try:
- with openstack.connect(cloud=cloud_name) as conn:
- conn.authorize()
- yield conn
- # pylint thinks this isn't an exception, but does inherit from Exception class.
- except openstack.exceptions.HttpException as exc: # pylint: disable=bad-exception-cause
- logger.exception("OpenStack API call failure")
- raise OpenStackError("Failed OpenStack API call") from exc
+ Raises:
+ NotImplementedError: If unsupported flush mode has been passed.
+ """
+ killer_command: str
+ match mode:
+ case FlushMode.FLUSH_IDLE:
+ # only kill Runner.Listener if Runner.Worker does not exist.
+ killer_command = (
+ "! pgrep -x Runner.Worker && pgrep -x Runner.Listener && "
+ "kill $(pgrep -x Runner.Listener)"
+ )
+ case FlushMode.FLUSH_BUSY:
+ # kill both Runner.Listener and Runner.Worker processes.
+ # This kills pre-job.sh, a child process of Runner.Worker.
+ # NOTE(review): the docstring says pre-run.sh while this comment says pre-job.sh;
+ # confirm the actual script name.
+ killer_command = (
+ "pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener);"
+ "pgrep -x Runner.Worker && kill $(pgrep -x Runner.Worker);"
+ )
+ case _:
+ raise NotImplementedError(f"Unsupported flush mode {mode}")
+
+ servers = self._get_openstack_instances(conn=conn)
+ for server in servers:
+ ssh_conn: SshConnection = self._get_ssh_connection(conn=conn, server_name=server.name)
+ # warn=True: a non-zero exit status is reported through result.ok instead of raising.
+ result: invoke.runners.Result = ssh_conn.run(
+ killer_command,
+ warn=True,
+ )
+ # The command chains also exit non-zero when pgrep finds nothing to kill (and, for
+ # FLUSH_IDLE, when Runner.Worker is running), so this warning is not always an error.
+ if not result.ok:
+ logger.warning("Failed to kill runner process. Instance: %s", server.name)
+ continue
+ logger.info("Successfully killed runner process. Instance: %s", server.name)
From d38d818b2146862e3ab5c693a447a6421e89b0d6 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 11:33:59 +0800
Subject: [PATCH 026/278] Test manual test env
---
.github/workflows/manual_test_env.yaml | 45 +++++++++++++++++---------
src-docs/runner_manager_type.md | 10 +++---
2 files changed, 36 insertions(+), 19 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index 5bd1a6254..a2a7ff630 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -8,18 +8,33 @@ concurrency:
cancel-in-progress: true
jobs:
- openstack-integration-tests-private-endpoint:
- name: Integration test using private-endpoint
- uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
- secrets: inherit
- with:
- juju-channel: 3.2/stable
- pre-run-script: scripts/setup-lxd.sh
- provider: lxd
- test-tox-env: integration-juju3.2
- modules: '["test_runner_manager_openstack"]'
- extra-arguments: "-m openstack"
- self-hosted-runner: true
- self-hosted-runner-label: stg-private-endpoint
- tmate-debug: true
- tmate-timeout: 300
+ manual-test-env:
+ name: manual-test-env
+ runs-on: ["self-hosted", "stg-private-endpoint"]
+ steps:
+ - name: Setup operator environment
+ uses: charmed-kubernetes/actions-operator@main
+ with:
+ provider: lxd
+ juju-channel: 3.2/stable
+ - uses: actions/checkout@v4
+ - run: bash scripts/setup-lxd.sh
+ - name: Tmate debugging session (self-hosted)
+ uses: canonical/action-tmate@main
+ timeout-minutes: ${{ inputs.tmate-timeout }}
+
+ # openstack-integration-tests-private-endpoint:
+ # name: Integration test using private-endpoint
+ # uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
+ # secrets: inherit
+ # with:
+ # juju-channel: 3.2/stable
+ # pre-run-script: scripts/setup-lxd.sh
+ # provider: lxd
+ # test-tox-env: integration-juju3.2
+ # modules: '["test_runner_manager_openstack"]'
+ # extra-arguments: "-m openstack"
+ # self-hosted-runner: true
+ # self-hosted-runner-label: stg-private-endpoint
+ # tmate-debug: true
+ # tmate-timeout: 300
diff --git a/src-docs/runner_manager_type.md b/src-docs/runner_manager_type.md
index c3509b433..f6dd4faae 100644
--- a/src-docs/runner_manager_type.md
+++ b/src-docs/runner_manager_type.md
@@ -14,6 +14,8 @@ Types used by RunnerManager class.
## class `FlushMode`
Strategy for flushing runners.
+During pre-job (repo-check), the runners are marked as idle and if the pre-job fails, the runner falls back to being idle again. Hence wait_repo_check is required.
+
**Attributes:**
@@ -30,7 +32,7 @@ Strategy for flushing runners.
---
-
+
## class `RunnerManagerClients`
Clients for accessing various services.
@@ -67,7 +69,7 @@ __init__(
---
-
+
## class `RunnerManagerConfig`
Configuration of runner manager.
@@ -119,7 +121,7 @@ Whether metrics for the runners should be collected.
---
-
+
## class `OpenstackRunnerManagerConfig`
Configuration of runner manager.
@@ -166,7 +168,7 @@ __init__(
---
-
+
## class `RunnerInfo`
Information from GitHub of a runner.
From 9990499f362f7d01648844c9b16e805c89f02e35 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 12:07:38 +0800
Subject: [PATCH 027/278] Debug ssh conn
---
src-docs/openstack_cloud.openstack_cloud.md | 6 +++---
src/openstack_cloud/openstack_cloud.py | 3 +++
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 6f8fb579b..c18711cbd 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -64,7 +64,7 @@ Create a OpenstackCloud instance.
---
-
+
### method `cleanup`
@@ -106,7 +106,7 @@ get_instance(instance_id: str) → OpenstackInstance
---
-
+
### method `get_instance_name`
@@ -120,7 +120,7 @@ get_instance_name(name: str) → str
---
-
+
### method `get_instances`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index ab1efdd07..04314fbf2 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -194,6 +194,9 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
raise SshError(f"Missing keyfile for server: {instance.server_name}, key path: {key_path}")
if not instance.addresses:
raise SshError(f"No addresses found for OpenStack server {instance.server_name}")
+
+ import pytest
+ pytest.set_trace()
for ip in instance.addresses:
try:
From 2be2847f430b9e5dc58696e8987765692dc2c143 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 12:48:09 +0800
Subject: [PATCH 028/278] Revert manual test env
---
.github/workflows/manual_test_env.yaml | 58 +++++++++++++-------------
1 file changed, 29 insertions(+), 29 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index a2a7ff630..fd9087999 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -8,33 +8,33 @@ concurrency:
cancel-in-progress: true
jobs:
- manual-test-env:
- name: manual-test-env
- runs-on: ["self-hosted", "stg-private-endpoint"]
- steps:
- - name: Setup operator environment
- uses: charmed-kubernetes/actions-operator@main
- with:
- provider: lxd
- juju-channel: 3.2/stable
- - uses: actions/checkout@v4
- - run: bash scripts/setup-lxd.sh
- - name: Tmate debugging session (self-hosted)
- uses: canonical/action-tmate@main
- timeout-minutes: ${{ inputs.tmate-timeout }}
+ # manual-test-env:
+ # name: manual-test-env
+ # runs-on: ["self-hosted", "stg-private-endpoint"]
+ # steps:
+ # - name: Setup operator environment
+ # uses: charmed-kubernetes/actions-operator@main
+ # with:
+ # provider: lxd
+ # juju-channel: 3.2/stable
+ # - uses: actions/checkout@v4
+ # - run: bash scripts/setup-lxd.sh
+ # - name: Tmate debugging session (self-hosted)
+ # uses: canonical/action-tmate@main
+ # timeout-minutes: ${{ inputs.tmate-timeout }}
- # openstack-integration-tests-private-endpoint:
- # name: Integration test using private-endpoint
- # uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
- # secrets: inherit
- # with:
- # juju-channel: 3.2/stable
- # pre-run-script: scripts/setup-lxd.sh
- # provider: lxd
- # test-tox-env: integration-juju3.2
- # modules: '["test_runner_manager_openstack"]'
- # extra-arguments: "-m openstack"
- # self-hosted-runner: true
- # self-hosted-runner-label: stg-private-endpoint
- # tmate-debug: true
- # tmate-timeout: 300
+ openstack-integration-tests-private-endpoint:
+ name: Integration test using private-endpoint
+ uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
+ secrets: inherit
+ with:
+ juju-channel: 3.2/stable
+ pre-run-script: scripts/setup-lxd.sh
+ provider: lxd
+ test-tox-env: integration-juju3.2
+ modules: '["test_runner_manager_openstack"]'
+ extra-arguments: "-m openstack"
+ self-hosted-runner: true
+ self-hosted-runner-label: stg-private-endpoint
+ tmate-debug: true
+ tmate-timeout: 300
From d9b3ffed69e84568c93775abf2c566c3ba2ba67c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 12:50:46 +0800
Subject: [PATCH 029/278] Create manual test env
---
.github/workflows/manual_test_env.yaml | 46 +++++++++++++-------------
1 file changed, 23 insertions(+), 23 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index fd9087999..a513cf466 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -8,33 +8,33 @@ concurrency:
cancel-in-progress: true
jobs:
- # manual-test-env:
- # name: manual-test-env
- # runs-on: ["self-hosted", "stg-private-endpoint"]
- # steps:
+ manual-test-env:
+ name: manual-test-env
+ runs-on: ["self-hosted", "stg-private-endpoint"]
+ steps:
# - name: Setup operator environment
# uses: charmed-kubernetes/actions-operator@main
# with:
# provider: lxd
# juju-channel: 3.2/stable
- # - uses: actions/checkout@v4
+ - uses: actions/checkout@v4
# - run: bash scripts/setup-lxd.sh
- # - name: Tmate debugging session (self-hosted)
- # uses: canonical/action-tmate@main
- # timeout-minutes: ${{ inputs.tmate-timeout }}
+ - name: Tmate debugging session (self-hosted)
+ uses: canonical/action-tmate@main
+ timeout-minutes: ${{ inputs.tmate-timeout }}
- openstack-integration-tests-private-endpoint:
- name: Integration test using private-endpoint
- uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
- secrets: inherit
- with:
- juju-channel: 3.2/stable
- pre-run-script: scripts/setup-lxd.sh
- provider: lxd
- test-tox-env: integration-juju3.2
- modules: '["test_runner_manager_openstack"]'
- extra-arguments: "-m openstack"
- self-hosted-runner: true
- self-hosted-runner-label: stg-private-endpoint
- tmate-debug: true
- tmate-timeout: 300
+ # openstack-integration-tests-private-endpoint:
+ # name: Integration test using private-endpoint
+ # uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
+ # secrets: inherit
+ # with:
+ # juju-channel: 3.2/stable
+ # pre-run-script: scripts/setup-lxd.sh
+ # provider: lxd
+ # test-tox-env: integration-juju3.2
+ # modules: '["test_runner_manager_openstack"]'
+ # extra-arguments: "-m openstack"
+ # self-hosted-runner: true
+ # self-hosted-runner-label: stg-private-endpoint
+ # tmate-debug: true
+ # tmate-timeout: 300
From 3ed04356a29cc83068a1508938876964440f8b14 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 12:51:48 +0800
Subject: [PATCH 030/278] Retry old manual env
---
.github/workflows/manual_test_env.yaml | 50 ++++++++++++--------------
1 file changed, 23 insertions(+), 27 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index a513cf466..58c58324b 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -3,38 +3,34 @@ name: Manual test env
on:
pull_request:
-concurrency:
- group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
- cancel-in-progress: true
-
jobs:
- manual-test-env:
- name: manual-test-env
- runs-on: ["self-hosted", "stg-private-endpoint"]
- steps:
+ # manual-test-env:
+ # name: manual-test-env
+ # runs-on: ["self-hosted", "stg-private-endpoint"]
+ # steps:
# - name: Setup operator environment
# uses: charmed-kubernetes/actions-operator@main
# with:
# provider: lxd
# juju-channel: 3.2/stable
- - uses: actions/checkout@v4
+ # - uses: actions/checkout@v4
# - run: bash scripts/setup-lxd.sh
- - name: Tmate debugging session (self-hosted)
- uses: canonical/action-tmate@main
- timeout-minutes: ${{ inputs.tmate-timeout }}
+ # - name: Tmate debugging session (self-hosted)
+ # uses: canonical/action-tmate@main
+ # timeout-minutes: ${{ inputs.tmate-timeout }}
- # openstack-integration-tests-private-endpoint:
- # name: Integration test using private-endpoint
- # uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
- # secrets: inherit
- # with:
- # juju-channel: 3.2/stable
- # pre-run-script: scripts/setup-lxd.sh
- # provider: lxd
- # test-tox-env: integration-juju3.2
- # modules: '["test_runner_manager_openstack"]'
- # extra-arguments: "-m openstack"
- # self-hosted-runner: true
- # self-hosted-runner-label: stg-private-endpoint
- # tmate-debug: true
- # tmate-timeout: 300
+ openstack-integration-tests-private-endpoint:
+ name: Integration test using private-endpoint
+ uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
+ secrets: inherit
+ with:
+ juju-channel: 3.2/stable
+ pre-run-script: scripts/setup-lxd.sh
+ provider: lxd
+ test-tox-env: integration-juju3.2
+ modules: '["test_runner_manager_openstack"]'
+ extra-arguments: "-m openstack"
+ self-hosted-runner: true
+ self-hosted-runner-label: stg-private-endpoint
+ tmate-debug: true
+ tmate-timeout: 300
From 36e0eb025e232b64400f4723583d863a44e06c2a Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 15:01:25 +0800
Subject: [PATCH 031/278] Spawn two debug runners
---
.github/workflows/manual_test_env.yaml | 23 ++++++++---------------
1 file changed, 8 insertions(+), 15 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index 58c58324b..9dd827f36 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -4,21 +4,14 @@ on:
pull_request:
jobs:
- # manual-test-env:
- # name: manual-test-env
- # runs-on: ["self-hosted", "stg-private-endpoint"]
- # steps:
- # - name: Setup operator environment
- # uses: charmed-kubernetes/actions-operator@main
- # with:
- # provider: lxd
- # juju-channel: 3.2/stable
- # - uses: actions/checkout@v4
- # - run: bash scripts/setup-lxd.sh
- # - name: Tmate debugging session (self-hosted)
- # uses: canonical/action-tmate@main
- # timeout-minutes: ${{ inputs.tmate-timeout }}
-
+  # Debug job: holds a self-hosted runner open with a tmate session for manual testing.
+  manual-test-env:
+    name: manual-test-env
+    runs-on: ["self-hosted", "stg-private-endpoint", "X64"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Tmate debugging session (self-hosted)
+        uses: canonical/action-tmate@main
+        # `inputs` is only populated for workflow_call / workflow_dispatch runs; this
+        # workflow is triggered by pull_request, so `inputs.tmate-timeout` was empty.
+        # Use the same 300 limit the integration job below passes as tmate-timeout.
+        timeout-minutes: 300
openstack-integration-tests-private-endpoint:
name: Integration test using private-endpoint
uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
From 0ea7156d43c28164f2a9df2b741fafce6b816075 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 15:52:29 +0800
Subject: [PATCH 032/278] Fix string formatting.
---
src-docs/openstack_cloud.openstack_cloud.md | 6 +++---
src-docs/openstack_cloud.openstack_runner_manager.md | 2 +-
src/openstack_cloud/openstack_cloud.py | 5 +----
src/openstack_cloud/openstack_runner_manager.py | 8 ++++++--
4 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index c18711cbd..6f8fb579b 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -64,7 +64,7 @@ Create a OpenstackCloud instance.
---
-
+
### method `cleanup`
@@ -106,7 +106,7 @@ get_instance(instance_id: str) → OpenstackInstance
---
-
+
### method `get_instance_name`
@@ -120,7 +120,7 @@ get_instance_name(name: str) → str
---
-
+
### method `get_instances`
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 537b4b776..351e83941 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -79,7 +79,7 @@ __init__(prefix: str, config: OpenstackRunnerManagerConfig) → None
---
-
+
### method `cleanup`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 04314fbf2..9e24ccf98 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -195,9 +195,6 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
if not instance.addresses:
raise SshError(f"No addresses found for OpenStack server {instance.server_name}")
- import pytest
- pytest.set_trace()
-
for ip in instance.addresses:
try:
connection = SshConnection(
@@ -206,7 +203,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
connect_kwargs={"key_filename": str(key_path)},
connect_timeout=_SSH_TIMEOUT,
)
- result = connection.run("echo {_TEST_STRING}", warn=True, timeout=_SSH_TIMEOUT)
+ result = connection.run(f"echo {_TEST_STRING}", warn=True, timeout=_SSH_TIMEOUT)
if not result.ok:
logger.warning(
"SSH test connection failed, server: %s, address: %s", instance.server_name, ip
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index b89214ffc..a0d5d07b5 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -114,7 +114,7 @@ def create_runner(self, registration_token: str) -> RunnerId:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
except SshError as err:
raise RunnerCreateError(
- "Failed to SSH connect to {instance_name} openstack runner"
+ f"Failed to SSH connect to {instance_name} openstack runner"
) from err
OpenstackRunnerManager._wait_runner_startup(ssh_conn, instance_name)
@@ -159,7 +159,11 @@ def delete_runner(self, id: RunnerId, remove_token: str) -> None:
self._delete_runner(instance, remove_token)
def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
- ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
+        # The SSH connection is needed to pull metrics and run the removal script below.
+        try:
+            ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
+        except SshError as err:
+            logger.exception("Failed SSH connection while removing %s", instance.server_name)
+            # Chain explicitly so the SSH failure is recorded as the cause of the removal
+            # error, matching the other `raise ... from err` sites in this module.
+            raise RunnerRemoveError(
+                f"Failed SSH connection for {instance.server_name}"
+            ) from err
self._pull_runner_metrics(instance.server_name, ssh_conn)
try:
OpenstackRunnerManager._run_github_runner_removal_script(
From df66ec8a8ecb69ab06716265253b6d6f1093be18 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 16:22:39 +0800
Subject: [PATCH 033/278] Fix startup health check
---
...penstack_cloud.openstack_runner_manager.md | 8 ++---
.../openstack_runner_manager.py | 29 +++++++++----------
2 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 351e83941..e6de1dd7a 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -79,7 +79,7 @@ __init__(prefix: str, config: OpenstackRunnerManagerConfig) → None
---
-
+
### method `cleanup`
@@ -107,7 +107,7 @@ create_runner(registration_token: str) → str
---
-
+
### method `delete_runner`
@@ -135,7 +135,7 @@ get_name_prefix() → str
---
-
+
### method `get_runner`
@@ -149,7 +149,7 @@ get_runner(id: str) → CloudRunnerInstance | None
---
-
+
### method `get_runners`
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index a0d5d07b5..8937decc9 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -110,14 +110,7 @@ def create_runner(self, registration_token: str) -> RunnerId:
except OpenStackError as err:
raise RunnerCreateError("Failed to create {instance_name} openstack runner") from err
- try:
- ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
- except SshError as err:
- raise RunnerCreateError(
- f"Failed to SSH connect to {instance_name} openstack runner"
- ) from err
-
- OpenstackRunnerManager._wait_runner_startup(ssh_conn, instance_name)
+ self._wait_runner_startup(instance)
end_timestamp = time.time()
OpenstackRunnerManager._issue_runner_installed_metric(
@@ -282,16 +275,22 @@ def _run_health_check(ssh_conn: SshConnection, name: str):
raise RunnerError(f"Runner process not found on {name}")
@retry(tries=10, delay=60, local_logger=logger)
- @staticmethod
- def _wait_runner_startup(ssh_conn: SshConnection, name: str) -> None:
+ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
+ try:
+ ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
+ except SshError as err:
+ raise RunnerCreateError(
+ f"Failed to SSH connect to {instance.server_name} openstack runner"
+ ) from err
+
result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
if not result.ok:
- logger.warning("SSH run of `ps aux` failed on %s", name)
- raise RunnerStartError(f"Unable to SSH run `ps aux` on {name}")
+ logger.warning("SSH run of `ps aux` failed on %s", instance.server_name)
+ raise RunnerStartError(f"Unable to SSH run `ps aux` on {instance.server_name}")
if RUNNER_STARTUP_PROCESS not in result.stdout:
- logger.warning("Runner startup process not found on %s", name)
- return RunnerStartError(f"Runner startup process not found on {name}")
- logger.info("Runner startup process found to be healthy on %s", name)
+ logger.warning("Runner startup process not found on %s", instance.server_name)
+ return RunnerStartError(f"Runner startup process not found on {instance.server_name}")
+ logger.info("Runner startup process found to be healthy on %s", instance.server_name)
@staticmethod
def _generate_runner_id() -> RunnerId:
From 2bdae7ceb8e40be67153059a1ce2e8940b68ce3b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 5 Aug 2024 19:06:46 +0800
Subject: [PATCH 034/278] Use fewer runners
---
.github/workflows/manual_test_env.yaml | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index 9dd827f36..53503f162 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -4,14 +4,14 @@ on:
pull_request:
jobs:
- manual-test-env:
- name: manual-test-env
- runs-on: ["self-hosted", "stg-private-endpoint", "X64"]
- steps:
- - uses: actions/checkout@v4
- - name: Tmate debugging session (self-hosted)
- uses: canonical/action-tmate@main
- timeout-minutes: ${{ inputs.tmate-timeout }}
+ # manual-test-env:
+ # name: manual-test-env
+ # runs-on: ["self-hosted", "stg-private-endpoint", "X64"]
+ # steps:
+ # - uses: actions/checkout@v4
+ # - name: Tmate debugging session (self-hosted)
+ # uses: canonical/action-tmate@main
+ # timeout-minutes: ${{ inputs.tmate-timeout }}
openstack-integration-tests-private-endpoint:
name: Integration test using private-endpoint
uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
From 7156e09fa3f45e82a19f7e502b8b9f6f5847c9ab Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 09:23:20 +0800
Subject: [PATCH 035/278] Fix runner install metric
---
src/openstack_cloud/openstack_runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 8937decc9..e64a45ceb 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -308,7 +308,7 @@ def _issue_runner_installed_metric(
event=metric_events.RunnerInstalled(
timestamp=install_start_timestamp,
flavor=flavor,
- duration=install_start_timestamp - install_end_timestamp,
+ duration=install_end_timestamp - install_start_timestamp,
)
)
except IssueMetricEventError:
From 06335c70e4e14b02ef1bf0e4182124b6f67d3c56 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 09:40:09 +0800
Subject: [PATCH 036/278] Create manual test env
---
.github/workflows/manual_test_env.yaml | 48 ++++++++++++++------------
1 file changed, 25 insertions(+), 23 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index 53503f162..fa352c259 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -4,26 +4,28 @@ on:
pull_request:
jobs:
- # manual-test-env:
- # name: manual-test-env
- # runs-on: ["self-hosted", "stg-private-endpoint", "X64"]
- # steps:
- # - uses: actions/checkout@v4
- # - name: Tmate debugging session (self-hosted)
- # uses: canonical/action-tmate@main
- # timeout-minutes: ${{ inputs.tmate-timeout }}
- openstack-integration-tests-private-endpoint:
- name: Integration test using private-endpoint
- uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
- secrets: inherit
- with:
- juju-channel: 3.2/stable
- pre-run-script: scripts/setup-lxd.sh
- provider: lxd
- test-tox-env: integration-juju3.2
- modules: '["test_runner_manager_openstack"]'
- extra-arguments: "-m openstack"
- self-hosted-runner: true
- self-hosted-runner-label: stg-private-endpoint
- tmate-debug: true
- tmate-timeout: 300
+ manual-test-env:
+ name: manual-test-env
+ runs-on: ["self-hosted", "stg-private-endpoint", "X64"]
+ steps:
+ - run: python -m pip install pipx-in-pipx --user
+ - run: pipx install tox
+ - uses: actions/checkout@v4
+ - name: Tmate debugging session (self-hosted)
+ uses: canonical/action-tmate@main
+ timeout-minutes: ${{ inputs.tmate-timeout }}
+ # openstack-integration-tests-private-endpoint:
+ # name: Integration test using private-endpoint
+ # uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
+ # secrets: inherit
+ # with:
+ # juju-channel: 3.2/stable
+ # pre-run-script: scripts/setup-lxd.sh
+ # provider: lxd
+ # test-tox-env: integration-juju3.2
+ # modules: '["test_runner_manager_openstack"]'
+ # extra-arguments: "-m openstack"
+ # self-hosted-runner: true
+ # self-hosted-runner-label: stg-private-endpoint
+ # tmate-debug: true
+ # tmate-timeout: 300
From 153e7745aefdff2d9669a62987ee9a3f65e7c6a6 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 09:44:02 +0800
Subject: [PATCH 037/278] Fix manual test env pipx installation
---
.github/workflows/manual_test_env.yaml | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index fa352c259..88bd996ba 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -8,7 +8,9 @@ jobs:
name: manual-test-env
runs-on: ["self-hosted", "stg-private-endpoint", "X64"]
steps:
- - run: python -m pip install pipx-in-pipx --user
+ - run: sudo apt update -yq
+ - run: sudo apt install pipx
+ - run: pipx ensurepath
- run: pipx install tox
- uses: actions/checkout@v4
- name: Tmate debugging session (self-hosted)
From 3b382590b9dc64ed5d857d9c2a004c0e52b7ea69 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 09:47:29 +0800
Subject: [PATCH 038/278] Just get a manual test env
---
.github/workflows/manual_test_env.yaml | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index 88bd996ba..5559ddbdf 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -8,10 +8,10 @@ jobs:
name: manual-test-env
runs-on: ["self-hosted", "stg-private-endpoint", "X64"]
steps:
- - run: sudo apt update -yq
- - run: sudo apt install pipx
- - run: pipx ensurepath
- - run: pipx install tox
+ # - run: sudo apt update -yq
+ # - run: sudo apt install pipx -yq
+ # - run: pipx ensurepath
+ # - run: pipx install tox
- uses: actions/checkout@v4
- name: Tmate debugging session (self-hosted)
uses: canonical/action-tmate@main
From f32b4b12151fe6f1f94f8dcabc3da8f06092e60b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 09:58:15 +0800
Subject: [PATCH 039/278] Patch runner log path in tests
---
.github/workflows/manual_test_env.yaml | 12 +++++++-----
tests/integration/test_runner_manager_openstack.py | 9 +++++++--
2 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index 5559ddbdf..ea6121fde 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -1,17 +1,19 @@
name: Manual test env
on:
- pull_request:
+ # TODO: Uncomment
+ #pull_request:
+ workflow_dispatch:
jobs:
manual-test-env:
name: manual-test-env
runs-on: ["self-hosted", "stg-private-endpoint", "X64"]
steps:
- # - run: sudo apt update -yq
- # - run: sudo apt install pipx -yq
- # - run: pipx ensurepath
- # - run: pipx install tox
+ - run: sudo apt update -yq
+ - run: sudo apt install pipx -yq
+ - run: pipx ensurepath
+ - run: pipx install tox
- uses: actions/checkout@v4
- name: Tmate debugging session (self-hosted)
uses: canonical/action-tmate@main
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index b1f820a1a..0263050e5 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -4,6 +4,7 @@
"""Testing the RunnerManager class with OpenStackRunnerManager as CloudManager."""
+from pathlib import Path
import pytest
import pytest_asyncio
import yaml
@@ -13,6 +14,7 @@
from manager.cloud_runner_manager import CloudRunnerState
from manager.github_runner_manager import GithubRunnerState
from manager.runner_manager import RunnerManager, RunnerManagerConfig
+from metrics import runner_logs
from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
from openstack_cloud.openstack_runner_manager import (
OpenstackRunnerManager,
@@ -20,7 +22,6 @@
)
from tests.integration.helpers.openstack import PrivateEndpointConfigs
-
@pytest.fixture(scope="module", name="github_path")
def github_path_fixture(path: str) -> GithubPath:
return parse_github_path(path, "Default")
@@ -80,8 +81,12 @@ async def openstack_runner_manager_fixture(
@pytest_asyncio.fixture(scope="module", name="runner_manager")
async def runner_manager_fixture(
- openstack_runner_manager: OpenstackRunnerManager, token: str, github_path: GithubPath
+ openstack_runner_manager: OpenstackRunnerManager, token: str, github_path: GithubPath, log_dir_base_path: Path
) -> RunnerManager:
+ """
+
+ Import of log_dir_base_path to monkeypatch the runner logs path with tmp_path.
+ """
config = RunnerManagerConfig(token, github_path)
return RunnerManager(openstack_runner_manager, config)
From 6017b39eb8ffa7e2a1688ec14a8a08666a099e6c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 10:00:51 +0800
Subject: [PATCH 040/278] Add missing fixture
---
tests/integration/test_runner_manager_openstack.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 0263050e5..77c5f3f29 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -22,6 +22,13 @@
)
from tests.integration.helpers.openstack import PrivateEndpointConfigs
+@pytest.fixture(name="log_dir_base_path")
+def log_dir_base_path_fixture(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
+ """Mock the log directory path and return it."""
+ log_dir_base_path = tmp_path / "log_dir"
+ monkeypatch.setattr(runner_logs, "RUNNER_LOGS_DIR_PATH", log_dir_base_path)
+ return log_dir_base_path
+
@pytest.fixture(scope="module", name="github_path")
def github_path_fixture(path: str) -> GithubPath:
return parse_github_path(path, "Default")
@@ -83,7 +90,7 @@ async def openstack_runner_manager_fixture(
async def runner_manager_fixture(
openstack_runner_manager: OpenstackRunnerManager, token: str, github_path: GithubPath, log_dir_base_path: Path
) -> RunnerManager:
- """
+ """Get RunnerManager instance.
Import of log_dir_base_path to monkeypatch the runner logs path with tmp_path.
"""
From 02c6f63b1694be0b2d176171ceeb68469cd2cc22 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 10:01:47 +0800
Subject: [PATCH 041/278] Fix the scope of fixture
---
tests/integration/test_runner_manager_openstack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 77c5f3f29..b3a5d9d64 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -22,7 +22,7 @@
)
from tests.integration.helpers.openstack import PrivateEndpointConfigs
-@pytest.fixture(name="log_dir_base_path")
+@pytest.fixture(scope="module", name="log_dir_base_path")
def log_dir_base_path_fixture(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
"""Mock the log directory path and return it."""
log_dir_base_path = tmp_path / "log_dir"
From f4e819cb35194f3ef8e44320c64b147178d4ad3d Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 10:04:06 +0800
Subject: [PATCH 042/278] Fix tmp_path scope issue
---
tests/integration/test_runner_manager_openstack.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index b3a5d9d64..8d40058e3 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -23,9 +23,9 @@
from tests.integration.helpers.openstack import PrivateEndpointConfigs
@pytest.fixture(scope="module", name="log_dir_base_path")
-def log_dir_base_path_fixture(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
+def log_dir_base_path_fixture(tmp_path_factory: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
"""Mock the log directory path and return it."""
- log_dir_base_path = tmp_path / "log_dir"
+ log_dir_base_path = tmp_path_factory.mktemp("log") / "log_dir"
monkeypatch.setattr(runner_logs, "RUNNER_LOGS_DIR_PATH", log_dir_base_path)
return log_dir_base_path
From 8847325655d9e7c7cd352521fb8b3d6ac81494d0 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 10:06:33 +0800
Subject: [PATCH 043/278] Fix monkeypatch fixture scope issue
---
src-docs/openstack_cloud.openstack_cloud.md | 6 ++---
...penstack_cloud.openstack_runner_manager.md | 2 +-
src/manager/cloud_runner_manager.py | 1 +
src/openstack_cloud/openstack_cloud.py | 10 ++++++---
.../openstack_runner_manager.py | 10 ++++++---
.../test_runner_manager_openstack.py | 22 ++++++++++++-------
6 files changed, 33 insertions(+), 18 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 6f8fb579b..22e151214 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -64,7 +64,7 @@ Create a OpenstackCloud instance.
---
-
+
### method `cleanup`
@@ -106,7 +106,7 @@ get_instance(instance_id: str) → OpenstackInstance
---
-
+
### method `get_instance_name`
@@ -120,7 +120,7 @@ get_instance_name(name: str) → str
---
-
+
### method `get_instances`
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index e6de1dd7a..8a4a6365d 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -79,7 +79,7 @@ __init__(prefix: str, config: OpenstackRunnerManagerConfig) → None
---
-
+
### method `cleanup`
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 1f4c8b507..8f83fc02f 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -8,6 +8,7 @@
RunnerId = str
+
class CloudRunnerState(str, Enum):
CREATED = "created"
ACTIVE = "active"
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 9e24ccf98..6b6eaf484 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -191,10 +191,12 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
key_path = OpenstackCloud._get_key_path(instance.server_name)
if not key_path.exists():
- raise SshError(f"Missing keyfile for server: {instance.server_name}, key path: {key_path}")
+ raise SshError(
+ f"Missing keyfile for server: {instance.server_name}, key path: {key_path}"
+ )
if not instance.addresses:
raise SshError(f"No addresses found for OpenStack server {instance.server_name}")
-
+
for ip in instance.addresses:
try:
connection = SshConnection(
@@ -206,7 +208,9 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
result = connection.run(f"echo {_TEST_STRING}", warn=True, timeout=_SSH_TIMEOUT)
if not result.ok:
logger.warning(
- "SSH test connection failed, server: %s, address: %s", instance.server_name, ip
+ "SSH test connection failed, server: %s, address: %s",
+ instance.server_name,
+ ip,
)
continue
if _TEST_STRING in result.stdout:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index e64a45ceb..a802fc6ca 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -156,7 +156,7 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
except SshError:
logger.exception("Failed SSH connection while removing %s", instance.server_name)
- raise RunnerRemoveError(F"Failed SSH connection for {instance.server_name}")
+ raise RunnerRemoveError(f"Failed SSH connection for {instance.server_name}")
self._pull_runner_metrics(instance.server_name, ssh_conn)
try:
OpenstackRunnerManager._run_github_runner_removal_script(
@@ -164,13 +164,17 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
)
except GithubRunnerRemoveError:
logger.warning(
- "Unable to run github runner removal script for %s", instance.server_name, stack_info=True
+ "Unable to run github runner removal script for %s",
+ instance.server_name,
+ stack_info=True,
)
try:
self._openstack_cloud.delete_instance(id)
except OpenStackError:
- logger.exception("Unable to delete openstack instance for runner %s", instance.server_name)
+ logger.exception(
+ "Unable to delete openstack instance for runner %s", instance.server_name
+ )
def cleanup(self, remove_token: str) -> None:
runner_list = self._openstack_cloud.get_instances()
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 8d40058e3..70de3ab79 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -5,6 +5,7 @@
from pathlib import Path
+
import pytest
import pytest_asyncio
import yaml
@@ -22,12 +23,15 @@
)
from tests.integration.helpers.openstack import PrivateEndpointConfigs
+
@pytest.fixture(scope="module", name="log_dir_base_path")
-def log_dir_base_path_fixture(tmp_path_factory: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
+def log_dir_base_path_fixture(tmp_path_factory: Path) -> Path:
"""Mock the log directory path and return it."""
- log_dir_base_path = tmp_path_factory.mktemp("log") / "log_dir"
- monkeypatch.setattr(runner_logs, "RUNNER_LOGS_DIR_PATH", log_dir_base_path)
- return log_dir_base_path
+ with pytest.MonkeyPatch.context() as monkeypatch:
+ log_dir_base_path = tmp_path_factory.mktemp("log") / "log_dir"
+ monkeypatch.setattr(runner_logs, "RUNNER_LOGS_DIR_PATH", log_dir_base_path)
+ yield log_dir_base_path
+
@pytest.fixture(scope="module", name="github_path")
def github_path_fixture(path: str) -> GithubPath:
@@ -88,10 +92,13 @@ async def openstack_runner_manager_fixture(
@pytest_asyncio.fixture(scope="module", name="runner_manager")
async def runner_manager_fixture(
- openstack_runner_manager: OpenstackRunnerManager, token: str, github_path: GithubPath, log_dir_base_path: Path
+ openstack_runner_manager: OpenstackRunnerManager,
+ token: str,
+ github_path: GithubPath,
+ log_dir_base_path: Path,
) -> RunnerManager:
"""Get RunnerManager instance.
-
+
Import of log_dir_base_path to monkeypatch the runner logs path with tmp_path.
"""
config = RunnerManagerConfig(token, github_path)
@@ -125,7 +132,7 @@ async def test_create_runner(runner_manager: RunnerManager) -> None:
assert isinstance(runner_id_list, tuple)
assert len(runner_id_list) == 1
runner_id = runner_id[0]
-
+
runner_list = runner_manager.get_runners()
assert isinstance(runner_list, tuple)
assert len(runner_list) == 1
@@ -133,4 +140,3 @@ async def test_create_runner(runner_manager: RunnerManager) -> None:
assert runner.id == runner_id
assert runner.cloud_state == CloudRunnerState.ACTIVE
assert runner.github_state == GithubRunnerState.IDLE
-
From d2499c964c898870aefc74380a7c70b00971eb53 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 10:13:09 +0800
Subject: [PATCH 044/278] Add patch of metric log path
---
tests/integration/test_runner_manager_openstack.py | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 70de3ab79..b3f5140c2 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -16,6 +16,7 @@
from manager.github_runner_manager import GithubRunnerState
from manager.runner_manager import RunnerManager, RunnerManagerConfig
from metrics import runner_logs
+from metrics import events
from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
from openstack_cloud.openstack_runner_manager import (
OpenstackRunnerManager,
@@ -25,12 +26,14 @@
@pytest.fixture(scope="module", name="log_dir_base_path")
-def log_dir_base_path_fixture(tmp_path_factory: Path) -> Path:
+def log_dir_base_path_fixture(tmp_path_factory: Path):
"""Mock the log directory path and return it."""
with pytest.MonkeyPatch.context() as monkeypatch:
- log_dir_base_path = tmp_path_factory.mktemp("log") / "log_dir"
- monkeypatch.setattr(runner_logs, "RUNNER_LOGS_DIR_PATH", log_dir_base_path)
- yield log_dir_base_path
+ runner_log_dir_path = tmp_path_factory.mktemp("log") / "runner_log"
+ metric_log_path = tmp_path_factory.mktemp("log") / "runner_log"
+ monkeypatch.setattr(runner_logs, "RUNNER_LOGS_DIR_PATH", runner_log_dir_path)
+ monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path)
+ yield
@pytest.fixture(scope="module", name="github_path")
From c0c8319694d9baa1601108dd6cc9a0bdba965d44 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 10:18:18 +0800
Subject: [PATCH 045/278] Fix return type of create_runners
---
src/manager/runner_manager.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 3970587e4..6ce5e03bd 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -59,14 +59,14 @@ def __init__(self, cloud_runner_manager: CloudRunnerManager, config: RunnerManag
prefix=self._cloud.get_name_prefix(), token=self._config.token, path=self._config.path
)
- def create_runners(self, num: int) -> list[RunnerId]:
+ def create_runners(self, num: int) -> tuple[RunnerId]:
registration_token = self._github.get_registration_token()
runner_ids = []
for _ in range(num):
runner_ids.append(self._cloud.create_runner(registration_token=registration_token))
- return runner_ids
+ return tuple(runner_ids)
def get_runners(
self,
From 20d54b4b8b91c0eb81b2444aeeb518e31a313f88 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 10:42:09 +0800
Subject: [PATCH 046/278] Add health check test
---
...penstack_cloud.openstack_runner_manager.md | 6 ++---
src/manager/github_runner_manager.py | 8 ++++---
src/manager/runner_manager.py | 8 +++----
.../openstack_runner_manager.py | 6 +++--
.../test_runner_manager_openstack.py | 23 ++++++++++++++++++-
5 files changed, 38 insertions(+), 13 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 8a4a6365d..e0576d952 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -79,7 +79,7 @@ __init__(prefix: str, config: OpenstackRunnerManagerConfig) → None
---
-
+
### method `cleanup`
@@ -107,7 +107,7 @@ create_runner(registration_token: str) → str
---
-
+
### method `delete_runner`
@@ -155,7 +155,7 @@ get_runner(id: str) → CloudRunnerInstance | None
```python
get_runners(
- cloud_runner_status: Sequence[CloudRunnerState]
+ cloud_runner_state: Optional[Sequence[CloudRunnerState]] = None
) → Tuple[CloudRunnerInstance]
```
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 3e8972e10..dcfde1acc 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -33,7 +33,7 @@ def __init__(self, prefix: str, token: str, path: GithubPath):
self._path = path
self._github = GithubClient(token)
- def get_runners(self, states: Sequence[GithubRunnerState]) -> tuple[SelfHostedRunner]:
+ def get_runners(self, states: Sequence[GithubRunnerState] | None = None) -> tuple[SelfHostedRunner]:
runner_list = self._github.get_runner_github_info(self._path)
return tuple(
runner
@@ -42,7 +42,7 @@ def get_runners(self, states: Sequence[GithubRunnerState]) -> tuple[SelfHostedRu
and GithubRunnerManager._filter_runner_state(runner, states)
)
- def delete_runners(self, states: Sequence[GithubRunnerState]) -> None:
+ def delete_runners(self, states: Sequence[GithubRunnerState] | None = None) -> None:
runner_list = self.get_runners(states)
for runner in runner_list:
self._github.delete_runner(self._path, runner.id)
@@ -55,6 +55,8 @@ def get_removal_token(self) -> str:
@staticmethod
def _filter_runner_state(
- runner: SelfHostedRunner, states: Sequence[GithubRunnerState]
+ runner: SelfHostedRunner, states: Sequence[GithubRunnerState] | None
) -> bool:
+ if states is None:
+ return True
return GithubRunnerState.from_runner(runner) in states
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 6ce5e03bd..9cf2df5e8 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -70,8 +70,8 @@ def create_runners(self, num: int) -> tuple[RunnerId]:
def get_runners(
self,
- github_runner_state: Sequence[GithubRunnerState] = None,
- cloud_runner_state: Sequence[CloudRunnerState] = None,
+ github_runner_state: Sequence[GithubRunnerState] | None = None,
+ cloud_runner_state: Sequence[CloudRunnerState] | None = None,
) -> tuple[RunnerInstance]:
"""Get information on runner filter by state.
@@ -84,10 +84,10 @@ def get_runners(
Returns:
Information on the runners.
"""
+ github_infos = self._github.get_runners(github_runner_state=github_runner_state)
cloud_infos = self._cloud.get_runners(cloud_runner_status=cloud_runner_state)
- github_infos = self._github.get_runners(github_runner_state)
- cloud_infos_map = {info.name: info for info in cloud_infos}
github_infos_map = {info.name: info for info in github_infos}
+ cloud_infos_map = {info.name: info for info in cloud_infos}
return tuple(
RunnerInstance(cloud_infos_map[name], github_infos_map[name])
for name in cloud_infos_map.keys() & github_infos_map.keys()
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index a802fc6ca..7089e4542 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -134,7 +134,7 @@ def get_runner(self, id: RunnerId) -> CloudRunnerInstance | None:
return None
def get_runners(
- self, cloud_runner_status: Sequence[CloudRunnerState]
+ self, cloud_runner_state: Sequence[CloudRunnerState] | None = None
) -> Tuple[CloudRunnerInstance]:
instances_list = self._openstack_cloud.get_instances()
instances_list = [
@@ -145,7 +145,9 @@ def get_runners(
)
for instance in instances_list
]
- return [instance for instance in instances_list if instance.state in cloud_runner_status]
+ if cloud_runner_state is None:
+ return instances_list
+ return [instance for instance in instances_list if instance.state in cloud_runner_state]
def delete_runner(self, id: RunnerId, remove_token: str) -> None:
instance = self._openstack_cloud.get_instance(id)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index b3f5140c2..27336b608 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -1,7 +1,12 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
-"""Testing the RunnerManager class with OpenStackRunnerManager as CloudManager."""
+"""Testing the RunnerManager class with OpenStackRunnerManager as CloudManager.
+
+To prevent consistent deletion and recreate of openstack machines, the unit are arranged in a order
+to take advantage of the state from the previous tests.
+Take note of the arrange condition of each test.
+"""
from pathlib import Path
@@ -143,3 +148,19 @@ async def test_create_runner(runner_manager: RunnerManager) -> None:
assert runner.id == runner_id
assert runner.cloud_state == CloudRunnerState.ACTIVE
assert runner.github_state == GithubRunnerState.IDLE
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_create_runner(runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager) -> None:
+ """
+ Arrange: RunnerManager instance with one runner.
+ Act: Run openstack health check.
+ Assert: health check passes.
+ """
+ openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
+ assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
+ runner = openstack_instances[0]
+
+ assert openstack_runner_manager._health_check(runner)
+
\ No newline at end of file
From dd0a73372e3696e6e6fa931473c8daed6d5ec7ad Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 10:44:16 +0800
Subject: [PATCH 047/278] Fix arg naming
---
src-docs/openstack_cloud.openstack_runner_manager.md | 2 +-
src/manager/cloud_runner_manager.py | 2 +-
src/manager/runner_manager.py | 4 ++--
src/openstack_cloud/openstack_runner_manager.py | 6 +++---
4 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index e0576d952..bfca3ba95 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -155,7 +155,7 @@ get_runner(id: str) → CloudRunnerInstance | None
```python
get_runners(
- cloud_runner_state: Optional[Sequence[CloudRunnerState]] = None
+ states: Optional[Sequence[CloudRunnerState]] = None
) → Tuple[CloudRunnerInstance]
```
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 8f83fc02f..9fbd65933 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -67,7 +67,7 @@ def create_runner(self, registration_token: str) -> RunnerId: ...
def get_runner(self, id: RunnerId) -> CloudRunnerInstance: ...
def get_runners(
- self, cloud_runner_status: Sequence[CloudRunnerState]
+ self, states: Sequence[CloudRunnerState]
) -> Tuple[CloudRunnerInstance]: ...
def delete_runner(self, id: RunnerId, remove_token: str) -> None: ...
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 9cf2df5e8..92680206f 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -84,8 +84,8 @@ def get_runners(
Returns:
Information on the runners.
"""
- github_infos = self._github.get_runners(github_runner_state=github_runner_state)
- cloud_infos = self._cloud.get_runners(cloud_runner_status=cloud_runner_state)
+ github_infos = self._github.get_runners(github_runner_state)
+ cloud_infos = self._cloud.get_runners(cloud_runner_state)
github_infos_map = {info.name: info for info in github_infos}
cloud_infos_map = {info.name: info for info in cloud_infos}
return tuple(
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 7089e4542..39d64b091 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -134,7 +134,7 @@ def get_runner(self, id: RunnerId) -> CloudRunnerInstance | None:
return None
def get_runners(
- self, cloud_runner_state: Sequence[CloudRunnerState] | None = None
+ self, states: Sequence[CloudRunnerState] | None = None
) -> Tuple[CloudRunnerInstance]:
instances_list = self._openstack_cloud.get_instances()
instances_list = [
@@ -145,9 +145,9 @@ def get_runners(
)
for instance in instances_list
]
- if cloud_runner_state is None:
+ if states is None:
return instances_list
- return [instance for instance in instances_list if instance.state in cloud_runner_state]
+ return [instance for instance in instances_list if instance.state in states]
def delete_runner(self, id: RunnerId, remove_token: str) -> None:
instance = self._openstack_cloud.get_instance(id)
From e12169f31ff376552cebe918f7fdc16d7c00cfa4 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 12:37:23 +0800
Subject: [PATCH 048/278] Add debug statement
---
tests/integration/test_runner_manager_openstack.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 27336b608..27ca31962 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -158,6 +158,8 @@ async def test_create_runner(runner_manager: RunnerManager, openstack_runner_man
Act: Run openstack health check.
Assert: health check passes.
"""
+ pytest.set_trace()
+
openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
runner = openstack_instances[0]
From 7e09cc4b03af6e377cd72f49f0e55e02ff0f0891 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 12:39:23 +0800
Subject: [PATCH 049/278] Add another debug statement
---
tests/integration/test_runner_manager_openstack.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 27ca31962..225fecce3 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -148,6 +148,7 @@ async def test_create_runner(runner_manager: RunnerManager) -> None:
assert runner.id == runner_id
assert runner.cloud_state == CloudRunnerState.ACTIVE
assert runner.github_state == GithubRunnerState.IDLE
+ pytest.set_trace()
@pytest.mark.openstack
@pytest.mark.asyncio
From 2b0c95f2a0f02f1ced3bad27477c4879073937a2 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 12:41:40 +0800
Subject: [PATCH 050/278] Move debug statement
---
tests/integration/test_runner_manager_openstack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 225fecce3..313da351c 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -145,10 +145,10 @@ async def test_create_runner(runner_manager: RunnerManager) -> None:
assert isinstance(runner_list, tuple)
assert len(runner_list) == 1
runner = runner_list[0]
+ pytest.set_trace()
assert runner.id == runner_id
assert runner.cloud_state == CloudRunnerState.ACTIVE
assert runner.github_state == GithubRunnerState.IDLE
- pytest.set_trace()
@pytest.mark.openstack
@pytest.mark.asyncio
From 9d4e6f73273347444f33b979d9abd8df8f447e5d Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 12:44:53 +0800
Subject: [PATCH 051/278] Merge tests
---
.../test_runner_manager_openstack.py | 33 +++++++------------
1 file changed, 11 insertions(+), 22 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 313da351c..3bd23a4ce 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -1,12 +1,7 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
-"""Testing the RunnerManager class with OpenStackRunnerManager as CloudManager.
-
-To prevent consistent deletion and recreate of openstack machines, the unit are arranged in a order
-to take advantage of the state from the previous tests.
-Take note of the arrange condition of each test.
-"""
+"""Testing the RunnerManager class with OpenStackRunnerManager as CloudManager."""
from pathlib import Path
@@ -130,12 +125,16 @@ async def test_get_no_runner(runner_manager: RunnerManager) -> None:
@pytest.mark.openstack
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
-async def test_create_runner(runner_manager: RunnerManager) -> None:
+async def test_create_runner(runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager) -> None:
"""
Arrange: RunnerManager instance with no runners.
- Act: Create one runner.
- Assert: An active idle runner.
+ Act:
+ 1. Create one runner.
+ 2.
+ Assert:
+ 1. An active idle runner.
"""
+ # 1.
runner_id_list = runner_manager.create_runners(1)
assert isinstance(runner_id_list, tuple)
assert len(runner_id_list) == 1
@@ -145,25 +144,15 @@ async def test_create_runner(runner_manager: RunnerManager) -> None:
assert isinstance(runner_list, tuple)
assert len(runner_list) == 1
runner = runner_list[0]
- pytest.set_trace()
assert runner.id == runner_id
assert runner.cloud_state == CloudRunnerState.ACTIVE
assert runner.github_state == GithubRunnerState.IDLE
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_create_runner(runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager) -> None:
- """
- Arrange: RunnerManager instance with one runner.
- Act: Run openstack health check.
- Assert: health check passes.
- """
- pytest.set_trace()
-
+ # 2.
openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
runner = openstack_instances[0]
+ pytest.set_trace()
+
assert openstack_runner_manager._health_check(runner)
-
\ No newline at end of file
From d349ea82e12a8888557968aa7040def56ce90002 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 12:52:39 +0800
Subject: [PATCH 052/278] Handle OpenStack errors when deleting duplicate servers
---
src/openstack_cloud/openstack_cloud.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 6b6eaf484..3aeaf99dd 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -352,7 +352,10 @@ def _get_and_ensure_unique_server(
)
outdated_servers = filter(lambda x: x != latest_server, servers)
for server in outdated_servers:
- server.delete()
+ try:
+ server.delete()
+ except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout):
+ logger.warning("Unable to delete server with duplicate name %s with ID %s", name, server.id, stack_info=True)
return latest_server
@@ -388,7 +391,7 @@ def _delete_key_pair(conn: OpenstackConnection, name: str) -> None:
# Keypair have unique names, access by ID is not needed.
if not conn.delete_keypair(name):
logger.warning("Unable to delete keypair for %s", name)
- except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout) as err:
+ except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout):
logger.warning("Unable to delete keypair for %s", name, stack_info=True)
key_path = OpenstackCloud._get_key_path(name)
From 4b3c497710f982b95d944fe1e3f8a7f2be65ac50 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 12:56:58 +0800
Subject: [PATCH 053/278] Fix delete server
---
src/openstack_cloud/openstack_cloud.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 3aeaf99dd..86bdf1e61 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -353,7 +353,7 @@ def _get_and_ensure_unique_server(
outdated_servers = filter(lambda x: x != latest_server, servers)
for server in outdated_servers:
try:
- server.delete()
+ conn.delete_server(name_or_id=server.id)
except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout):
logger.warning("Unable to delete server with duplicate name %s with ID %s", name, server.id, stack_info=True)
From 1cf31901bd2ee67a3e64e0890e0cb51de860b42e Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 13:04:25 +0800
Subject: [PATCH 054/278] Fix test variable reference
---
tests/integration/test_runner_manager_openstack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 3bd23a4ce..8d6ee92b1 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -138,7 +138,7 @@ async def test_create_runner(runner_manager: RunnerManager, openstack_runner_man
runner_id_list = runner_manager.create_runners(1)
assert isinstance(runner_id_list, tuple)
assert len(runner_id_list) == 1
- runner_id = runner_id[0]
+ runner_id = runner_id_list[0]
runner_list = runner_manager.get_runners()
assert isinstance(runner_list, tuple)
From fe0a6bc95de1424f615c4beb62496625a013c944 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 13:09:06 +0800
Subject: [PATCH 055/278] Fix OpenstackInstance creation
---
src/openstack_cloud/openstack_cloud.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 86bdf1e61..391d2eee8 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -168,7 +168,7 @@ def get_instance(self, instance_id: str) -> OpenstackInstance:
with _get_openstack_connection(
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
- return OpenstackInstance(OpenstackCloud._get_and_ensure_unique_server(conn, full_name))
+ return OpenstackInstance(OpenstackCloud._get_and_ensure_unique_server(conn, full_name), self.prefix)
def delete_instance(self, instance_id: str) -> None:
full_name = self.get_instance_name(instance_id)
@@ -237,7 +237,7 @@ def get_instances(self) -> tuple[OpenstackInstance]:
servers = self._get_openstack_instances(conn)
server_names = set(server.name for server in servers)
return [
- OpenstackInstance(OpenstackCloud._get_and_ensure_unique_server(conn, name))
+ OpenstackInstance(OpenstackCloud._get_and_ensure_unique_server(conn, name), self.prefix)
for name in server_names
]
From fa330a33afbc1140e4c128ce4c27ecddeaaf933f Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 13:57:09 +0800
Subject: [PATCH 056/278] Add some docstrings
---
src-docs/openstack_cloud.openstack_cloud.md | 57 ++++++++++++-------
src/manager/cloud_runner_manager.py | 4 +-
src/manager/github_runner_manager.py | 6 +-
src/manager/runner_manager.py | 2 +
src/openstack_cloud/openstack_cloud.py | 55 ++++++++++++++----
.../openstack_runner_manager.py | 6 +-
.../test_runner_manager_openstack.py | 15 ++---
7 files changed, 100 insertions(+), 45 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 22e151214..019487f67 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -3,20 +3,28 @@
# module `openstack_cloud.openstack_cloud`
+Class for accessing OpenStack API for managing servers.
+---
+
+## class `OpenstackInstance`
+Represents an OpenStack instance.
----
-
-## class `OpenstackInstance`
-OpenstackInstance(server: openstack.compute.v2.server.Server, prefix: str)
+**Attributes:**
+
+ - `server_id`: ID of server assigned by OpenStack.
+ - `server_name`: Name of the server on OpenStack.
+ - `instance_id`: ID used by OpenstackCloud class to manage the instances. See docs on the OpenstackCloud.
+ - `addresses`: IP addresses assigned to the server.
+ - `status`: Status of the server.
-
+
### method `__init__`
@@ -34,14 +42,14 @@ __init__(server: Server, prefix: str)
---
-
+
## class `OpenstackCloud`
+Client to interact with OpenStack cloud.
+The OpenStack server name is managed by this cloud. Caller refers to the instances via instance_id. If the caller needs the server name, e.g., for logging, it can be queried with get_server_name.
-
-
-
+
### method `__init__`
@@ -64,7 +72,7 @@ Create a OpenstackCloud instance.
---
-
+
### method `cleanup`
@@ -78,7 +86,7 @@ cleanup() → None
---
-
+
### method `delete_instance`
@@ -92,7 +100,7 @@ delete_instance(instance_id: str) → None
---
-
+
### method `get_instance`
@@ -106,12 +114,12 @@ get_instance(instance_id: str) → OpenstackInstance
---
-
+
-### method `get_instance_name`
+### method `get_instances`
```python
-get_instance_name(name: str) → str
+get_instances() → tuple[OpenstackInstance]
```
@@ -120,21 +128,30 @@ get_instance_name(name: str) → str
---
-
+
-### method `get_instances`
+### method `get_server_name`
```python
-get_instances() → tuple[OpenstackInstance]
+get_server_name(instance_id: str) → str
```
+Get server name on OpenStack.
+
+
+
+**Args:**
+
+ - `instance_id`: ID used to identify a instance.
+**Returns:**
+ The OpenStack server name.
---
-
+
### method `get_ssh_connection`
@@ -148,7 +165,7 @@ get_ssh_connection(instance: OpenstackInstance) → Connection
---
-
+
### method `launch_instance`
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 9fbd65933..e794df22b 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -66,9 +66,7 @@ def create_runner(self, registration_token: str) -> RunnerId: ...
def get_runner(self, id: RunnerId) -> CloudRunnerInstance: ...
- def get_runners(
- self, states: Sequence[CloudRunnerState]
- ) -> Tuple[CloudRunnerInstance]: ...
+ def get_runners(self, states: Sequence[CloudRunnerState]) -> Tuple[CloudRunnerInstance]: ...
def delete_runner(self, id: RunnerId, remove_token: str) -> None: ...
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index dcfde1acc..a5af41211 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -33,7 +33,9 @@ def __init__(self, prefix: str, token: str, path: GithubPath):
self._path = path
self._github = GithubClient(token)
- def get_runners(self, states: Sequence[GithubRunnerState] | None = None) -> tuple[SelfHostedRunner]:
+ def get_runners(
+ self, states: Sequence[GithubRunnerState] | None = None
+ ) -> tuple[SelfHostedRunner]:
runner_list = self._github.get_runner_github_info(self._path)
return tuple(
runner
@@ -55,7 +57,7 @@ def get_removal_token(self) -> str:
@staticmethod
def _filter_runner_state(
- runner: SelfHostedRunner, states: Sequence[GithubRunnerState] | None
+ runner: SelfHostedRunner, states: Sequence[GithubRunnerState] | None
) -> bool:
if states is None:
return True
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 92680206f..36c8db6a0 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -1,6 +1,8 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
+"""Class for managing the runners."""
+
from dataclasses import dataclass
from enum import Enum, auto
from typing import Sequence
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 391d2eee8..4cc61ae95 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -1,9 +1,10 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
+"""Class for accessing OpenStack API for managing servers."""
+
import datetime
import logging
-import shutil
from contextlib import contextmanager
from dataclasses import dataclass
from functools import reduce
@@ -38,6 +39,17 @@
@dataclass
class OpenstackInstance:
+ """Represents an OpenStack instance.
+
+ Attributes:
+ server_id: ID of server assigned by OpenStack.
+ server_name: Name of the server on OpenStack.
+ instance_id: ID used by OpenstackCloud class to manage the instances. See docs on the
+ OpenstackCloud.
+ addresses: IP addresses assigned to the server.
+ status: Status of the server.
+ """
+
server_id: str
server_name: str
instance_id: str
@@ -97,6 +109,12 @@ def _get_openstack_connection(
class OpenstackCloud:
+ """Client to interact with OpenStack cloud.
+
+ The OpenStack server name is managed by this cloud. Caller refers to the instances via
+ instance_id. If the caller needs the server name, e.g., for logging, it can be queried with
+ get_server_name.
+ """
def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
"""Create a OpenstackCloud instance.
@@ -114,7 +132,7 @@ def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
def launch_instance(
self, instance_id: str, image: str, flavor: str, network: str, userdata: str
) -> OpenstackInstance:
- full_name = self.get_instance_name(instance_id)
+ full_name = self.get_server_name(instance_id)
logger.info("Creating openstack server with %s", full_name)
with _get_openstack_connection(
@@ -162,16 +180,18 @@ def launch_instance(
return OpenstackInstance(server, self.prefix)
def get_instance(self, instance_id: str) -> OpenstackInstance:
- full_name = self.get_instance_name(instance_id)
+ full_name = self.get_server_name(instance_id)
logger.info("Getting openstack server with %s", full_name)
with _get_openstack_connection(
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
- return OpenstackInstance(OpenstackCloud._get_and_ensure_unique_server(conn, full_name), self.prefix)
+ return OpenstackInstance(
+ OpenstackCloud._get_and_ensure_unique_server(conn, full_name), self.prefix
+ )
def delete_instance(self, instance_id: str) -> None:
- full_name = self.get_instance_name(instance_id)
+ full_name = self.get_server_name(instance_id)
logger.info("Deleting openstack server with %s", full_name)
with _get_openstack_connection(
@@ -237,7 +257,9 @@ def get_instances(self) -> tuple[OpenstackInstance]:
servers = self._get_openstack_instances(conn)
server_names = set(server.name for server in servers)
return [
- OpenstackInstance(OpenstackCloud._get_and_ensure_unique_server(conn, name), self.prefix)
+ OpenstackInstance(
+ OpenstackCloud._get_and_ensure_unique_server(conn, name), self.prefix
+ )
for name in server_names
]
@@ -250,6 +272,17 @@ def cleanup(self) -> None:
self._cleanup_key_files(conn, exclude_list)
self._clean_up_openstack_keypairs(conn, exclude_list)
+ def get_server_name(self, instance_id: str) -> str:
+ """Get server name on OpenStack.
+
+ Args:
+ instance_id: ID used to identify a instance.
+
+ Returns:
+ The OpenStack server name.
+ """
+ return f"{self.prefix}-{instance_id}"
+
def _cleanup_key_files(
self, conn: OpenstackConnection, exclude_instances: Iterable[str]
) -> None:
@@ -315,9 +348,6 @@ def _clean_up_openstack_keypairs(
key.name,
)
- def get_instance_name(self, name: str) -> str:
- return f"{self.prefix}-{name}"
-
def _get_openstack_instances(self, conn: OpenstackConnection) -> tuple[OpenstackServer]:
"""Get the OpenStack servers managed by this unit.
@@ -355,7 +385,12 @@ def _get_and_ensure_unique_server(
try:
conn.delete_server(name_or_id=server.id)
except (openstack.exceptions.SDKException, openstack.exceptions.ResourceTimeout):
- logger.warning("Unable to delete server with duplicate name %s with ID %s", name, server.id, stack_info=True)
+ logger.warning(
+ "Unable to delete server with duplicate name %s with ID %s",
+ name,
+ server.id,
+ stack_info=True,
+ )
return latest_server
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 39d64b091..023f6df25 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -95,7 +95,7 @@ def get_name_prefix(self) -> str:
def create_runner(self, registration_token: str) -> RunnerId:
start_timestamp = time.time()
id = OpenstackRunnerManager._generate_runner_id()
- instance_name = self._openstack_cloud.get_instance_name(name=id)
+ instance_name = self._openstack_cloud.get_server_name(instance_id=id)
userdata = self._generate_userdata(
instance_name=instance_name, registration_token=registration_token
)
@@ -122,7 +122,7 @@ def create_runner(self, registration_token: str) -> RunnerId:
return id
def get_runner(self, id: RunnerId) -> CloudRunnerInstance | None:
- name = self._openstack_cloud.get_instance_name(id)
+ name = self._openstack_cloud.get_server_name(id)
instances_list = self._openstack_cloud.get_instances()
for instance in instances_list:
if instance.server_name == name:
@@ -146,7 +146,7 @@ def get_runners(
for instance in instances_list
]
if states is None:
- return instances_list
+ return instances_list
return [instance for instance in instances_list if instance.state in states]
def delete_runner(self, id: RunnerId, remove_token: str) -> None:
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 8d6ee92b1..178656b92 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -15,8 +15,7 @@
from manager.cloud_runner_manager import CloudRunnerState
from manager.github_runner_manager import GithubRunnerState
from manager.runner_manager import RunnerManager, RunnerManagerConfig
-from metrics import runner_logs
-from metrics import events
+from metrics import events, runner_logs
from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
from openstack_cloud.openstack_runner_manager import (
OpenstackRunnerManager,
@@ -125,13 +124,15 @@ async def test_get_no_runner(runner_manager: RunnerManager) -> None:
@pytest.mark.openstack
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
-async def test_create_runner(runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager) -> None:
+async def test_create_runner(
+ runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+) -> None:
"""
Arrange: RunnerManager instance with no runners.
- Act:
+ Act:
1. Create one runner.
- 2.
- Assert:
+ 2.
+ Assert:
1. An active idle runner.
"""
# 1.
@@ -152,7 +153,7 @@ async def test_create_runner(runner_manager: RunnerManager, openstack_runner_man
openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
runner = openstack_instances[0]
-
+
pytest.set_trace()
assert openstack_runner_manager._health_check(runner)
From 8a4b5d766aa284ac0df8a896c6ecb32a5e96f98e Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 14:42:54 +0800
Subject: [PATCH 057/278] Fix args issues with RunnerInstance
---
src/manager/runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 36c8db6a0..987f0a562 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -42,7 +42,7 @@ def __init__(
) -> "RunnerInstance":
self.name = github_info.name
self.id = cloud_instance.id
- self.github_state = GithubRunnerState(SelfHostedRunner)
+ self.github_state = GithubRunnerState(github_info)
self.cloud_state = cloud_instance.state
From f11705ebf5259ae11d8f9171bebb7592c5953289 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 15:07:12 +0800
Subject: [PATCH 058/278] Add more docs
---
src/manager/runner_manager.py | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 987f0a562..fdf8b0239 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -32,6 +32,14 @@ class FlushMode(Enum):
@dataclass
class RunnerInstance:
+ """Represents an instance of runner.
+
+ Attributes:
+ name: Full name of the runner. Managed by the cloud runner manager.
+ id: ID of the runner. Managed by the runner manager.
+ github_state: State on github.
+ cloud_state: State on cloud.
+ """
name: str
id: RunnerId
github_state: GithubRunnerState
@@ -40,6 +48,14 @@ class RunnerInstance:
def __init__(
self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedRunner
) -> "RunnerInstance":
+ """Construct an instance.
+
+ Args:
+ cloud_instance: Information on the cloud instance.
+ github_info: Information on the GitHub of the runner.
+ Returns:
+ A RunnerInstance object.
+ """
self.name = github_info.name
self.id = cloud_instance.id
self.github_state = GithubRunnerState(github_info)
@@ -48,13 +64,26 @@ def __init__(
@dataclass
class RunnerManagerConfig:
+ """Configuration for the runner manager.
+
+ Attributes:
+ token: GitHub personal access token to query GitHub API.
+ path: Path to GitHub repository or organization to registry the runners.
+ """
token: str
path: GithubPath
class RunnerManager:
+ """Manage the runners."""
def __init__(self, cloud_runner_manager: CloudRunnerManager, config: RunnerManagerConfig):
+ """Construct the object.
+
+ Args:
+ cloud_runner_manager: For managing the cloud instance of the runner.
+ config: Configuration of this class.
+ """
self._config = config
self._cloud = cloud_runner_manager
self._github = GithubRunnerManager(
From da2929f993121336c80e50b977029749c203ee63 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 15:09:42 +0800
Subject: [PATCH 059/278] Fix GithubRunnerState construction
---
src/manager/runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index fdf8b0239..22b6b2e5c 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -58,7 +58,7 @@ def __init__(
"""
self.name = github_info.name
self.id = cloud_instance.id
- self.github_state = GithubRunnerState(github_info)
+ self.github_state = GithubRunnerState.from_runner(github_info)
self.cloud_state = cloud_instance.state
From b0eeb17a302578d7fcb5e53eb97e261b826f10cb Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 15:21:28 +0800
Subject: [PATCH 060/278] Fix instance-id parsing from full name
---
src/manager/runner_manager.py | 12 +++++++-----
src/openstack_cloud/openstack_cloud.py | 4 ++--
2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 22b6b2e5c..1e9994154 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -33,13 +33,14 @@ class FlushMode(Enum):
@dataclass
class RunnerInstance:
"""Represents an instance of runner.
-
+
Attributes:
name: Full name of the runner. Managed by the cloud runner manager.
id: ID of the runner. Managed by the runner manager.
github_state: State on github.
- cloud_state: State on cloud.
+ cloud_state: State on cloud.
"""
+
name: str
id: RunnerId
github_state: GithubRunnerState
@@ -49,7 +50,7 @@ def __init__(
self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedRunner
) -> "RunnerInstance":
"""Construct an instance.
-
+
Args:
cloud_instance: Information on the cloud instance.
github_info: Information on the GitHub of the runner.
@@ -65,11 +66,12 @@ def __init__(
@dataclass
class RunnerManagerConfig:
"""Configuration for the runner manager.
-
+
Attributes:
token: GitHub personal access token to query GitHub API.
path: Path to GitHub repository or organization to registry the runners.
"""
+
token: str
path: GithubPath
@@ -79,7 +81,7 @@ class RunnerManager:
def __init__(self, cloud_runner_manager: CloudRunnerManager, config: RunnerManagerConfig):
"""Construct the object.
-
+
Args:
cloud_runner_manager: For managing the cloud instance of the runner.
config: Configuration of this class.
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 4cc61ae95..a384e8bb2 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -66,12 +66,12 @@ def __init__(self, server: OpenstackServer, prefix: str):
for address in network_addresses
]
- if not self.server_name.startswith(prefix):
+ if not self.server_name.startswith(f"{prefix}-"):
# Should never happen.
raise ValueError(
f"Found openstack server {server.name} managed under prefix {prefix}, contact devs"
)
- self.instance_id = self.server_name[len(prefix) :]
+ self.instance_id = self.server_name[len(prefix) + 1 :]
@contextmanager
From b38cd1163459877613266ca47c08bd96dbfd9102 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 15:29:33 +0800
Subject: [PATCH 061/278] Add idle runner deletion test
---
.../integration/test_runner_manager_openstack.py | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 178656b92..092aab85a 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -124,16 +124,19 @@ async def test_get_no_runner(runner_manager: RunnerManager) -> None:
@pytest.mark.openstack
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
-async def test_create_runner(
+async def test_runner_normal_lifecycle(
runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
) -> None:
"""
Arrange: RunnerManager instance with no runners.
Act:
1. Create one runner.
- 2.
+ 2. Run health check on the runner.
+ 3. Delete all idle runner.
Assert:
1. An active idle runner.
+ 2. Health check passes.
+ 3. No runners.
"""
# 1.
runner_id_list = runner_manager.create_runners(1)
@@ -154,6 +157,11 @@ async def test_create_runner(
assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
runner = openstack_instances[0]
- pytest.set_trace()
-
assert openstack_runner_manager._health_check(runner)
+
+ # 3.
+ runner_manager.delete_runners()
+
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 0
From f37b7a8cdcc44b1fc5b2a8a13b30773ccba4b246 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 15:54:20 +0800
Subject: [PATCH 062/278] Add busy flush to test
---
.../test_runner_manager_openstack.py | 32 +++++++++++++++----
1 file changed, 26 insertions(+), 6 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 092aab85a..103648849 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -14,7 +14,7 @@
from charm_state import GithubPath, ProxyConfig, parse_github_path
from manager.cloud_runner_manager import CloudRunnerState
from manager.github_runner_manager import GithubRunnerState
-from manager.runner_manager import RunnerManager, RunnerManagerConfig
+from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
from metrics import events, runner_logs
from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
from openstack_cloud.openstack_runner_manager import (
@@ -124,7 +124,7 @@ async def test_get_no_runner(runner_manager: RunnerManager) -> None:
@pytest.mark.openstack
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
-async def test_runner_normal_lifecycle(
+async def test_runner_normal_idle_lifecycle(
runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
) -> None:
"""
@@ -132,11 +132,13 @@ async def test_runner_normal_lifecycle(
Act:
1. Create one runner.
2. Run health check on the runner.
- 3. Delete all idle runner.
+ 4. Delete all busy runner.
+ 4. Delete all idle runner.
Assert:
1. An active idle runner.
2. Health check passes.
- 3. No runners.
+ 3. An active idle runner.
+ 4. No runners.
"""
# 1.
runner_id_list = runner_manager.create_runners(1)
@@ -158,10 +160,28 @@ async def test_runner_normal_lifecycle(
runner = openstack_instances[0]
assert openstack_runner_manager._health_check(runner)
-
+
# 3.
- runner_manager.delete_runners()
+ runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 1
+ runner = runner_list[0]
+ assert runner.id == runner_id
+ assert runner.cloud_state == CloudRunnerState.ACTIVE
+ assert runner.github_state == GithubRunnerState.IDLE
+ # 4.
+ runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
runner_list = runner_manager.get_runners()
assert isinstance(runner_list, tuple)
assert len(runner_list) == 0
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_runner_normal_busy_lifecycle(
+ runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+):
+ pass
\ No newline at end of file
From 207913f360213d61fcd8dc3fc176164adb61b8b3 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 16:00:18 +0800
Subject: [PATCH 063/278] Spawn a manual test env
---
.github/workflows/manual_test_env.yaml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index ea6121fde..1568e4757 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -2,8 +2,8 @@ name: Manual test env
on:
# TODO: Uncomment
- #pull_request:
- workflow_dispatch:
+ pull_request:
+ # workflow_dispatch:
jobs:
manual-test-env:
From 7af058967b5136743009eb008e764df7463d6a36 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 16:00:38 +0800
Subject: [PATCH 064/278] Disable spawning on manual test env
---
.github/workflows/manual_test_env.yaml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index 1568e4757..c1060f3fb 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -2,8 +2,8 @@ name: Manual test env
on:
# TODO: Uncomment
- pull_request:
- # workflow_dispatch:
+ # pull_request:
+ workflow_dispatch:
jobs:
manual-test-env:
From 1c8ea0d103390775c128cae7be3f83c7c8e7edd4 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 16:02:35 +0800
Subject: [PATCH 065/278] Remove useless class
---
src/manager/cloud_runner_manager.py | 5 -----
1 file changed, 5 deletions(-)
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index e794df22b..7a5976176 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -54,11 +54,6 @@ class CloudRunnerInstance:
state: CloudRunnerState
-@dataclass
-class RunnerMetrics:
- pass
-
-
class CloudRunnerManager(ABC):
def get_name_prefix(self) -> str: ...
From ce05cf59d20c33be6d77a5d9c45aba25ee61694c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 16:24:08 +0800
Subject: [PATCH 066/278] Fix runner deletion
---
src/manager/cloud_runner_manager.py | 10 +++++++++-
src/manager/github_runner_manager.py | 9 +++++++++
src/manager/runner_manager.py | 16 +++++++++++++++-
3 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 7a5976176..f91bfb801 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -10,6 +10,7 @@
class CloudRunnerState(str, Enum):
+ """Represent state of the instance hosting the runner."""
CREATED = "created"
ACTIVE = "active"
DELETED = "deleted"
@@ -26,7 +27,7 @@ def from_openstack_server_status(openstack_server_status: str) -> None:
https://docs.openstack.org/api-guide/compute/server_concepts.html
Args:
- status: Openstack server status.
+ openstack_server_status: Openstack server status.
"""
match openstack_server_status:
case "BUILD":
@@ -49,6 +50,13 @@ def from_openstack_server_status(openstack_server_status: str) -> None:
@dataclass
class CloudRunnerInstance:
+ """Information on the runner on the cloud.
+
+ Attributes:
+ name: Name of the instance hosting the runner.
+ id: ID of the instance.
+ state: State of the instance hosting the runner.
+ """
name: str
id: str
state: CloudRunnerState
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index a5af41211..b7bb54616 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -10,6 +10,7 @@
class GithubRunnerState(str, Enum):
+ """State of the runner on GitHub."""
BUSY = "busy"
IDLE = "idle"
OFFLINE = "offline"
@@ -17,6 +18,14 @@ class GithubRunnerState(str, Enum):
@staticmethod
def from_runner(runner: SelfHostedRunner) -> "GithubRunnerState":
+ """Construct the object from GtiHub runner information.
+
+ Args:
+ runner: Information on the GitHub self-hosted runner.
+
+ Returns:
+ The state of runner.
+ """
state = GithubRunnerState.OFFLINE
if runner.status == GitHubRunnerStatus.ONLINE:
if runner.busy:
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 1e9994154..b1cba9709 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -93,6 +93,14 @@ def __init__(self, cloud_runner_manager: CloudRunnerManager, config: RunnerManag
)
def create_runners(self, num: int) -> tuple[RunnerId]:
+ """Create runners.
+
+ Args:
+ num: Number of runners to create.
+
+ Returns:
+ List of instance ID of the runners.
+ """
registration_token = self._github.get_registration_token()
runner_ids = []
@@ -127,6 +135,11 @@ def get_runners(
)
def delete_runners(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> None:
+ """Delete the runners.
+
+ Args:
+ flush_mode: The type of runners affect by the deletion.
+ """
states = [GithubRunnerState.IDLE]
if flush_mode == FlushMode.FLUSH_BUSY:
states.append(GithubRunnerState.BUSY)
@@ -135,9 +148,10 @@ def delete_runners(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> None:
remove_token = self._github.get_removal_token()
for runner in runners_list:
- self._cloud.delete_runners(id=runner.id, remove_token=remove_token)
+ self._cloud.delete_runner(id=runner.id, remove_token=remove_token)
def cleanup(self) -> None:
+ """Runs cleanup of the runners and other resources."""
self._github.delete_runners([GithubRunnerState.OFFLINE, GithubRunnerState.UNKNOWN])
remove_token = self._github.get_removal_token()
self._cloud.cleanup_runner(remove_token)
From ea1a726a92904ff8accba2a9385a350e6d9471fc Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 16:34:31 +0800
Subject: [PATCH 067/278] Fix import error
---
...openstack_cloud.openstack_runner_manager.md | 18 +++++++++---------
src/manager/cloud_runner_manager.py | 6 ++++--
src/manager/github_runner_manager.py | 5 +++--
.../openstack_runner_manager.py | 1 -
.../test_runner_manager_openstack.py | 2 +-
5 files changed, 17 insertions(+), 15 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index bfca3ba95..7de71ebdf 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -19,7 +19,7 @@
---
-
+
## class `OpenstackRunnerManagerConfig`
OpenstackRunnerManagerConfig(clouds_config: dict[str, dict], cloud: str, image: str, flavor: str, network: str, github_path: charm_state.GithubOrg | charm_state.GithubRepo, labels: list[str], proxy_config: charm_state.ProxyConfig | None, dockerhub_mirror: str | None, ssh_debug_connections: list[charm_state.SSHDebugConnection] | None, repo_policy_url: str | None, repo_policy_token: str | None)
@@ -55,14 +55,14 @@ __init__(
---
-
+
## class `OpenstackRunnerManager`
-
+
### method `__init__`
@@ -79,7 +79,7 @@ __init__(prefix: str, config: OpenstackRunnerManagerConfig) → None
---
-
+
### method `cleanup`
@@ -93,7 +93,7 @@ cleanup(remove_token: str) → None
---
-
+
### method `create_runner`
@@ -107,7 +107,7 @@ create_runner(registration_token: str) → str
---
-
+
### method `delete_runner`
@@ -121,7 +121,7 @@ delete_runner(id: str, remove_token: str) → None
---
-
+
### method `get_name_prefix`
@@ -135,7 +135,7 @@ get_name_prefix() → str
---
-
+
### method `get_runner`
@@ -149,7 +149,7 @@ get_runner(id: str) → CloudRunnerInstance | None
---
-
+
### method `get_runners`
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index f91bfb801..3222bfcea 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -11,6 +11,7 @@
class CloudRunnerState(str, Enum):
"""Represent state of the instance hosting the runner."""
+
CREATED = "created"
ACTIVE = "active"
DELETED = "deleted"
@@ -51,12 +52,13 @@ def from_openstack_server_status(openstack_server_status: str) -> None:
@dataclass
class CloudRunnerInstance:
"""Information on the runner on the cloud.
-
+
Attributes:
name: Name of the instance hosting the runner.
- id: ID of the instance.
+ id: ID of the instance.
state: State of the instance hosting the runner.
"""
+
name: str
id: str
state: CloudRunnerState
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index b7bb54616..15beed1d5 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -11,6 +11,7 @@
class GithubRunnerState(str, Enum):
"""State of the runner on GitHub."""
+
BUSY = "busy"
IDLE = "idle"
OFFLINE = "offline"
@@ -19,10 +20,10 @@ class GithubRunnerState(str, Enum):
@staticmethod
def from_runner(runner: SelfHostedRunner) -> "GithubRunnerState":
"""Construct the object from GtiHub runner information.
-
+
Args:
runner: Information on the GitHub self-hosted runner.
-
+
Returns:
The state of runner.
"""
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 023f6df25..a26d0e161 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -31,7 +31,6 @@
CloudRunnerManager,
CloudRunnerState,
RunnerId,
- RunnerMetrics,
)
from metrics import events as metric_events
from metrics import github as github_metrics
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 103648849..f9e6f8f62 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -184,4 +184,4 @@ async def test_runner_normal_idle_lifecycle(
async def test_runner_normal_busy_lifecycle(
runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
):
- pass
\ No newline at end of file
+ pass
From 05a90169be5913f90360420ead17adf6d31efcf3 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 16:51:15 +0800
Subject: [PATCH 068/278] Add more docs
---
src/manager/github_runner_manager.py | 10 ++++++++++
src/manager/runner_manager.py | 4 ++--
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 15beed1d5..5c23a0d65 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -1,6 +1,8 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
+"""Client for managing self-hosted runner on GitHub side."""
+
from enum import Enum, auto
from typing import Sequence
@@ -37,8 +39,16 @@ def from_runner(runner: SelfHostedRunner) -> "GithubRunnerState":
class GithubRunnerManager:
+ """Manage self-hosted runner on GitHub side."""
def __init__(self, prefix: str, token: str, path: GithubPath):
+ """Construct the object.
+
+ Args:
+ prefix: The prefix in the name to identify the runners managed by this instance.
+ token: The GitHub personal access token to access the GitHub API.
+ path: The GitHub repository or organization to register the runners under.
+ """
self._prefix = prefix
self._path = path
self._github = GithubClient(token)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index b1cba9709..9e3df6df7 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -1,7 +1,7 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
-"""Class for managing the runners."""
+"""Class for managing the GitHub self-hosted runners hosted on cloud instances."""
from dataclasses import dataclass
from enum import Enum, auto
@@ -151,7 +151,7 @@ def delete_runners(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> None:
self._cloud.delete_runner(id=runner.id, remove_token=remove_token)
def cleanup(self) -> None:
- """Runs cleanup of the runners and other resources."""
+ """Run cleanup of the runners and other resources."""
self._github.delete_runners([GithubRunnerState.OFFLINE, GithubRunnerState.UNKNOWN])
remove_token = self._github.get_removal_token()
self._cloud.cleanup_runner(remove_token)
From 752aa0e5699905928b1afa24b73eed1110a90fd8 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 16:52:32 +0800
Subject: [PATCH 069/278] Fix getting a non-existent openstack server
---
src/openstack_cloud/openstack_cloud.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index a384e8bb2..0b97c96be 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -373,6 +373,9 @@ def _get_and_ensure_unique_server(
returned. Other servers is deleted.
"""
servers: list[OpenstackServer] = conn.search_servers(name)
+
+ if not servers:
+ return None
latest_server = reduce(
lambda a, b: (
From 280480b01401e927c3dbc23b1d2f3a472215dd86 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 16:56:45 +0800
Subject: [PATCH 070/278] Add debug statement
---
src-docs/openstack_cloud.openstack_cloud.md | 8 ++++----
src/openstack_cloud/openstack_cloud.py | 3 +++
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 019487f67..f69a4892c 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -72,7 +72,7 @@ Create a OpenstackCloud instance.
---
-
+
### method `cleanup`
@@ -114,7 +114,7 @@ get_instance(instance_id: str) → OpenstackInstance
---
-
+
### method `get_instances`
@@ -128,7 +128,7 @@ get_instances() → tuple[OpenstackInstance]
---
-
+
### method `get_server_name`
@@ -151,7 +151,7 @@ Get server name on OpenStack.
---
-
+
### method `get_ssh_connection`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 0b97c96be..efbe5968b 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -198,6 +198,9 @@ def delete_instance(self, instance_id: str) -> None:
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
try:
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
conn.delete_server(name_or_id=server.id)
OpenstackCloud._delete_key_pair(conn, full_name)
From 7df180251ab05a5771c5b36da684d069092af744 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 17:02:34 +0800
Subject: [PATCH 071/278] Remove debug statement from delete_instance
---
src-docs/openstack_cloud.openstack_cloud.md | 8 ++++----
src/openstack_cloud/openstack_cloud.py | 3 ---
2 files changed, 4 insertions(+), 7 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index f69a4892c..019487f67 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -72,7 +72,7 @@ Create a OpenstackCloud instance.
---
-
+
### method `cleanup`
@@ -114,7 +114,7 @@ get_instance(instance_id: str) → OpenstackInstance
---
-
+
### method `get_instances`
@@ -128,7 +128,7 @@ get_instances() → tuple[OpenstackInstance]
---
-
+
### method `get_server_name`
@@ -151,7 +151,7 @@ Get server name on OpenStack.
---
-
+
### method `get_ssh_connection`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index efbe5968b..0b97c96be 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -198,9 +198,6 @@ def delete_instance(self, instance_id: str) -> None:
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
try:
- # TODO: debug
- import pytest
- pytest.set_trace()
server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
conn.delete_server(name_or_id=server.id)
OpenstackCloud._delete_key_pair(conn, full_name)
From d1ecfb693ccd228014295bcfac766a30f5128ccb Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 17:07:18 +0800
Subject: [PATCH 072/278] Fix id variable name, function name mixup
---
src/openstack_cloud/openstack_runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index a26d0e161..950a280ea 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -171,7 +171,7 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
)
try:
- self._openstack_cloud.delete_instance(id)
+ self._openstack_cloud.delete_instance(instance.instance_id)
except OpenStackError:
logger.exception(
"Unable to delete openstack instance for runner %s", instance.server_name
From 1ab8db64c20914d1389392f945b85931e397f6cc Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 17:13:36 +0800
Subject: [PATCH 073/278] Add debug statement
---
tests/integration/test_runner_manager_openstack.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index f9e6f8f62..ac9885ff4 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -163,6 +163,8 @@ async def test_runner_normal_idle_lifecycle(
# 3.
runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
+ # TODO: debug
+ pytest.set_trace()
runner_list = runner_manager.get_runners()
assert isinstance(runner_list, tuple)
assert len(runner_list) == 1
From 78fdac22f0aed33bda0126824fddc0f233fe42d5 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 18:10:16 +0800
Subject: [PATCH 074/278] Move debug
---
tests/integration/test_runner_manager_openstack.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index ac9885ff4..3cf2ad69b 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -161,10 +161,11 @@ async def test_runner_normal_idle_lifecycle(
assert openstack_runner_manager._health_check(runner)
- # 3.
- runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
# TODO: debug
pytest.set_trace()
+
+ # 3.
+ runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
runner_list = runner_manager.get_runners()
assert isinstance(runner_list, tuple)
assert len(runner_list) == 1
From fd628e6bd47d0027905e16daee6650e7d593e4d5 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 18:42:45 +0800
Subject: [PATCH 075/278] Add busy runner test
---
src/manager/github_runner_manager.py | 2 +-
src/openstack_cloud/openstack_cloud.py | 2 +-
tests/integration/helpers/common.py | 10 ++-
.../test_runner_manager_openstack.py | 85 ++++++++++++++-----
4 files changed, 73 insertions(+), 26 deletions(-)
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 5c23a0d65..ce6591cb4 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -43,7 +43,7 @@ class GithubRunnerManager:
def __init__(self, prefix: str, token: str, path: GithubPath):
"""Construct the object.
-
+
Args:
prefix: The prefix in the name to identify the runners managed by this instance.
token: The GitHub personal access token to access the GitHub API.
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 0b97c96be..3698ed32a 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -373,7 +373,7 @@ def _get_and_ensure_unique_server(
returned. Other servers is deleted.
"""
servers: list[OpenstackServer] = conn.search_servers(name)
-
+
if not servers:
return None
diff --git a/tests/integration/helpers/common.py b/tests/integration/helpers/common.py
index a1f319697..16622c038 100644
--- a/tests/integration/helpers/common.py
+++ b/tests/integration/helpers/common.py
@@ -375,7 +375,7 @@ def _is_workflow_run_complete(run: WorkflowRun) -> bool:
async def dispatch_workflow(
- app: Application,
+ app: Application | None,
branch: Branch,
github_repository: Repository,
conclusion: str,
@@ -400,14 +400,16 @@ async def dispatch_workflow(
Returns:
The workflow run.
"""
+ if dispatch_input is None:
+ assert app is not None, "If dispatch input not given the app cannot be None."
+ dispatch_input = {"runner": app.name}
+
start_time = datetime.now(timezone.utc)
workflow = github_repository.get_workflow(id_or_file_name=workflow_id_or_name)
# The `create_dispatch` returns True on success.
- assert workflow.create_dispatch(
- branch, dispatch_input or {"runner": app.name}
- ), "Failed to create workflow"
+ assert workflow.create_dispatch(branch, dispatch_input), "Failed to create workflow"
# There is a very small chance of selecting a run not created by the dispatch above.
run: WorkflowRun | None = await wait_for(
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 3cf2ad69b..27dafb3b5 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -9,6 +9,8 @@
import pytest
import pytest_asyncio
import yaml
+from github.Branch import Branch
+from github.Repository import Repository
from openstack.connection import Connection as OpenstackConnection
from charm_state import GithubPath, ProxyConfig, parse_github_path
@@ -21,6 +23,11 @@
OpenstackRunnerManager,
OpenstackRunnerManagerConfig,
)
+from tests.integration.helpers.common import (
+ DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+ dispatch_workflow,
+ wait_for,
+)
from tests.integration.helpers.openstack import PrivateEndpointConfigs
@@ -132,13 +139,11 @@ async def test_runner_normal_idle_lifecycle(
Act:
1. Create one runner.
2. Run health check on the runner.
- 4. Delete all busy runner.
- 4. Delete all idle runner.
+ 3. Delete all idle runner.
Assert:
1. An active idle runner.
2. Health check passes.
- 3. An active idle runner.
- 4. No runners.
+ 3. No runners.
"""
# 1.
runner_id_list = runner_manager.create_runners(1)
@@ -161,20 +166,7 @@ async def test_runner_normal_idle_lifecycle(
assert openstack_runner_manager._health_check(runner)
- # TODO: debug
- pytest.set_trace()
-
# 3.
- runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
- runner_list = runner_manager.get_runners()
- assert isinstance(runner_list, tuple)
- assert len(runner_list) == 1
- runner = runner_list[0]
- assert runner.id == runner_id
- assert runner.cloud_state == CloudRunnerState.ACTIVE
- assert runner.github_state == GithubRunnerState.IDLE
-
- # 4.
runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
runner_list = runner_manager.get_runners()
assert isinstance(runner_list, tuple)
@@ -184,7 +176,60 @@ async def test_runner_normal_idle_lifecycle(
@pytest.mark.openstack
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
-async def test_runner_normal_busy_lifecycle(
- runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+async def test_runner_flush_busy_lifecycle(
+ runner_manager: RunnerManager,
+ openstack_runner_manager: OpenstackRunnerManager,
+ test_github_branch: Branch,
+ github_repository: Repository,
):
- pass
+ """
+ Arrange: RunnerManager with one idle runner.
+ Act:
+ 1. Run a long workflow.
+ 2. Run flush idle runner.
+ 3. Run flush busy runner.
+ Assert:
+ 1. Runner takes the job and become busy.
+ 2. Busy runner still exists.
+ 3. No runners exists.
+ """
+ runner_manager.create_runners(1)
+ runner_list = runner_manager.get_runners()
+ assert len(runner_list) == 1, "Test arrange failed: Expect one runner"
+ runner = runner_list[0]
+ assert (
+ runner.cloud_state == CloudRunnerState.ACTIVE
+ ), "Test arrange failed: Expect runner in active state"
+ assert (
+ runner.github_state == GithubRunnerState.IDLE
+ ), "Test arrange failed: Expect runner in idle state"
+
+ # 1.
+ workflow = await dispatch_workflow(
+ app=None,
+ branch=test_github_branch,
+ github_repository=github_repository,
+ conclusion="success",
+ workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+ dispatch_input={"runner": runner.name, "minutes": "10"},
+ wait=False,
+ )
+ await wait_for(lambda: workflow.update() or workflow.status == "in_progress")
+
+ runner_list = runner_manager.get_runners()
+ assert len(runner_list) == 1
+ busy_runner = runner_list[0]
+ assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+ assert busy_runner.github_state == GithubRunnerState.BUSY
+
+ # 2.
+ runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+ runner_list = runner_manager.get_runners()
+ assert len(runner_list) == 1
+ busy_runner = runner_list[0]
+ assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+ assert busy_runner.github_state == GithubRunnerState.BUSY
+
+ # 3.
+ runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
+ assert len(runner_list) == 0
From e688b6cfbfe0a2132ec864ce9c6971ba2633538b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 18:48:52 +0800
Subject: [PATCH 076/278] Add debug statement.
---
tests/integration/test_runner_manager_openstack.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 27dafb3b5..6a9542f05 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -215,6 +215,7 @@ async def test_runner_flush_busy_lifecycle(
wait=False,
)
await wait_for(lambda: workflow.update() or workflow.status == "in_progress")
+ pytest.set_trace()
runner_list = runner_manager.get_runners()
assert len(runner_list) == 1
From 4296e04e855e6e6c582f2fa7fef54f3a87994d28 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 18:49:33 +0800
Subject: [PATCH 077/278] Disable some tests
---
tests/integration/test_runner_manager_openstack.py | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 6a9542f05..64642397f 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -114,9 +114,10 @@ async def runner_manager_fixture(
return RunnerManager(openstack_runner_manager, config)
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
+# TODO: Re-enable all tests
+# @pytest.mark.openstack
+# @pytest.mark.asyncio
+# @pytest.mark.abort_on_fail
async def test_get_no_runner(runner_manager: RunnerManager) -> None:
"""
Arrange: RunnerManager instance with no runners.
@@ -128,9 +129,9 @@ async def test_get_no_runner(runner_manager: RunnerManager) -> None:
assert not runner_list
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
+# @pytest.mark.openstack
+# @pytest.mark.asyncio
+# @pytest.mark.abort_on_fail
async def test_runner_normal_idle_lifecycle(
runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
) -> None:
From 7d2bc004dbd3dd0b1ce24ead07efe5665a3b8b75 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 18:51:25 +0800
Subject: [PATCH 078/278] Disable some tests
---
.../test_runner_manager_openstack.py | 99 +++++++++----------
1 file changed, 49 insertions(+), 50 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 64642397f..a6a988e1c 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -114,64 +114,63 @@ async def runner_manager_fixture(
return RunnerManager(openstack_runner_manager, config)
-# TODO: Re-enable all tests
# @pytest.mark.openstack
# @pytest.mark.asyncio
# @pytest.mark.abort_on_fail
-async def test_get_no_runner(runner_manager: RunnerManager) -> None:
- """
- Arrange: RunnerManager instance with no runners.
- Act: Get runners.
- Assert: Empty tuple returned.
- """
- runner_list = runner_manager.get_runners()
- assert isinstance(runner_list, tuple)
- assert not runner_list
+# async def test_get_no_runner(runner_manager: RunnerManager) -> None:
+# """
+# Arrange: RunnerManager instance with no runners.
+# Act: Get runners.
+# Assert: Empty tuple returned.
+# """
+# runner_list = runner_manager.get_runners()
+# assert isinstance(runner_list, tuple)
+# assert not runner_list
# @pytest.mark.openstack
# @pytest.mark.asyncio
# @pytest.mark.abort_on_fail
-async def test_runner_normal_idle_lifecycle(
- runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
-) -> None:
- """
- Arrange: RunnerManager instance with no runners.
- Act:
- 1. Create one runner.
- 2. Run health check on the runner.
- 3. Delete all idle runner.
- Assert:
- 1. An active idle runner.
- 2. Health check passes.
- 3. No runners.
- """
- # 1.
- runner_id_list = runner_manager.create_runners(1)
- assert isinstance(runner_id_list, tuple)
- assert len(runner_id_list) == 1
- runner_id = runner_id_list[0]
-
- runner_list = runner_manager.get_runners()
- assert isinstance(runner_list, tuple)
- assert len(runner_list) == 1
- runner = runner_list[0]
- assert runner.id == runner_id
- assert runner.cloud_state == CloudRunnerState.ACTIVE
- assert runner.github_state == GithubRunnerState.IDLE
-
- # 2.
- openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
- assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
- runner = openstack_instances[0]
-
- assert openstack_runner_manager._health_check(runner)
-
- # 3.
- runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
- runner_list = runner_manager.get_runners()
- assert isinstance(runner_list, tuple)
- assert len(runner_list) == 0
+# async def test_runner_normal_idle_lifecycle(
+# runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+# ) -> None:
+# """
+# Arrange: RunnerManager instance with no runners.
+# Act:
+# 1. Create one runner.
+# 2. Run health check on the runner.
+# 3. Delete all idle runner.
+# Assert:
+# 1. An active idle runner.
+# 2. Health check passes.
+# 3. No runners.
+# """
+# # 1.
+# runner_id_list = runner_manager.create_runners(1)
+# assert isinstance(runner_id_list, tuple)
+# assert len(runner_id_list) == 1
+# runner_id = runner_id_list[0]
+
+# runner_list = runner_manager.get_runners()
+# assert isinstance(runner_list, tuple)
+# assert len(runner_list) == 1
+# runner = runner_list[0]
+# assert runner.id == runner_id
+# assert runner.cloud_state == CloudRunnerState.ACTIVE
+# assert runner.github_state == GithubRunnerState.IDLE
+
+# # 2.
+# openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
+# assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
+# runner = openstack_instances[0]
+
+# assert openstack_runner_manager._health_check(runner)
+
+# # 3.
+# runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+# runner_list = runner_manager.get_runners()
+# assert isinstance(runner_list, tuple)
+# assert len(runner_list) == 0
@pytest.mark.openstack
From d90f70bd3ec276516f1b1e5c0f25ca93ecc55538 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 19:02:33 +0800
Subject: [PATCH 079/278] Fix runner label in workflow
---
.../test_runner_manager_openstack.py | 20 +++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index a6a988e1c..b6dd1ebd5 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -5,12 +5,14 @@
from pathlib import Path
+from secrets import token_hex
import pytest
import pytest_asyncio
import yaml
from github.Branch import Branch
from github.Repository import Repository
+from github.Workflow import Workflow
from openstack.connection import Connection as OpenstackConnection
from charm_state import GithubPath, ProxyConfig, parse_github_path
@@ -31,6 +33,11 @@
from tests.integration.helpers.openstack import PrivateEndpointConfigs
+
+@pytest.fixture(scope="module", name="runner_label")
+def runner_label():
+ return f"test-{token_hex(6)}"
+
@pytest.fixture(scope="module", name="log_dir_base_path")
def log_dir_base_path_fixture(tmp_path_factory: Path):
"""Mock the log directory path and return it."""
@@ -73,6 +80,7 @@ async def openstack_runner_manager_fixture(
network_name: str,
github_path: GithubPath,
proxy_config: ProxyConfig,
+ runner_label: str,
openstack_connection: OpenstackConnection,
) -> OpenstackRunnerManager:
"""Create OpenstackRunnerManager instance.
@@ -89,7 +97,7 @@ async def openstack_runner_manager_fixture(
flavor=flavor_name,
network=network_name,
github_path=github_path,
- labels=["openstack_test"],
+ labels=["openstack_test", runner_label],
proxy_config=proxy_config,
dockerhub_mirror=None,
ssh_debug_connections=None,
@@ -113,6 +121,10 @@ async def runner_manager_fixture(
config = RunnerManagerConfig(token, github_path)
return RunnerManager(openstack_runner_manager, config)
+def workflow_in_progress(workflow: Workflow) -> bool:
+ workflow.update()
+ return workflow.status == "in_progress"
+
# @pytest.mark.openstack
# @pytest.mark.asyncio
@@ -181,6 +193,7 @@ async def test_runner_flush_busy_lifecycle(
openstack_runner_manager: OpenstackRunnerManager,
test_github_branch: Branch,
github_repository: Repository,
+ runner_label: str
):
"""
Arrange: RunnerManager with one idle runner.
@@ -211,11 +224,10 @@ async def test_runner_flush_busy_lifecycle(
github_repository=github_repository,
conclusion="success",
workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
- dispatch_input={"runner": runner.name, "minutes": "10"},
+ dispatch_input={"runner": runner_label, "minutes": "10"},
wait=False,
)
- await wait_for(lambda: workflow.update() or workflow.status == "in_progress")
- pytest.set_trace()
+ await wait_for(workflow_in_progress)
runner_list = runner_manager.get_runners()
assert len(runner_list) == 1
From a868a837b1dd60a0107e153cabbee920d1d83651 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 6 Aug 2024 19:08:15 +0800
Subject: [PATCH 080/278] Fix lambda
---
tests/integration/test_runner_manager_openstack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index b6dd1ebd5..6692b4fe8 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -227,7 +227,7 @@ async def test_runner_flush_busy_lifecycle(
dispatch_input={"runner": runner_label, "minutes": "10"},
wait=False,
)
- await wait_for(workflow_in_progress)
+ await wait_for(lambda: workflow_in_progress(workflow))
runner_list = runner_manager.get_runners()
assert len(runner_list) == 1
From 11b9f7896beb8ea615142a38e2a9a9e84cc3733c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 10:41:48 +0800
Subject: [PATCH 081/278] Debug
---
tests/integration/test_runner_manager_openstack.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 6692b4fe8..c624e5986 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -245,4 +245,5 @@ async def test_runner_flush_busy_lifecycle(
# 3.
runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
- assert len(runner_list) == 0
+ assert len(runner_list) == 1
+ pytest.set_trace()
From 4d25f2c4912e0f12087ff61e4d768310188eb6a1 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 10:51:38 +0800
Subject: [PATCH 082/278] Debug
---
tests/integration/test_runner_manager_openstack.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index c624e5986..4839bc962 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -190,7 +190,6 @@ def workflow_in_progress(workflow: Workflow) -> bool:
@pytest.mark.abort_on_fail
async def test_runner_flush_busy_lifecycle(
runner_manager: RunnerManager,
- openstack_runner_manager: OpenstackRunnerManager,
test_github_branch: Branch,
github_repository: Repository,
runner_label: str
@@ -245,5 +244,5 @@ async def test_runner_flush_busy_lifecycle(
# 3.
runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
- assert len(runner_list) == 1
+ runner_list = runner_manager.get_runners()
pytest.set_trace()
From d38d067db0ed7359aeada85d247fd5808dabdefa Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 11:16:29 +0800
Subject: [PATCH 083/278] Add debug
---
src-docs/openstack_cloud.openstack_cloud.md | 4 ++--
src/openstack_cloud/openstack_cloud.py | 5 +++++
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 019487f67..3bd5690bd 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -72,7 +72,7 @@ Create a OpenstackCloud instance.
---
-
+
### method `cleanup`
@@ -128,7 +128,7 @@ get_instances() → tuple[OpenstackInstance]
---
-
+
### method `get_server_name`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 3698ed32a..59a4a1db4 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -256,6 +256,11 @@ def get_instances(self) -> tuple[OpenstackInstance]:
) as conn:
servers = self._get_openstack_instances(conn)
server_names = set(server.name for server in servers)
+
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
+
return [
OpenstackInstance(
OpenstackCloud._get_and_ensure_unique_server(conn, name), self.prefix
From 03dd8396bca922f590cfb26fed58ba4587867f70 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 11:17:10 +0800
Subject: [PATCH 084/278] Start new manual test env
---
.github/workflows/manual_test_env.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index c1060f3fb..3d0f7ad56 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -2,7 +2,7 @@ name: Manual test env
on:
# TODO: Uncomment
- # pull_request:
+ pull_request:
workflow_dispatch:
jobs:
From d42f2b5d0cb0432abc4eb5095259df0565398739 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 11:52:28 +0800
Subject: [PATCH 085/278] Add none check
---
.github/workflows/manual_test_env.yaml | 2 +-
src-docs/openstack_cloud.openstack_cloud.md | 4 ++--
src/openstack_cloud/openstack_cloud.py | 18 ++++++++----------
3 files changed, 11 insertions(+), 13 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index 3d0f7ad56..c1060f3fb 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -2,7 +2,7 @@ name: Manual test env
on:
# TODO: Uncomment
- pull_request:
+ # pull_request:
workflow_dispatch:
jobs:
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 3bd5690bd..fcbc4e4fa 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -72,7 +72,7 @@ Create a OpenstackCloud instance.
---
-
+
### method `cleanup`
@@ -128,7 +128,7 @@ get_instances() → tuple[OpenstackInstance]
---
-
+
### method `get_server_name`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 59a4a1db4..77c4caf08 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -257,16 +257,14 @@ def get_instances(self) -> tuple[OpenstackInstance]:
servers = self._get_openstack_instances(conn)
server_names = set(server.name for server in servers)
- # TODO: debug
- import pytest
- pytest.set_trace()
-
- return [
- OpenstackInstance(
- OpenstackCloud._get_and_ensure_unique_server(conn, name), self.prefix
- )
- for name in server_names
- ]
+ instances = []
+ for name in server_names:
+ # The server can be deleted between the `_get_openstack_instances` call and this
+ # line. This is an issues during tests. Hence the need for None check.
+ server = OpenstackCloud._get_and_ensure_unique_server(conn, name)
+ if server is not None:
+ instances.append(OpenstackInstance(server))
+ return instances
def cleanup(self) -> None:
with _get_openstack_connection(
From 6aff0f13a6d0dce2d75b0d36d8ddd5e0ff71fe03 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 11:54:45 +0800
Subject: [PATCH 086/278] Fix missing prefix
---
src/openstack_cloud/openstack_cloud.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 77c4caf08..cf390790a 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -263,7 +263,7 @@ def get_instances(self) -> tuple[OpenstackInstance]:
# line. This is an issues during tests. Hence the need for None check.
server = OpenstackCloud._get_and_ensure_unique_server(conn, name)
if server is not None:
- instances.append(OpenstackInstance(server))
+ instances.append(OpenstackInstance(server, self.prefix))
return instances
def cleanup(self) -> None:
From c106fd20b6222861ba9ad6147a2c48d5b183cdd2 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 13:55:50 +0800
Subject: [PATCH 087/278] Add more logging
---
src/manager/runner_manager.py | 33 ++++-
src/openstack_cloud/openstack_cloud.py | 6 +-
.../test_runner_manager_openstack.py | 127 +++++++++---------
3 files changed, 98 insertions(+), 68 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 9e3df6df7..4510787ea 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -3,6 +3,7 @@
"""Class for managing the GitHub self-hosted runners hosted on cloud instances."""
+import logging
from dataclasses import dataclass
from enum import Enum, auto
from typing import Sequence
@@ -17,6 +18,8 @@
)
from manager.github_runner_manager import GithubRunnerManager, GithubRunnerState
+logger = logging.getLogger(__name__)
+
class FlushMode(Enum):
"""Strategy for flushing runners.
@@ -101,6 +104,7 @@ def create_runners(self, num: int) -> tuple[RunnerId]:
Returns:
List of instance ID of the runners.
"""
+ logger.info("Creating %s runners", num)
registration_token = self._github.get_registration_token()
runner_ids = []
@@ -125,13 +129,28 @@ def get_runners(
Returns:
Information on the runners.
"""
+ logger.info("Getting runners...")
github_infos = self._github.get_runners(github_runner_state)
cloud_infos = self._cloud.get_runners(cloud_runner_state)
github_infos_map = {info.name: info for info in github_infos}
cloud_infos_map = {info.name: info for info in cloud_infos}
+ runner_names = cloud_infos_map.keys() & github_infos_map.keys()
+ logger.info("Found following runners: %s", runner_names)
+
+ cloud_only = cloud_infos_map.keys() - runner_names
+ github_only = github_infos_map.keys() - runner_names
+ if cloud_only:
+ logger.warning(
+ "Found runner instance on cloud but not registered on GitHub: %s", cloud_only
+ )
+ if github_only:
+ logger.warning(
+ "Found self-hosted runner on GitHub but no matching runner instance on cloud: %s",
+ github_only,
+ )
+
return tuple(
- RunnerInstance(cloud_infos_map[name], github_infos_map[name])
- for name in cloud_infos_map.keys() & github_infos_map.keys()
+ RunnerInstance(cloud_infos_map[name], github_infos_map[name]) for name in runner_names
)
def delete_runners(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> None:
@@ -140,11 +159,21 @@ def delete_runners(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> None:
Args:
flush_mode: The type of runners affect by the deletion.
"""
+ match flush_mode:
+ case FlushMode.FLUSH_IDLE:
+ logger.info("Deleting idle runners...")
+ case FlushMode.FLUSH_BUSY:
+ logger.info("Deleting idle and busy runners...")
+ case _:
+ logger.critical("Unknown flush mode %s encountered, contact developers", flush_mode)
+
states = [GithubRunnerState.IDLE]
if flush_mode == FlushMode.FLUSH_BUSY:
states.append(GithubRunnerState.BUSY)
runners_list = self.get_runners(github_runner_state=states)
+ runner_names = [runner.name for runner in runners_list]
+ logger.info("Deleting runners: %s", runner_names)
remove_token = self._github.get_removal_token()
for runner in runners_list:
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index cf390790a..56146a33d 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -256,10 +256,10 @@ def get_instances(self) -> tuple[OpenstackInstance]:
) as conn:
servers = self._get_openstack_instances(conn)
server_names = set(server.name for server in servers)
-
- instances = []
+
+ instances = []
for name in server_names:
- # The server can be deleted between the `_get_openstack_instances` call and this
+ # The server can be deleted between the `_get_openstack_instances` call and this
# line. This is an issues during tests. Hence the need for None check.
server = OpenstackCloud._get_and_ensure_unique_server(conn, name)
if server is not None:
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 4839bc962..8ae890e1a 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -33,11 +33,11 @@
from tests.integration.helpers.openstack import PrivateEndpointConfigs
-
@pytest.fixture(scope="module", name="runner_label")
def runner_label():
return f"test-{token_hex(6)}"
+
@pytest.fixture(scope="module", name="log_dir_base_path")
def log_dir_base_path_fixture(tmp_path_factory: Path):
"""Mock the log directory path and return it."""
@@ -121,8 +121,9 @@ async def runner_manager_fixture(
config = RunnerManagerConfig(token, github_path)
return RunnerManager(openstack_runner_manager, config)
+
def workflow_in_progress(workflow: Workflow) -> bool:
- workflow.update()
+ workflow.update()
return workflow.status == "in_progress"
@@ -185,64 +186,64 @@ def workflow_in_progress(workflow: Workflow) -> bool:
# assert len(runner_list) == 0
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_runner_flush_busy_lifecycle(
- runner_manager: RunnerManager,
- test_github_branch: Branch,
- github_repository: Repository,
- runner_label: str
-):
- """
- Arrange: RunnerManager with one idle runner.
- Act:
- 1. Run a long workflow.
- 2. Run flush idle runner.
- 3. Run flush busy runner.
- Assert:
- 1. Runner takes the job and become busy.
- 2. Busy runner still exists.
- 3. No runners exists.
- """
- runner_manager.create_runners(1)
- runner_list = runner_manager.get_runners()
- assert len(runner_list) == 1, "Test arrange failed: Expect one runner"
- runner = runner_list[0]
- assert (
- runner.cloud_state == CloudRunnerState.ACTIVE
- ), "Test arrange failed: Expect runner in active state"
- assert (
- runner.github_state == GithubRunnerState.IDLE
- ), "Test arrange failed: Expect runner in idle state"
-
- # 1.
- workflow = await dispatch_workflow(
- app=None,
- branch=test_github_branch,
- github_repository=github_repository,
- conclusion="success",
- workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
- dispatch_input={"runner": runner_label, "minutes": "10"},
- wait=False,
- )
- await wait_for(lambda: workflow_in_progress(workflow))
-
- runner_list = runner_manager.get_runners()
- assert len(runner_list) == 1
- busy_runner = runner_list[0]
- assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
- assert busy_runner.github_state == GithubRunnerState.BUSY
-
- # 2.
- runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
- runner_list = runner_manager.get_runners()
- assert len(runner_list) == 1
- busy_runner = runner_list[0]
- assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
- assert busy_runner.github_state == GithubRunnerState.BUSY
-
- # 3.
- runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
- runner_list = runner_manager.get_runners()
- pytest.set_trace()
+# @pytest.mark.openstack
+# @pytest.mark.asyncio
+# @pytest.mark.abort_on_fail
+# async def test_runner_flush_busy_lifecycle(
+# runner_manager: RunnerManager,
+# test_github_branch: Branch,
+# github_repository: Repository,
+# runner_label: str,
+# ):
+# """
+# Arrange: RunnerManager with one idle runner.
+# Act:
+# 1. Run a long workflow.
+# 2. Run flush idle runner.
+# 3. Run flush busy runner.
+# Assert:
+# 1. Runner takes the job and become busy.
+# 2. Busy runner still exists.
+# 3. No runners exists.
+# """
+# runner_manager.create_runners(1)
+# runner_list = runner_manager.get_runners()
+# assert len(runner_list) == 1, "Test arrange failed: Expect one runner"
+# runner = runner_list[0]
+# assert (
+# runner.cloud_state == CloudRunnerState.ACTIVE
+# ), "Test arrange failed: Expect runner in active state"
+# assert (
+# runner.github_state == GithubRunnerState.IDLE
+# ), "Test arrange failed: Expect runner in idle state"
+
+# # 1.
+# workflow = await dispatch_workflow(
+# app=None,
+# branch=test_github_branch,
+# github_repository=github_repository,
+# conclusion="success",
+# workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+# dispatch_input={"runner": runner_label, "minutes": "10"},
+# wait=False,
+# )
+# await wait_for(lambda: workflow_in_progress(workflow))
+
+# runner_list = runner_manager.get_runners()
+# assert len(runner_list) == 1
+# busy_runner = runner_list[0]
+# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+# assert busy_runner.github_state == GithubRunnerState.BUSY
+
+# # 2.
+# runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+# runner_list = runner_manager.get_runners()
+# assert len(runner_list) == 1
+# busy_runner = runner_list[0]
+# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+# assert busy_runner.github_state == GithubRunnerState.BUSY
+
+# # 3.
+# runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
+# runner_list = runner_manager.get_runners()
+# pytest.set_trace()
From 0f5b6cee7adb7cbea05bf5944db058505ca583ba Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 16:37:36 +0800
Subject: [PATCH 088/278] Refactor runner manager one runner fixture
---
src/manager/runner_manager.py | 16 +-
.../test_runner_manager_openstack.py | 168 +++++++++++-------
2 files changed, 111 insertions(+), 73 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 4510787ea..e18cd61fc 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -160,13 +160,15 @@ def delete_runners(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> None:
flush_mode: The type of runners affect by the deletion.
"""
match flush_mode:
- case FlushMode.FLUSH_IDLE:
- logger.info("Deleting idle runners...")
- case FlushMode.FLUSH_BUSY:
- logger.info("Deleting idle and busy runners...")
- case _:
- logger.critical("Unknown flush mode %s encountered, contact developers", flush_mode)
-
+ case FlushMode.FLUSH_IDLE:
+ logger.info("Deleting idle runners...")
+ case FlushMode.FLUSH_BUSY:
+ logger.info("Deleting idle and busy runners...")
+ case _:
+ logger.critical(
+ "Unknown flush mode %s encountered, contact developers", flush_mode
+ )
+
states = [GithubRunnerState.IDLE]
if flush_mode == FlushMode.FLUSH_BUSY:
states.append(GithubRunnerState.BUSY)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 8ae890e1a..984a5c640 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -6,6 +6,7 @@
from pathlib import Path
from secrets import token_hex
+from typing import Iterator
import pytest
import pytest_asyncio
@@ -39,14 +40,14 @@ def runner_label():
@pytest.fixture(scope="module", name="log_dir_base_path")
-def log_dir_base_path_fixture(tmp_path_factory: Path):
+def log_dir_base_path_fixture(tmp_path_factory: Path) -> Iterator[dict[str, Path]]:
"""Mock the log directory path and return it."""
with pytest.MonkeyPatch.context() as monkeypatch:
runner_log_dir_path = tmp_path_factory.mktemp("log") / "runner_log"
metric_log_path = tmp_path_factory.mktemp("log") / "runner_log"
monkeypatch.setattr(runner_logs, "RUNNER_LOGS_DIR_PATH", runner_log_dir_path)
monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path)
- yield
+ yield {"runner_logs_dir": runner_log_dir_path, "metric_log": metric_log_path}
@pytest.fixture(scope="module", name="github_path")
@@ -112,7 +113,7 @@ async def runner_manager_fixture(
openstack_runner_manager: OpenstackRunnerManager,
token: str,
github_path: GithubPath,
- log_dir_base_path: Path,
+ log_dir_base_path: dict[str, Path],
) -> RunnerManager:
"""Get RunnerManager instance.
@@ -122,9 +123,24 @@ async def runner_manager_fixture(
return RunnerManager(openstack_runner_manager, config)
-def workflow_in_progress(workflow: Workflow) -> bool:
+@pytest_asyncio.fixture(scope="module", name="runner_manager_with_one_runner")
+async def runner_manager_with_one_runner_fixture(runner_manager: RunnerManager) -> RunnerManager:
+ runner_manager.create_runners(1)
+ runner_list = runner_manager.get_runners()
+ assert len(runner_list) == 1, "Test arrange failed: Expect one runner"
+ runner = runner_list[0]
+ assert (
+ runner.cloud_state == CloudRunnerState.ACTIVE
+ ), "Test arrange failed: Expect runner in active state"
+ assert (
+ runner.github_state == GithubRunnerState.IDLE
+ ), "Test arrange failed: Expect runner in idle state"
+ return runner_manager
+
+
+def workflow_is_status(workflow: Workflow, status: str) -> bool:
workflow.update()
- return workflow.status == "in_progress"
+ return workflow.status == status
# @pytest.mark.openstack
@@ -186,64 +202,84 @@ def workflow_in_progress(workflow: Workflow) -> bool:
# assert len(runner_list) == 0
-# @pytest.mark.openstack
-# @pytest.mark.asyncio
-# @pytest.mark.abort_on_fail
-# async def test_runner_flush_busy_lifecycle(
-# runner_manager: RunnerManager,
-# test_github_branch: Branch,
-# github_repository: Repository,
-# runner_label: str,
-# ):
-# """
-# Arrange: RunnerManager with one idle runner.
-# Act:
-# 1. Run a long workflow.
-# 2. Run flush idle runner.
-# 3. Run flush busy runner.
-# Assert:
-# 1. Runner takes the job and become busy.
-# 2. Busy runner still exists.
-# 3. No runners exists.
-# """
-# runner_manager.create_runners(1)
-# runner_list = runner_manager.get_runners()
-# assert len(runner_list) == 1, "Test arrange failed: Expect one runner"
-# runner = runner_list[0]
-# assert (
-# runner.cloud_state == CloudRunnerState.ACTIVE
-# ), "Test arrange failed: Expect runner in active state"
-# assert (
-# runner.github_state == GithubRunnerState.IDLE
-# ), "Test arrange failed: Expect runner in idle state"
-
-# # 1.
-# workflow = await dispatch_workflow(
-# app=None,
-# branch=test_github_branch,
-# github_repository=github_repository,
-# conclusion="success",
-# workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
-# dispatch_input={"runner": runner_label, "minutes": "10"},
-# wait=False,
-# )
-# await wait_for(lambda: workflow_in_progress(workflow))
-
-# runner_list = runner_manager.get_runners()
-# assert len(runner_list) == 1
-# busy_runner = runner_list[0]
-# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
-# assert busy_runner.github_state == GithubRunnerState.BUSY
-
-# # 2.
-# runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
-# runner_list = runner_manager.get_runners()
-# assert len(runner_list) == 1
-# busy_runner = runner_list[0]
-# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
-# assert busy_runner.github_state == GithubRunnerState.BUSY
-
-# # 3.
-# runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
-# runner_list = runner_manager.get_runners()
-# pytest.set_trace()
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_runner_flush_busy_lifecycle(
+ runner_manager_with_one_runner: RunnerManager,
+ test_github_branch: Branch,
+ github_repository: Repository,
+ runner_label: str,
+):
+ """
+ Arrange: RunnerManager with one idle runner.
+ Act:
+ 1. Run a long workflow.
+ 2. Run flush idle runner.
+ 3. Run flush busy runner.
+ Assert:
+ 1. Runner takes the job and becomes busy.
+ 2. Busy runner still exists.
+ 3. No runners exist.
+ """
+ # 1.
+ workflow = await dispatch_workflow(
+ app=None,
+ branch=test_github_branch,
+ github_repository=github_repository,
+ conclusion="success",
+ workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+ dispatch_input={"runner": runner_label, "minutes": "10"},
+ wait=False,
+ )
+ await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
+
+ runner_list = runner_manager_with_one_runner.get_runners()
+ assert len(runner_list) == 1
+ busy_runner = runner_list[0]
+ assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+ assert busy_runner.github_state == GithubRunnerState.BUSY
+
+ # 2.
+ runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+ runner_list = runner_manager_with_one_runner.get_runners()
+ assert len(runner_list) == 1
+ busy_runner = runner_list[0]
+ assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+ assert busy_runner.github_state == GithubRunnerState.BUSY
+
+ # 3.
+ runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
+ runner_list = runner_manager_with_one_runner.get_runners()
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_runner_normal_lifecycle(
+ runner_manager_with_one_runner: RunnerManager,
+ test_github_branch: Branch,
+ github_repository: Repository,
+ runner_label: str,
+ log_dir_base_path: dict[str, Path],
+):
+ """
+ Arrange: RunnerManager with one runner.
+ Act:
+ 1. Start a test workflow for the runner.
+ 2. Run cleanup.
+ Assert:
+ 1. The workflow completes successfully.
+ 2. The runner should be deleted. The metrics should be recorded.
+ """
+ workflow = await dispatch_workflow(
+ app=None,
+ branch=test_github_branch,
+ github_repository=github_repository,
+ conclusion="success",
+ workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+ dispatch_input={"runner": runner_label, "minutes": "0"},
+ wait=False,
+ )
+ await wait_for(lambda: workflow_is_status(workflow, "completed"))
+ metric_log_path = log_dir_base_path["metric_log"]
From d0a3173878e2fd7df06a45aaf5147c9e8063b9db Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 16:42:09 +0800
Subject: [PATCH 089/278] Fix error string formatting
---
src/openstack_cloud/openstack_runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 950a280ea..918950a68 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -107,7 +107,7 @@ def create_runner(self, registration_token: str) -> RunnerId:
userdata=userdata,
)
except OpenStackError as err:
- raise RunnerCreateError("Failed to create {instance_name} openstack runner") from err
+ raise RunnerCreateError(f"Failed to create {instance_name} openstack runner") from err
self._wait_runner_startup(instance)
From dd14f44895f5d870c4ef150d73b0121bd3b543ae Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 17:00:19 +0800
Subject: [PATCH 090/278] Adding the docstring for github_runner_manager
---
src/manager/github_runner_manager.py | 38 ++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index ce6591cb4..96b1b2dff 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -56,6 +56,14 @@ def __init__(self, prefix: str, token: str, path: GithubPath):
def get_runners(
self, states: Sequence[GithubRunnerState] | None = None
) -> tuple[SelfHostedRunner]:
+ """Get info on self-hosted runners of certain states.
+
+ Args:
+ states: Filter the runners for these states. If None, all runners are returned.
+
+ Returns:
+ Information on the runners.
+ """
runner_list = self._github.get_runner_github_info(self._path)
return tuple(
runner
@@ -65,20 +73,50 @@ def get_runners(
)
def delete_runners(self, states: Sequence[GithubRunnerState] | None = None) -> None:
+ """Delete the self-hosted runners of certain states.
+
+ Args:
+ states: Filter the runners for these states. If None, all runners are deleted.
+
+ Returns:
+ Information on the runners.
+ """
runner_list = self.get_runners(states)
for runner in runner_list:
self._github.delete_runner(self._path, runner.id)
def get_registration_token(self) -> str:
+ """Get registration token from GitHub.
+
+ This token is used for registering self-hosted runners.
+
+ Returns:
+ The registration token.
+ """
return self._github.get_runner_registration_token(self._path)
def get_removal_token(self) -> str:
+ """Get removal token from GitHub.
+
+ This token is used for removing self-hosted runners.
+
+ Returns:
+ The removal token.
+ """
return self._github.get_runner_remove_token(self._path)
@staticmethod
def _filter_runner_state(
runner: SelfHostedRunner, states: Sequence[GithubRunnerState] | None
) -> bool:
+ """Filter the runner by the state.
+
+ Args:
+ states: Filter the runners for these states. If None, return true.
+
+ Returns:
+ True if the runner is in one of the state, else false.
+ """
if states is None:
return True
return GithubRunnerState.from_runner(runner) in states
From 8cc1325c18d03a88dbf6df5362a9f09ee96c89fb Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 17:03:10 +0800
Subject: [PATCH 091/278] Fix test fixture scope
---
tests/integration/test_runner_manager_openstack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 984a5c640..32b5f28b2 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -123,7 +123,7 @@ async def runner_manager_fixture(
return RunnerManager(openstack_runner_manager, config)
-@pytest_asyncio.fixture(scope="module", name="runner_manager_with_one_runner")
+@pytest_asyncio.fixture(scope="function", name="runner_manager_with_one_runner")
async def runner_manager_with_one_runner_fixture(runner_manager: RunnerManager) -> RunnerManager:
runner_manager.create_runners(1)
runner_list = runner_manager.get_runners()
From fa0313ace177d57c816fe7857cde62c6f8fd1d91 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 17:41:51 +0800
Subject: [PATCH 092/278] Add docstring on cloud_runner_manager
---
src/manager/cloud_runner_manager.py | 59 +++++++++++++++++--
src/manager/github_runner_manager.py | 8 +--
src/manager/runner_manager.py | 6 +-
.../openstack_runner_manager.py | 10 ++--
4 files changed, 67 insertions(+), 16 deletions(-)
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 3222bfcea..88fbe6c0f 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -1,12 +1,14 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
+"""Interface of manager of runner instance on clouds."""
+
from abc import ABC
from dataclasses import dataclass
from enum import Enum
from typing import Sequence, Tuple
-RunnerId = str
+InstanceId = str
class CloudRunnerState(str, Enum):
@@ -65,14 +67,63 @@ class CloudRunnerInstance:
class CloudRunnerManager(ABC):
+ """Manage runner instance on cloud."""
+
def get_name_prefix(self) -> str: ...
- def create_runner(self, registration_token: str) -> RunnerId: ...
+ """Get the name prefix of the self-hosted runners.
+
+ Returns:
+ The name prefix.
+ """
- def get_runner(self, id: RunnerId) -> CloudRunnerInstance: ...
+ def create_runner(self, registration_token: str) -> InstanceId: ...
+
+ """Create a self-hosted runner.
+
+ Args:
+ registration_token: The GitHub registration token for registering runners.
+
+ Returns:
+ Instance ID of the runner.
+ """
+
+ def get_runner(self, id: InstanceId) -> CloudRunnerInstance: ...
+
+ """Get a self-hosted runner by instance id.
+
+ Args:
+ id: The instance id.
+
+ Returns:
+ Information on the runner instance.
+ """
def get_runners(self, states: Sequence[CloudRunnerState]) -> Tuple[CloudRunnerInstance]: ...
- def delete_runner(self, id: RunnerId, remove_token: str) -> None: ...
+ """Get self-hosted runners by state.
+
+ Args:
+ states: Filter for the runners with these github states. If None all states will be
+ included.
+
+ Returns:
+ Information on the runner instances.
+ """
+
+ def delete_runner(self, id: InstanceId, remove_token: str) -> None: ...
+
+ """Delete self-hosted runners.
+
+ Args:
+ id: The instance id of the runner to delete.
+ remove_token: The GitHub remove token.
+ """
def cleanup_runner(self, remove_token: str) -> None: ...
+
+ """Cleanup runner and resource on the cloud.
+
+ Args:
+ remove_token: The GitHub remove token.
+ """
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 96b1b2dff..5f68b99cc 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -60,7 +60,7 @@ def get_runners(
Args:
states: Filter the runners for these states. If None, all runners are returned.
-
+
Returns:
Information on the runners.
"""
@@ -74,10 +74,10 @@ def get_runners(
def delete_runners(self, states: Sequence[GithubRunnerState] | None = None) -> None:
"""Delete the self-hosted runners of certain states.
-
+
Args:
states: Filter the runners for these states. If None, all runners are deleted.
-
+
Returns:
Information on the runners.
"""
@@ -115,7 +115,7 @@ def _filter_runner_state(
states: Filter the runners for these states. If None, return true.
Returns:
- True if the runner is in one of the state, else false.
+ True if the runner is in one of the state, else false.
"""
if states is None:
return True
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index e18cd61fc..c2ffb8bee 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -14,7 +14,7 @@
CloudRunnerInstance,
CloudRunnerManager,
CloudRunnerState,
- RunnerId,
+ InstanceId,
)
from manager.github_runner_manager import GithubRunnerManager, GithubRunnerState
@@ -45,7 +45,7 @@ class RunnerInstance:
"""
name: str
- id: RunnerId
+ id: InstanceId
github_state: GithubRunnerState
cloud_state: CloudRunnerState
@@ -95,7 +95,7 @@ def __init__(self, cloud_runner_manager: CloudRunnerManager, config: RunnerManag
prefix=self._cloud.get_name_prefix(), token=self._config.token, path=self._config.path
)
- def create_runners(self, num: int) -> tuple[RunnerId]:
+ def create_runners(self, num: int) -> tuple[InstanceId]:
"""Create runners.
Args:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 918950a68..90b80227b 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -30,7 +30,7 @@
CloudRunnerInstance,
CloudRunnerManager,
CloudRunnerState,
- RunnerId,
+ InstanceId,
)
from metrics import events as metric_events
from metrics import github as github_metrics
@@ -91,7 +91,7 @@ def __init__(self, prefix: str, config: OpenstackRunnerManagerConfig) -> None:
def get_name_prefix(self) -> str:
return self.prefix
- def create_runner(self, registration_token: str) -> RunnerId:
+ def create_runner(self, registration_token: str) -> InstanceId:
start_timestamp = time.time()
id = OpenstackRunnerManager._generate_runner_id()
instance_name = self._openstack_cloud.get_server_name(instance_id=id)
@@ -120,7 +120,7 @@ def create_runner(self, registration_token: str) -> RunnerId:
)
return id
- def get_runner(self, id: RunnerId) -> CloudRunnerInstance | None:
+ def get_runner(self, id: InstanceId) -> CloudRunnerInstance | None:
name = self._openstack_cloud.get_server_name(id)
instances_list = self._openstack_cloud.get_instances()
for instance in instances_list:
@@ -148,7 +148,7 @@ def get_runners(
return instances_list
return [instance for instance in instances_list if instance.state in states]
- def delete_runner(self, id: RunnerId, remove_token: str) -> None:
+ def delete_runner(self, id: InstanceId, remove_token: str) -> None:
instance = self._openstack_cloud.get_instance(id)
self._delete_runner(instance, remove_token)
@@ -298,7 +298,7 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
logger.info("Runner startup process found to be healthy on %s", instance.server_name)
@staticmethod
- def _generate_runner_id() -> RunnerId:
+ def _generate_runner_id() -> InstanceId:
return secrets.token_hex(12)
@staticmethod
From 66e770fd8f93a23922323d280da5811e8f891a56 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 17:47:33 +0800
Subject: [PATCH 093/278] Add debug
---
tests/integration/test_runner_manager_openstack.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 32b5f28b2..1af6d2c1f 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -283,3 +283,4 @@ async def test_runner_normal_lifecycle(
)
await wait_for(lambda: workflow_is_status(workflow, "completed"))
metric_log_path = log_dir_base_path["metric_log"]
+ pytest.set_trace()
From 042f4c2619402267bd0a27159230f09a071fe936 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 17:54:04 +0800
Subject: [PATCH 094/278] Fix docstring for cloud runner manager
---
...penstack_cloud.openstack_runner_manager.md | 62 ++++++++++---
src/manager/cloud_runner_manager.py | 88 +++++++++----------
.../openstack_runner_manager.py | 54 ++++++++++++
3 files changed, 150 insertions(+), 54 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 7de71ebdf..f967746dc 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -58,11 +58,9 @@ __init__(
## class `OpenstackRunnerManager`
+Manage self-hosted runner on OpenStack cloud.
-
-
-
-
+
### method `__init__`
@@ -70,16 +68,21 @@ __init__(
__init__(prefix: str, config: OpenstackRunnerManagerConfig) → None
```
+Construct the object.
+**Args:**
+
+ - `prefix`: The prefix to runner name.
+ - `config`: Configuration of the object.
---
-
+
### method `cleanup`
@@ -87,13 +90,17 @@ __init__(prefix: str, config: OpenstackRunnerManagerConfig) → None
cleanup(remove_token: str) → None
```
+Cleanup runner and resource on the cloud.
+**Args:**
+
+ - `remove_token`: The GitHub remove token.
---
-
+
### method `create_runner`
@@ -101,13 +108,22 @@ cleanup(remove_token: str) → None
create_runner(registration_token: str) → str
```
+Create a self-hosted runner.
+
+
+
+**Args:**
+
+ - `registration_token`: The GitHub registration token for registering runners.
+**Returns:**
+ Instance ID of the runner.
---
-
+
### method `delete_runner`
@@ -115,13 +131,18 @@ create_runner(registration_token: str) → str
delete_runner(id: str, remove_token: str) → None
```
+Delete self-hosted runners.
+**Args:**
+
+ - `id`: The instance id of the runner to delete.
+ - `remove_token`: The GitHub remove token.
---
-
+
### method `get_name_prefix`
@@ -129,13 +150,16 @@ delete_runner(id: str, remove_token: str) → None
get_name_prefix() → str
```
+Get the name prefix of the self-hosted runners.
+**Returns:**
+ The name prefix.
---
-
+
### method `get_runner`
@@ -143,13 +167,22 @@ get_name_prefix() → str
get_runner(id: str) → CloudRunnerInstance | None
```
+Get a self-hosted runner by instance id.
+**Args:**
+
+ - `id`: The instance id.
+
+
+
+**Returns:**
+ Information on the runner instance.
---
-
+
### method `get_runners`
@@ -159,8 +192,17 @@ get_runners(
) → Tuple[CloudRunnerInstance]
```
+Get self-hosted runners by state.
+
+
+
+**Args:**
+
+ - `states`: Filter for the runners with these cloud states. If None, all states will be included.
+**Returns:**
+ Information on the runner instances.
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 88fbe6c0f..d40664d85 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -69,61 +69,61 @@ class CloudRunnerInstance:
class CloudRunnerManager(ABC):
"""Manage runner instance on cloud."""
- def get_name_prefix(self) -> str: ...
+ def get_name_prefix(self) -> str:
+ """Get the name prefix of the self-hosted runners.
- """Get the name prefix of the self-hosted runners.
-
- Returns:
- The name prefix.
- """
+ Returns:
+ The name prefix.
+ """
+ ...
- def create_runner(self, registration_token: str) -> InstanceId: ...
+ def create_runner(self, registration_token: str) -> InstanceId:
+ """Create a self-hosted runner.
- """Create a self-hosted runner.
-
- Args:
- registration_token: The GitHub registration token for registering runners.
+ Args:
+ registration_token: The GitHub registration token for registering runners.
- Returns:
- Instance ID of the runner.
- """
+ Returns:
+ Instance ID of the runner.
+ """
+ ...
- def get_runner(self, id: InstanceId) -> CloudRunnerInstance: ...
+ def get_runner(self, id: InstanceId) -> CloudRunnerInstance:
+ """Get a self-hosted runner by instance id.
- """Get a self-hosted runner by instance id.
+ Args:
+ id: The instance id.
- Args:
- id: The instance id.
-
- Returns:
- Information on the runner instance.
- """
+ Returns:
+ Information on the runner instance.
+ """
+ ...
- def get_runners(self, states: Sequence[CloudRunnerState]) -> Tuple[CloudRunnerInstance]: ...
+ def get_runners(self, states: Sequence[CloudRunnerState]) -> Tuple[CloudRunnerInstance]:
+ """Get self-hosted runners by state.
- """Get self-hosted runners by state.
-
- Args:
- states: Filter for the runners with these github states. If None all states will be
- included.
+ Args:
+ states: Filter for the runners with these cloud states. If None, all states will be
+ included.
- Returns:
- Information on the runner instances.
- """
+ Returns:
+ Information on the runner instances.
+ """
+ ...
- def delete_runner(self, id: InstanceId, remove_token: str) -> None: ...
+ def delete_runner(self, id: InstanceId, remove_token: str) -> None:
+ """Delete self-hosted runners.
- """Delete self-hosted runners.
-
- Args:
- id: The instance id of the runner to delete.
- remove_token: The GitHub remove token.
- """
+ Args:
+ id: The instance id of the runner to delete.
+ remove_token: The GitHub remove token.
+ """
+ ...
- def cleanup_runner(self, remove_token: str) -> None: ...
+ def cleanup_runner(self, remove_token: str) -> None:
+ """Cleanup runner and resource on the cloud.
- """Cleanup runner and resource on the cloud.
-
- Args:
- remove_token: The GitHub remove token.
- """
+ Args:
+ remove_token: The GitHub remove token.
+ """
+ ...
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 90b80227b..9acd0c648 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -78,8 +78,15 @@ class OpenstackRunnerManagerConfig:
class OpenstackRunnerManager(CloudRunnerManager):
+ """Manage self-hosted runner on OpenStack cloud."""
def __init__(self, prefix: str, config: OpenstackRunnerManagerConfig) -> None:
+ """Construct the object.
+
+ Args:
+ prefix: The prefix to runner name.
+ config: Configuration of the object.
+ """
self.prefix = prefix
self.config = config
self._openstack_cloud = OpenstackCloud(
@@ -89,9 +96,22 @@ def __init__(self, prefix: str, config: OpenstackRunnerManagerConfig) -> None:
)
def get_name_prefix(self) -> str:
+ """Get the name prefix of the self-hosted runners.
+
+ Returns:
+ The name prefix.
+ """
return self.prefix
def create_runner(self, registration_token: str) -> InstanceId:
+ """Create a self-hosted runner.
+
+ Args:
+ registration_token: The GitHub registration token for registering runners.
+
+ Returns:
+ Instance ID of the runner.
+ """
start_timestamp = time.time()
id = OpenstackRunnerManager._generate_runner_id()
instance_name = self._openstack_cloud.get_server_name(instance_id=id)
@@ -121,6 +141,14 @@ def create_runner(self, registration_token: str) -> InstanceId:
return id
def get_runner(self, id: InstanceId) -> CloudRunnerInstance | None:
+ """Get a self-hosted runner by instance id.
+
+ Args:
+ id: The instance id.
+
+ Returns:
+ Information on the runner instance.
+ """
name = self._openstack_cloud.get_server_name(id)
instances_list = self._openstack_cloud.get_instances()
for instance in instances_list:
@@ -135,6 +163,15 @@ def get_runner(self, id: InstanceId) -> CloudRunnerInstance | None:
def get_runners(
self, states: Sequence[CloudRunnerState] | None = None
) -> Tuple[CloudRunnerInstance]:
+ """Get self-hosted runners by state.
+
+ Args:
+ states: Filter for the runners with these cloud states. If None, all states will be
+ included.
+
+ Returns:
+ Information on the runner instances.
+ """
instances_list = self._openstack_cloud.get_instances()
instances_list = [
CloudRunnerInstance(
@@ -149,10 +186,22 @@ def get_runners(
return [instance for instance in instances_list if instance.state in states]
def delete_runner(self, id: InstanceId, remove_token: str) -> None:
+ """Delete self-hosted runners.
+
+ Args:
+ id: The instance id of the runner to delete.
+ remove_token: The GitHub remove token.
+ """
instance = self._openstack_cloud.get_instance(id)
self._delete_runner(instance, remove_token)
def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
+ """Delete self-hosted runners by openstack instance.
+
+ Args:
+ instance: The OpenStack instance.
+ remove_token: The GitHub remove token.
+ """
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
except SshError:
@@ -178,6 +227,11 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
)
def cleanup(self, remove_token: str) -> None:
+ """Cleanup runner and resource on the cloud.
+
+ Args:
+ remove_token: The GitHub remove token.
+ """
runner_list = self._openstack_cloud.get_instances()
for runner in runner_list:
From c91d78b98bf59a55a4af982bc01a410cad7a2824 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 7 Aug 2024 18:25:37 +0800
Subject: [PATCH 095/278] Add more docstrings
---
requirements.txt | 2 -
src-docs/openstack_cloud.openstack_cloud.md | 67 +++++++++++++----
...penstack_cloud.openstack_runner_manager.md | 24 +++---
src/manager/github_runner_manager.py | 2 +-
src/manager/runner_manager.py | 4 +-
src/openstack_cloud/openstack_cloud.py | 74 +++++++++++++++++--
.../openstack_runner_manager.py | 6 +-
.../test_runner_manager_openstack.py | 2 +-
8 files changed, 140 insertions(+), 41 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index 927fa70c5..1046b854a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,3 @@
-# TODO 2024-07-12: PyGithub-based inteface will be replacing the ghapi in the future
-PyGithub
ghapi
jinja2
fabric >=3,<4
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index fcbc4e4fa..70e9d7f0e 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -32,9 +32,14 @@ Represents an OpenStack instance.
__init__(server: Server, prefix: str)
```
+Construct the object.
+**Args:**
+
+ - `server`: The OpenStack server.
+ - `prefix`: The name prefix for the servers.
@@ -42,14 +47,14 @@ __init__(server: Server, prefix: str)
---
-
+
## class `OpenstackCloud`
Client to interact with OpenStack cloud.
The OpenStack server name is managed by this cloud. Caller refers to the instances via instance_id. If the caller needs the server name, e.g., for logging, it can be queried with get_server_name.
-
+
### method `__init__`
@@ -57,7 +62,7 @@ The OpenStack server name is managed by this cloud. Caller refers to the instanc
__init__(clouds_config: dict[str, dict], cloud: str, prefix: str)
```
-Create a OpenstackCloud instance.
+Create the object.
@@ -72,7 +77,7 @@ Create a OpenstackCloud instance.
---
-
+
### method `cleanup`
@@ -80,13 +85,11 @@ Create a OpenstackCloud instance.
cleanup() → None
```
-
-
-
+Cleanup unused openstack resources.
---
-
+
### method `delete_instance`
@@ -94,13 +97,17 @@ cleanup() → None
delete_instance(instance_id: str) → None
```
+Delete an OpenStack instance.
+**Args:**
+
+ - `instance_id`: The instance ID of the instance to delete.
---
-
+
### method `get_instance`
@@ -108,13 +115,22 @@ delete_instance(instance_id: str) → None
get_instance(instance_id: str) → OpenstackInstance
```
+Get OpenStack instance by instance ID.
+**Args:**
+
+ - `instance_id`: The instance ID.
+
+
+
+**Returns:**
+ The OpenStack instance.
---
-
+
### method `get_instances`
@@ -122,13 +138,16 @@ get_instance(instance_id: str) → OpenstackInstance
get_instances() → tuple[OpenstackInstance]
```
+Get all OpenStack instances.
+**Returns:**
+ The OpenStack instances.
---
-
+
### method `get_server_name`
@@ -151,7 +170,7 @@ Get server name on OpenStack.
---
-
+
### method `get_ssh_connection`
@@ -159,13 +178,22 @@ Get server name on OpenStack.
get_ssh_connection(instance: OpenstackInstance) → Connection
```
+Get SSH connection to an OpenStack instance.
+
+**Args:**
+
+ - `instance`: The OpenStack instance to connect to.
+
+**Returns:**
+ SSH connection object.
+
---
-
+
### method `launch_instance`
@@ -179,8 +207,21 @@ launch_instance(
) → OpenstackInstance
```
+Create an OpenStack instance.
+**Args:**
+
+ - `instance_id`: The instance ID to form the instance name.
+ - `image`: The image used to create the instance.
+ - `flavor`: The flavor used to create the instance.
+ - `network`: The network used to create the instance.
+ - `userdata`: The cloud init userdata to startup the instance.
+
+
+
+**Returns:**
+ The OpenStack instance created.
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index f967746dc..5a6c882db 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -3,9 +3,7 @@
# module `openstack_cloud.openstack_runner_manager`
-
-
-
+Manager for self-hosted runner on OpenStack.
**Global Variables**
---------------
@@ -19,10 +17,10 @@
---
-
+
## class `OpenstackRunnerManagerConfig`
-OpenstackRunnerManagerConfig(clouds_config: dict[str, dict], cloud: str, image: str, flavor: str, network: str, github_path: charm_state.GithubOrg | charm_state.GithubRepo, labels: list[str], proxy_config: charm_state.ProxyConfig | None, dockerhub_mirror: str | None, ssh_debug_connections: list[charm_state.SSHDebugConnection] | None, repo_policy_url: str | None, repo_policy_token: str | None)
+Configuration for OpenstackRunnerManager.
@@ -55,12 +53,12 @@ __init__(
---
-
+
## class `OpenstackRunnerManager`
Manage self-hosted runner on OpenStack cloud.
-
+
### method `__init__`
@@ -82,7 +80,7 @@ Construct the object.
---
-
+
### method `cleanup`
@@ -100,7 +98,7 @@ Cleanup runner and resource on the cloud.
---
-
+
### method `create_runner`
@@ -123,7 +121,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -142,7 +140,7 @@ Delete self-hosted runners.
---
-
+
### method `get_name_prefix`
@@ -159,7 +157,7 @@ Get the name prefix of the self-hosted runners.
---
-
+
### method `get_runner`
@@ -182,7 +180,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 5f68b99cc..843c434ba 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -3,7 +3,7 @@
"""Client for managing self-hosted runner on GitHub side."""
-from enum import Enum, auto
+from enum import Enum
from typing import Sequence
from charm_state import GithubPath
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index c2ffb8bee..7f398668f 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -51,14 +51,12 @@ class RunnerInstance:
def __init__(
self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedRunner
- ) -> "RunnerInstance":
+ ):
"""Construct an instance.
Args:
cloud_instance: Information on the cloud instance.
github_info: Information on the GitHub of the runner.
- Returns:
- A RunnerInstance object.
"""
self.name = github_info.name
self.id = cloud_instance.id
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 56146a33d..cd1843333 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -57,6 +57,12 @@ class OpenstackInstance:
status: str
def __init__(self, server: OpenstackServer, prefix: str):
+ """Construct the object.
+
+ Args:
+ server: The OpenStack server.
+ prefix: The name prefix for the servers.
+ """
self.server_id = server.id
self.server_name = server.name
self.status = server.status
@@ -117,7 +123,7 @@ class OpenstackCloud:
"""
def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
- """Create a OpenstackCloud instance.
+ """Create the object.
Args:
clouds_config: The openstack clouds.yaml in dict format.
@@ -132,6 +138,18 @@ def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
def launch_instance(
self, instance_id: str, image: str, flavor: str, network: str, userdata: str
) -> OpenstackInstance:
+ """Create an OpenStack instance.
+
+ Args:
+ instance_id: The instance ID to form the instance name.
+ image: The image used to create the instance.
+ flavor: The flavor used to create the instance.
+ network: The network used to create the instance.
+ userdata: The cloud init userdata to startup the instance.
+
+ Returns:
+ The OpenStack instance created.
+ """
full_name = self.get_server_name(instance_id)
logger.info("Creating openstack server with %s", full_name)
@@ -139,7 +157,7 @@ def launch_instance(
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
security_group = OpenstackCloud._ensure_security_group(conn)
- keypair = OpenstackCloud._setup_key_pair(conn, full_name)
+ keypair = OpenstackCloud._setup_keypair(conn, full_name)
try:
server = conn.create_server(
@@ -170,16 +188,24 @@ def launch_instance(
"Failed to cleanup openstack server %s that timeout during creation",
full_name,
)
- self._delete_key_pair(conn, instance_id)
+ self._delete_keypair(conn, instance_id)
raise OpenStackError(f"Timeout creating openstack server {full_name}") from err
except openstack.exceptions.SDKException as err:
logger.exception("Failed to create openstack server %s", full_name)
- self._delete_key_pair(conn, instance_id)
+ self._delete_keypair(conn, instance_id)
raise OpenStackError(f"Failed to create openstack server {full_name}") from err
return OpenstackInstance(server, self.prefix)
def get_instance(self, instance_id: str) -> OpenstackInstance:
+ """Get OpenStack instance by instance ID.
+
+ Args:
+ instance_id: The instance ID.
+
+ Returns:
+ The OpenStack instance.
+ """
full_name = self.get_server_name(instance_id)
logger.info("Getting openstack server with %s", full_name)
@@ -191,6 +217,11 @@ def get_instance(self, instance_id: str) -> OpenstackInstance:
)
def delete_instance(self, instance_id: str) -> None:
+ """Delete an OpenStack instance.
+
+ Args:
+ instance_id: The instance ID of the instance to delete.
+ """
full_name = self.get_server_name(instance_id)
logger.info("Deleting openstack server with %s", full_name)
@@ -200,7 +231,7 @@ def delete_instance(self, instance_id: str) -> None:
try:
server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
conn.delete_server(name_or_id=server.id)
- OpenstackCloud._delete_key_pair(conn, full_name)
+ OpenstackCloud._delete_keypair(conn, full_name)
except (
openstack.exceptions.SDKException,
openstack.exceptions.ResourceTimeout,
@@ -208,6 +239,14 @@ def delete_instance(self, instance_id: str) -> None:
raise OpenStackError(f"Failed to remove openstack runner {full_name}") from err
def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
+ """Get SSH connection to an OpenStack instance.
+
+ Args:
+ instance: The OpenStack instance to connect to.
+
+ Returns:
+ SSH connection object.
+ """
key_path = OpenstackCloud._get_key_path(instance.server_name)
if not key_path.exists():
@@ -249,6 +288,11 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
)
def get_instances(self) -> tuple[OpenstackInstance]:
+ """Get all OpenStack instances.
+
+ Returns:
+ The OpenStack instances.
+ """
logger.info("Getting all openstack servers managed by the charm")
with _get_openstack_connection(
@@ -267,6 +311,7 @@ def get_instances(self) -> tuple[OpenstackInstance]:
return instances
def cleanup(self) -> None:
+ """Cleanup unused openstack resources."""
with _get_openstack_connection(
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
@@ -413,7 +458,16 @@ def _get_key_path(name: str) -> Path:
return _SSH_KEY_PATH / f"{name}.key"
@staticmethod
- def _setup_key_pair(conn: OpenstackConnection, name: str) -> OpenstackKeypair:
+ def _setup_keypair(conn: OpenstackConnection, name: str) -> OpenstackKeypair:
+ """Create OpenStack keypair.
+
+ Args:
+ conn: The connection object to access OpenStack cloud.
+ name: The name of the keypair.
+
+ Returns:
+ The OpenStack keypair.
+ """
key_path = OpenstackCloud._get_key_path(name)
if key_path.exists():
@@ -427,7 +481,13 @@ def _setup_key_pair(conn: OpenstackConnection, name: str) -> OpenstackKeypair:
return keypair
@staticmethod
- def _delete_key_pair(conn: OpenstackConnection, name: str) -> None:
+ def _delete_keypair(conn: OpenstackConnection, name: str) -> None:
+ """Delete OpenStack keypair.
+
+ Args:
+ conn: The connection object to access OpenStack cloud.
+ name: The name of the keypair.
+ """
try:
# Keypair have unique names, access by ID is not needed.
if not conn.delete_keypair(name):
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 9acd0c648..cdd4a2ee7 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -1,6 +1,8 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
+"""Manager for self-hosted runner on OpenStack."""
+
import logging
import secrets
import time
@@ -63,6 +65,8 @@ class _PullFileError(Exception):
@dataclass
class OpenstackRunnerManagerConfig:
+ """Configuration for OpenstackRunnerManager."""
+
clouds_config: dict[str, dict]
cloud: str
image: str
@@ -82,7 +86,7 @@ class OpenstackRunnerManager(CloudRunnerManager):
def __init__(self, prefix: str, config: OpenstackRunnerManagerConfig) -> None:
"""Construct the object.
-
+
Args:
prefix: The prefix to runner name.
config: Configuration of the object.
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 1af6d2c1f..d09090c3b 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -86,7 +86,7 @@ async def openstack_runner_manager_fixture(
) -> OpenstackRunnerManager:
"""Create OpenstackRunnerManager instance.
- The prefix args of OpenstackRunnerManager set to app_name to let openstack_connection_fixture preform the cleanup of openstack resources.
+ The prefix args of OpenstackRunnerManager set to app_name to let openstack_connection_fixture perform the cleanup of openstack resources.
"""
_CLOUDS_YAML_PATH.unlink(missing_ok=True)
clouds_config = yaml.safe_load(private_endpoint_clouds_yaml)
From db68ce4518b74696ddcbfa373905285569b3ee8d Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 8 Aug 2024 15:25:27 +0800
Subject: [PATCH 096/278] Add metrics for deleted and cleanup runners
---
src-docs/metrics.runner.md | 8 ++-
...penstack_cloud.openstack_runner_manager.md | 49 +++++++++++++---
src/manager/cloud_runner_manager.py | 15 ++++-
src/manager/github_runner_manager.py | 10 ++--
src/manager/runner_manager.py | 54 +++++++++++++++---
src/metrics/runner.py | 10 +++-
src/openstack_cloud/openstack_manager.py | 2 +-
.../openstack_runner_manager.py | 56 +++++++++++++++----
src/runner_manager.py | 2 +-
.../test_runner_manager_openstack.py | 20 +++++--
tests/unit/metrics/test_runner.py | 40 ++++---------
11 files changed, 188 insertions(+), 78 deletions(-)
diff --git a/src-docs/metrics.runner.md b/src-docs/metrics.runner.md
index edf8f0a65..269d2581a 100644
--- a/src-docs/metrics.runner.md
+++ b/src-docs/metrics.runner.md
@@ -21,7 +21,8 @@ Classes and function to extract the metrics from storage and issue runner metric
```python
extract(
metrics_storage_manager: StorageManager,
- ignore_runners: set[str]
+ runners: set[str],
+ include: bool = False
) → Iterator[RunnerMetrics]
```
@@ -38,7 +39,8 @@ In order to avoid DoS attacks, the file size is also checked.
**Args:**
- `metrics_storage_manager`: The metrics storage manager.
- - `ignore_runners`: The set of runners to ignore.
+ - `runners`: The runners to include or exclude.
+ - `include`: If true the provided runners are included for metric extraction, else the provided runners are excluded.
@@ -48,7 +50,7 @@ In order to avoid DoS attacks, the file size is also checked.
---
-
+
## function `issue_events`
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 5a6c882db..f486aa6c5 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -55,10 +55,36 @@ __init__(
+## class `RunnerHealth`
+RunnerHealth(healthy: tuple[openstack_cloud.openstack_cloud.OpenstackInstance], unhealthy: tuple[openstack_cloud.openstack_cloud.OpenstackInstance])
+
+
+
+### method `__init__`
+
+```python
+__init__(
+ healthy: tuple[OpenstackInstance],
+ unhealthy: tuple[OpenstackInstance]
+) → None
+```
+
+
+
+
+
+
+
+
+
+---
+
+
+
## class `OpenstackRunnerManager`
Manage self-hosted runner on OpenStack cloud.
-
+
### method `__init__`
@@ -80,12 +106,12 @@ Construct the object.
---
-
+
### method `cleanup`
```python
-cleanup(remove_token: str) → None
+cleanup(remove_token: str) → RunnerMetrics
```
Cleanup runner and resource on the cloud.
@@ -96,9 +122,14 @@ Cleanup runner and resource on the cloud.
- `remove_token`: The GitHub remove token.
+
+
+**Returns:**
+ Any metrics retrieved from cleanup runners.
+
---
-
+
### method `create_runner`
@@ -121,12 +152,12 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
```python
-delete_runner(id: str, remove_token: str) → None
+delete_runner(id: str, remove_token: str) → RunnerMetrics
```
Delete self-hosted runners.
@@ -140,7 +171,7 @@ Delete self-hosted runners.
---
-
+
### method `get_name_prefix`
@@ -157,7 +188,7 @@ Get the name prefix of the self-hosted runners.
---
-
+
### method `get_runner`
@@ -180,7 +211,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index d40664d85..2a25717a5 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -6,7 +6,10 @@
from abc import ABC
from dataclasses import dataclass
from enum import Enum
-from typing import Sequence, Tuple
+from typing import Iterator, Sequence, Tuple, Type
+
+from metrics import events as metric_events
+from metrics.runner import RunnerMetrics
InstanceId = str
@@ -111,19 +114,25 @@ def get_runners(self, states: Sequence[CloudRunnerState]) -> Tuple[CloudRunnerIn
"""
...
- def delete_runner(self, id: InstanceId, remove_token: str) -> None:
+ def delete_runner(self, id: InstanceId, remove_token: str) -> RunnerMetrics:
"""Delete self-hosted runners.
Args:
id: The instance id of the runner to delete.
remove_token: The GitHub remove token.
+
+ Returns:
+ Metrics of the runner deleted.
"""
...
- def cleanup_runner(self, remove_token: str) -> None:
+ def cleanup_runner(self, remove_token: str) -> Iterator[RunnerMetrics]:
"""Cleanup runner and resource on the cloud.
Args:
remove_token: The GitHub remove token.
+
+ Returns:
+ Metrics of the runners that was cleanup.
"""
...
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 843c434ba..6985405b0 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -51,7 +51,7 @@ def __init__(self, prefix: str, token: str, path: GithubPath):
"""
self._prefix = prefix
self._path = path
- self._github = GithubClient(token)
+ self.github = GithubClient(token)
def get_runners(
self, states: Sequence[GithubRunnerState] | None = None
@@ -64,7 +64,7 @@ def get_runners(
Returns:
Information on the runners.
"""
- runner_list = self._github.get_runner_github_info(self._path)
+ runner_list = self.github.get_runner_github_info(self._path)
return tuple(
runner
for runner in runner_list
@@ -83,7 +83,7 @@ def delete_runners(self, states: Sequence[GithubRunnerState] | None = None) -> N
"""
runner_list = self.get_runners(states)
for runner in runner_list:
- self._github.delete_runner(self._path, runner.id)
+ self.github.delete_runner(self._path, runner.id)
def get_registration_token(self) -> str:
"""Get registration token from GitHub.
@@ -93,7 +93,7 @@ def get_registration_token(self) -> str:
Returns:
The registration token.
"""
- return self._github.get_runner_registration_token(self._path)
+ return self.github.get_runner_registration_token(self._path)
def get_removal_token(self) -> str:
"""Get removal token from GitHub.
@@ -103,7 +103,7 @@ def get_removal_token(self) -> str:
Returns:
The removal token.
"""
- return self._github.get_runner_remove_token(self._path)
+ return self.github.get_runner_remove_token(self._path)
@staticmethod
def _filter_runner_state(
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 7f398668f..663419e82 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -6,9 +6,10 @@
import logging
from dataclasses import dataclass
from enum import Enum, auto
-from typing import Sequence
+from typing import Iterator, Sequence, Type
from charm_state import GithubPath
+from errors import GithubMetricsError
from github_type import SelfHostedRunner
from manager.cloud_runner_manager import (
CloudRunnerInstance,
@@ -17,9 +18,15 @@
InstanceId,
)
from manager.github_runner_manager import GithubRunnerManager, GithubRunnerState
+from metrics import events as metric_events
+from metrics import github as github_metrics
+from metrics import runner as runner_metrics
+from metrics.runner import RunnerMetrics
logger = logging.getLogger(__name__)
+IssuedMetricEventsStats = dict[Type[metric_events.Event], int]
+
class FlushMode(Enum):
"""Strategy for flushing runners.
@@ -49,9 +56,7 @@ class RunnerInstance:
github_state: GithubRunnerState
cloud_state: CloudRunnerState
- def __init__(
- self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedRunner
- ):
+ def __init__(self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedRunner):
"""Construct an instance.
Args:
@@ -151,7 +156,9 @@ def get_runners(
RunnerInstance(cloud_infos_map[name], github_infos_map[name]) for name in runner_names
)
- def delete_runners(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> None:
+ def delete_runners(
+ self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE
+ ) -> IssuedMetricEventsStats:
"""Delete the runners.
Args:
@@ -176,11 +183,42 @@ def delete_runners(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> None:
logger.info("Deleting runners: %s", runner_names)
remove_token = self._github.get_removal_token()
+ metrics = []
for runner in runners_list:
- self._cloud.delete_runner(id=runner.id, remove_token=remove_token)
+ metrics.append(self._cloud.delete_runner(id=runner.id, remove_token=remove_token))
+
+ return self._issue_runner_metrics(metrics=iter(metric_events))
- def cleanup(self) -> None:
+ def cleanup(self) -> IssuedMetricEventsStats:
"""Run cleanup of the runners and other resources."""
self._github.delete_runners([GithubRunnerState.OFFLINE, GithubRunnerState.UNKNOWN])
remove_token = self._github.get_removal_token()
- self._cloud.cleanup_runner(remove_token)
+ metrics = self._cloud.cleanup_runner(remove_token)
+ return self._issue_runner_metrics(metrics=metrics)
+
+ def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetricEventsStats:
+ total_stats: IssuedMetricEventsStats = {}
+
+ for extracted_metrics in metrics:
+ try:
+ job_metrics = github_metrics.job(
+ github_client=self._github.github,
+ pre_job_metrics=extracted_metrics.pre_job,
+ runner_name=extracted_metrics.runner_name,
+ )
+ except GithubMetricsError:
+ logger.exception(
+ "Failed to calculate job metrics for %s", extracted_metrics.runner_name
+ )
+ job_metrics = None
+
+ issued_events = runner_metrics.issue_events(
+ runner_metrics=extracted_metrics,
+ job_metrics=job_metrics,
+ flavor=self._cloud.get_name_prefix(),
+ )
+
+ for event_type in issued_events:
+ total_stats[event_type] = total_stats.get(event_type, 0) + 1
+
+ return total_stats
diff --git a/src/metrics/runner.py b/src/metrics/runner.py
index dfdf11044..b0ccc191a 100644
--- a/src/metrics/runner.py
+++ b/src/metrics/runner.py
@@ -105,7 +105,7 @@ class RunnerMetrics(BaseModel):
def extract(
- metrics_storage_manager: MetricsStorageManager, ignore_runners: set[str]
+ metrics_storage_manager: MetricsStorageManager, runners: set[str], include: bool = False
) -> Iterator[RunnerMetrics]:
"""Extract metrics from runners.
@@ -120,13 +120,17 @@ def extract(
Args:
metrics_storage_manager: The metrics storage manager.
- ignore_runners: The set of runners to ignore.
+ runners: The runners to include or exclude.
+ include: If true the provided runners are included for metric extraction, else the provided
+ runners are excluded.
Yields:
Extracted runner metrics of a particular runner.
"""
for ms in metrics_storage_manager.list_all():
- if ms.runner_name not in ignore_runners:
+ if (include and ms.runner_name in runners) or (
+ not include and ms.runner_name not in runners
+ ):
runner_metrics = _extract_storage(
metrics_storage_manager=metrics_storage_manager, metrics_storage=ms
)
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index f61d28b8d..fbc4d9ba0 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -1450,7 +1450,7 @@ def _issue_runner_metrics(self, conn: OpenstackConnection) -> IssuedMetricEvents
for extracted_metrics in runner_metrics.extract(
metrics_storage_manager=metrics_storage,
- ignore_runners=instance_names,
+ runners=instance_names,
):
try:
job_metrics = github_metrics.job(
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index cdd4a2ee7..c540b2097 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -16,6 +16,7 @@
import paramiko.ssh_exception
from fabric import Connection as SshConnection
+import shared_fs
from charm_state import GithubOrg, GithubPath, ProxyConfig, SSHDebugConnection
from errors import (
CreateMetricsStorageError,
@@ -35,7 +36,6 @@
InstanceId,
)
from metrics import events as metric_events
-from metrics import github as github_metrics
from metrics import runner as runner_metrics
from metrics import storage as metrics_storage
from openstack_cloud.openstack_cloud import OpenstackCloud, OpenstackInstance
@@ -81,6 +81,12 @@ class OpenstackRunnerManagerConfig:
repo_policy_token: str | None
+@dataclass
+class RunnerHealth:
+ healthy: tuple[OpenstackInstance]
+ unhealthy: tuple[OpenstackInstance]
+
+
class OpenstackRunnerManager(CloudRunnerManager):
"""Manage self-hosted runner on OpenStack cloud."""
@@ -189,7 +195,7 @@ def get_runners(
return instances_list
return [instance for instance in instances_list if instance.state in states]
- def delete_runner(self, id: InstanceId, remove_token: str) -> None:
+ def delete_runner(self, id: InstanceId, remove_token: str) -> runner_metrics.RunnerMetrics:
"""Delete self-hosted runners.
Args:
@@ -197,7 +203,31 @@ def delete_runner(self, id: InstanceId, remove_token: str) -> None:
remove_token: The GitHub remove token.
"""
instance = self._openstack_cloud.get_instance(id)
+ metric = runner_metrics.extract(
+ metrics_storage_manager=shared_fs, runners=instance.server_name
+ )
self._delete_runner(instance, remove_token)
+ return metric
+
+ def cleanup(self, remove_token: str) -> runner_metrics.RunnerMetrics:
+ """Cleanup runner and resource on the cloud.
+
+ Args:
+ remove_token: The GitHub remove token.
+
+ Returns:
+ Any metrics retrieved from cleanup runners.
+ """
+ runners = self._get_runner_health()
+ healthy_runner_names = [runner.server_name for runner in runners.healthy]
+ metrics = runner_metrics.extract(
+ metrics_storage_manager=shared_fs, runners=set(healthy_runner_names)
+ )
+ for runner in runners.unhealthy:
+ self._delete_runner(runner, remove_token)
+
+ self._openstack_cloud.cleanup()
+ return metrics
def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
"""Delete self-hosted runners by openstack instance.
@@ -230,24 +260,26 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
"Unable to delete openstack instance for runner %s", instance.server_name
)
- def cleanup(self, remove_token: str) -> None:
- """Cleanup runner and resource on the cloud.
+ def _get_runner_health(self) -> RunnerHealth:
+ """Get runners by health state.
- Args:
- remove_token: The GitHub remove token.
+ Returns:
+ Runners by health state.
"""
runner_list = self._openstack_cloud.get_instances()
+ healthy, unhealthy = [], []
for runner in runner_list:
- state = (CloudRunnerState(runner.status),)
- if state in (
+ cloud_state = CloudRunnerState(runner.status)
+ if cloud_state in (
CloudRunnerState.DELETED,
CloudRunnerState.ERROR,
CloudRunnerState.STOPPED,
- ) or self._health_check(runner):
- self._delete_runner(runner, remove_token)
-
- self._openstack_cloud.cleanup()
+ ) or not self._health_check(runner):
+ unhealthy.append(runner)
+ else:
+ healthy.append(runner)
+ return RunnerHealth(healthy=healthy, unhealthy=unhealthy)
def _generate_userdata(self, instance_name: str, registration_token: str) -> str:
jinja = jinja2.Environment(loader=jinja2.FileSystemLoader("templates"), autoescape=True)
diff --git a/src/runner_manager.py b/src/runner_manager.py
index 09487f453..25aca060e 100644
--- a/src/runner_manager.py
+++ b/src/runner_manager.py
@@ -325,7 +325,7 @@ def _issue_runner_metrics(self) -> IssuedMetricEventsStats:
total_stats: IssuedMetricEventsStats = {}
for extracted_metrics in runner_metrics.extract(
- metrics_storage_manager=shared_fs, ignore_runners=set(runner_states.healthy)
+ metrics_storage_manager=shared_fs, runners=set(runner_states.healthy)
):
try:
job_metrics = github_metrics.job(
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index d09090c3b..377e7980d 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -21,6 +21,7 @@
from manager.github_runner_manager import GithubRunnerState
from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
from metrics import events, runner_logs
+from openstack_cloud import openstack_runner_manager
from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
from openstack_cloud.openstack_runner_manager import (
OpenstackRunnerManager,
@@ -31,7 +32,6 @@
dispatch_workflow,
wait_for,
)
-from tests.integration.helpers.openstack import PrivateEndpointConfigs
@pytest.fixture(scope="module", name="runner_label")
@@ -43,11 +43,23 @@ def runner_label():
def log_dir_base_path_fixture(tmp_path_factory: Path) -> Iterator[dict[str, Path]]:
"""Mock the log directory path and return it."""
with pytest.MonkeyPatch.context() as monkeypatch:
- runner_log_dir_path = tmp_path_factory.mktemp("log") / "runner_log"
- metric_log_path = tmp_path_factory.mktemp("log") / "runner_log"
+ temp_log_dir = tmp_path_factory.mktemp("log")
+
+ runner_log_dir_path = temp_log_dir / "runner_log"
+ metric_log_path = temp_log_dir / "metric_log"
+ metric_exchange_path = temp_log_dir / "metric_exchange"
+
monkeypatch.setattr(runner_logs, "RUNNER_LOGS_DIR_PATH", runner_log_dir_path)
monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path)
- yield {"runner_logs_dir": runner_log_dir_path, "metric_log": metric_log_path}
+ monkeypatch.setattr(
+ openstack_runner_manager, "METRICS_EXCHANGE_PATH", metric_exchange_path
+ )
+
+ yield {
+ "runner_logs_dir": runner_log_dir_path,
+ "metric_log": metric_log_path,
+ "metric_exchange": metric_exchange_path,
+ }
@pytest.fixture(scope="module", name="github_path")
diff --git a/tests/unit/metrics/test_runner.py b/tests/unit/metrics/test_runner.py
index 02b5ad028..bf0a14251 100644
--- a/tests/unit/metrics/test_runner.py
+++ b/tests/unit/metrics/test_runner.py
@@ -170,9 +170,7 @@ def test_extract(runner_fs_base: Path):
metrics_storage_manager.list_all.return_value = [runner1_fs, runner2_fs, runner3_fs]
extracted_metrics = list(
- runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, ignore_runners=set()
- )
+ runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
)
assert extracted_metrics == [
@@ -218,7 +216,7 @@ def test_extract_ignores_runners(runner_fs_base: Path):
extracted_metrics = list(
runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, ignore_runners=ignore_runners
+ metrics_storage_manager=metrics_storage_manager, runners=ignore_runners
)
)
@@ -253,9 +251,7 @@ def test_extract_corrupt_data(runner_fs_base: Path, monkeypatch: pytest.MonkeyPa
monkeypatch.setattr(runner_metrics, "move_to_quarantine", move_to_quarantine_mock)
extracted_metrics = list(
- runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, ignore_runners=set()
- )
+ runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
)
assert not extracted_metrics
@@ -275,9 +271,7 @@ def test_extract_corrupt_data(runner_fs_base: Path, monkeypatch: pytest.MonkeyPa
metrics_storage_manager.list_all.return_value = [runner_fs]
extracted_metrics = list(
- runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, ignore_runners=set()
- )
+ runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
)
assert not extracted_metrics
move_to_quarantine_mock.assert_any_call(metrics_storage_manager, runner_fs.runner_name)
@@ -296,9 +290,7 @@ def test_extract_corrupt_data(runner_fs_base: Path, monkeypatch: pytest.MonkeyPa
metrics_storage_manager.list_all.return_value = [runner_fs]
extracted_metrics = list(
- runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, ignore_runners=set()
- )
+ runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
)
assert not extracted_metrics
move_to_quarantine_mock.assert_any_call(metrics_storage_manager, runner_fs.runner_name)
@@ -317,9 +309,7 @@ def test_extract_corrupt_data(runner_fs_base: Path, monkeypatch: pytest.MonkeyPa
metrics_storage_manager.list_all.return_value = [runner_fs]
extracted_metrics = list(
- runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, ignore_runners=set()
- )
+ runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
)
assert not extracted_metrics
@@ -357,9 +347,7 @@ def test_extract_raises_error_for_too_large_files(
monkeypatch.setattr(runner_metrics, "move_to_quarantine", move_to_quarantine_mock)
extracted_metrics = list(
- runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, ignore_runners=set()
- )
+ runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
)
assert not extracted_metrics
@@ -381,9 +369,7 @@ def test_extract_raises_error_for_too_large_files(
metrics_storage_manager.list_all.return_value = [runner_fs]
extracted_metrics = list(
- runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, ignore_runners=set()
- )
+ runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
)
assert not extracted_metrics
@@ -406,9 +392,7 @@ def test_extract_raises_error_for_too_large_files(
metrics_storage_manager.list_all.return_value = [runner_fs]
extracted_metrics = list(
- runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, ignore_runners=set()
- )
+ runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
)
assert not extracted_metrics
@@ -446,9 +430,7 @@ def test_extract_ignores_filesystems_without_ts(runner_fs_base: Path):
metrics_storage_manager.list_all.return_value = [runner_fs]
extracted_metrics = list(
- runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, ignore_runners=set()
- )
+ runner_metrics.extract(metrics_storage_manager=metrics_storage_manager, runners=set())
)
assert not extracted_metrics
metrics_storage_manager.delete.assert_called_once_with(runner_fs.runner_name)
@@ -481,7 +463,7 @@ def test_extract_ignores_failure_on_shared_fs_cleanup(
)
extracted_metrics = runner_metrics.extract(
- metrics_storage_manager=metrics_storage_manager, ignore_runners=set()
+ metrics_storage_manager=metrics_storage_manager, runners=set()
)
assert list(extracted_metrics) == [runner_metrics_data]
From ab6b44514a9733303da11099e6fbbc4ba2964133 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 8 Aug 2024 15:26:30 +0800
Subject: [PATCH 097/278] Enable tests again
---
.../test_runner_manager_openstack.py | 114 +++++++++---------
1 file changed, 57 insertions(+), 57 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 377e7980d..0fe57194d 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -155,63 +155,63 @@ def workflow_is_status(workflow: Workflow, status: str) -> bool:
return workflow.status == status
-# @pytest.mark.openstack
-# @pytest.mark.asyncio
-# @pytest.mark.abort_on_fail
-# async def test_get_no_runner(runner_manager: RunnerManager) -> None:
-# """
-# Arrange: RunnerManager instance with no runners.
-# Act: Get runners.
-# Assert: Empty tuple returned.
-# """
-# runner_list = runner_manager.get_runners()
-# assert isinstance(runner_list, tuple)
-# assert not runner_list
-
-
-# @pytest.mark.openstack
-# @pytest.mark.asyncio
-# @pytest.mark.abort_on_fail
-# async def test_runner_normal_idle_lifecycle(
-# runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
-# ) -> None:
-# """
-# Arrange: RunnerManager instance with no runners.
-# Act:
-# 1. Create one runner.
-# 2. Run health check on the runner.
-# 3. Delete all idle runner.
-# Assert:
-# 1. An active idle runner.
-# 2. Health check passes.
-# 3. No runners.
-# """
-# # 1.
-# runner_id_list = runner_manager.create_runners(1)
-# assert isinstance(runner_id_list, tuple)
-# assert len(runner_id_list) == 1
-# runner_id = runner_id_list[0]
-
-# runner_list = runner_manager.get_runners()
-# assert isinstance(runner_list, tuple)
-# assert len(runner_list) == 1
-# runner = runner_list[0]
-# assert runner.id == runner_id
-# assert runner.cloud_state == CloudRunnerState.ACTIVE
-# assert runner.github_state == GithubRunnerState.IDLE
-
-# # 2.
-# openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
-# assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
-# runner = openstack_instances[0]
-
-# assert openstack_runner_manager._health_check(runner)
-
-# # 3.
-# runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
-# runner_list = runner_manager.get_runners()
-# assert isinstance(runner_list, tuple)
-# assert len(runner_list) == 0
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_get_no_runner(runner_manager: RunnerManager) -> None:
+ """
+ Arrange: RunnerManager instance with no runners.
+ Act: Get runners.
+ Assert: Empty tuple returned.
+ """
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert not runner_list
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_runner_normal_idle_lifecycle(
+ runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+) -> None:
+ """
+ Arrange: RunnerManager instance with no runners.
+ Act:
+ 1. Create one runner.
+ 2. Run health check on the runner.
+ 3. Delete all idle runner.
+ Assert:
+ 1. An active idle runner.
+ 2. Health check passes.
+ 3. No runners.
+ """
+ # 1.
+ runner_id_list = runner_manager.create_runners(1)
+ assert isinstance(runner_id_list, tuple)
+ assert len(runner_id_list) == 1
+ runner_id = runner_id_list[0]
+
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 1
+ runner = runner_list[0]
+ assert runner.id == runner_id
+ assert runner.cloud_state == CloudRunnerState.ACTIVE
+ assert runner.github_state == GithubRunnerState.IDLE
+
+ # 2.
+ openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
+ assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
+ runner = openstack_instances[0]
+
+ assert openstack_runner_manager._health_check(runner)
+
+ # 3.
+ runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 0
@pytest.mark.openstack
From ef4a0209ece36e18275fc5422988de0e971d0fff Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 8 Aug 2024 18:18:26 +0800
Subject: [PATCH 098/278] Add debug
---
tests/integration/test_runner_manager_openstack.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 0fe57194d..f1b432970 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -188,6 +188,7 @@ async def test_runner_normal_idle_lifecycle(
"""
# 1.
runner_id_list = runner_manager.create_runners(1)
+ pytest.set_trace()
assert isinstance(runner_id_list, tuple)
assert len(runner_id_list) == 1
runner_id = runner_id_list[0]
From 4aee652d0d8f1820395ce5ed5ae8aaf3a7c7ddd0 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 8 Aug 2024 18:45:52 +0800
Subject: [PATCH 099/278] Get runner info not on GitHub
---
src/manager/runner_manager.py | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 663419e82..aa76bf473 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -53,19 +53,19 @@ class RunnerInstance:
name: str
id: InstanceId
- github_state: GithubRunnerState
+ github_state: GithubRunnerState | None
cloud_state: CloudRunnerState
- def __init__(self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedRunner):
+ def __init__(self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedRunner | None):
"""Construct an instance.
Args:
cloud_instance: Information on the cloud instance.
github_info: Information on the GitHub of the runner.
"""
- self.name = github_info.name
+ self.name =cloud_instance.name
self.id = cloud_instance.id
- self.github_state = GithubRunnerState.from_runner(github_info)
+ self.github_state = GithubRunnerState.from_runner(github_info) if github_info is not None else None
self.cloud_state = cloud_instance.state
@@ -122,6 +122,8 @@ def get_runners(
cloud_runner_state: Sequence[CloudRunnerState] | None = None,
) -> tuple[RunnerInstance]:
"""Get information on runner filter by state.
+
+ Only runners that has cloud instance are returned.
Args:
github_runner_state: Filter for the runners with these github states. If None all
@@ -137,9 +139,9 @@ def get_runners(
cloud_infos = self._cloud.get_runners(cloud_runner_state)
github_infos_map = {info.name: info for info in github_infos}
cloud_infos_map = {info.name: info for info in cloud_infos}
- runner_names = cloud_infos_map.keys() & github_infos_map.keys()
- logger.info("Found following runners: %s", runner_names)
+ logger.info("Found following runners: %s", cloud_infos_map.keys() | github_infos_map.keys())
+ runner_names = cloud_infos_map.keys() & github_infos_map.keys()
cloud_only = cloud_infos_map.keys() - runner_names
github_only = github_infos_map.keys() - runner_names
if cloud_only:
@@ -153,7 +155,7 @@ def get_runners(
)
return tuple(
- RunnerInstance(cloud_infos_map[name], github_infos_map[name]) for name in runner_names
+ RunnerInstance(cloud_infos_map[name], github_infos_map[name]) for name in cloud_infos_map.keys()
)
def delete_runners(
From 6403bedc862ebad225a89d09344e350a917cf644 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 8 Aug 2024 18:59:46 +0800
Subject: [PATCH 100/278] Fix dict access
---
src/manager/runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index aa76bf473..35704929f 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -155,7 +155,7 @@ def get_runners(
)
return tuple(
- RunnerInstance(cloud_infos_map[name], github_infos_map[name]) for name in cloud_infos_map.keys()
+ RunnerInstance(cloud_infos_map[name], github_infos_map[name] if name in github_infos_map else None) for name in cloud_infos_map.keys()
)
def delete_runners(
From 6149b9c53f31732d384ff70d9f80feb8a1eeee5e Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 10:02:57 +0800
Subject: [PATCH 101/278] Add debug of userdata
---
src-docs/openstack_cloud.openstack_runner_manager.md | 8 ++++----
src/openstack_cloud/openstack_runner_manager.py | 3 +++
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index f486aa6c5..ffdc70dd9 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -106,7 +106,7 @@ Construct the object.
---
-
+
### method `cleanup`
@@ -152,7 +152,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -188,7 +188,7 @@ Get the name prefix of the self-hosted runners.
---
-
+
### method `get_runner`
@@ -211,7 +211,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index c540b2097..b1c7a0e48 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -128,6 +128,9 @@ def create_runner(self, registration_token: str) -> InstanceId:
userdata = self._generate_userdata(
instance_name=instance_name, registration_token=registration_token
)
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
try:
instance = self._openstack_cloud.launch_instance(
instance_id=id,
From 178b281c03d1deea9a1387af2454f52e50714044 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 10:11:40 +0800
Subject: [PATCH 102/278] Fix metric path
---
src-docs/openstack_cloud.openstack_runner_manager.md | 8 ++++----
src/openstack_cloud/openstack_runner_manager.py | 5 +----
tests/integration/test_runner_manager_openstack.py | 5 -----
3 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index ffdc70dd9..f486aa6c5 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -106,7 +106,7 @@ Construct the object.
---
-
+
### method `cleanup`
@@ -152,7 +152,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -188,7 +188,7 @@ Get the name prefix of the self-hosted runners.
---
-
+
### method `get_runner`
@@ -211,7 +211,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index b1c7a0e48..3e2b8265c 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -128,9 +128,6 @@ def create_runner(self, registration_token: str) -> InstanceId:
userdata = self._generate_userdata(
instance_name=instance_name, registration_token=registration_token
)
- # TODO: debug
- import pytest
- pytest.set_trace()
try:
instance = self._openstack_cloud.launch_instance(
instance_id=id,
@@ -387,7 +384,7 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
raise RunnerStartError(f"Unable to SSH run `ps aux` on {instance.server_name}")
if RUNNER_STARTUP_PROCESS not in result.stdout:
logger.warning("Runner startup process not found on %s", instance.server_name)
- return RunnerStartError(f"Runner startup process not found on {instance.server_name}")
+ raise RunnerStartError(f"Runner startup process not found on {instance.server_name}")
logger.info("Runner startup process found to be healthy on %s", instance.server_name)
@staticmethod
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index f1b432970..08013cb90 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -47,18 +47,13 @@ def log_dir_base_path_fixture(tmp_path_factory: Path) -> Iterator[dict[str, Path
runner_log_dir_path = temp_log_dir / "runner_log"
metric_log_path = temp_log_dir / "metric_log"
- metric_exchange_path = temp_log_dir / "metric_exchange"
monkeypatch.setattr(runner_logs, "RUNNER_LOGS_DIR_PATH", runner_log_dir_path)
monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path)
- monkeypatch.setattr(
- openstack_runner_manager, "METRICS_EXCHANGE_PATH", metric_exchange_path
- )
yield {
"runner_logs_dir": runner_log_dir_path,
"metric_log": metric_log_path,
- "metric_exchange": metric_exchange_path,
}
From 131d6bc496abb40c74f18516d57b43eb1ea144df Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 10:21:55 +0800
Subject: [PATCH 103/278] Debug metric
---
tests/integration/test_runner_manager_openstack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 08013cb90..65fd07e77 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -183,7 +183,6 @@ async def test_runner_normal_idle_lifecycle(
"""
# 1.
runner_id_list = runner_manager.create_runners(1)
- pytest.set_trace()
assert isinstance(runner_id_list, tuple)
assert len(runner_id_list) == 1
runner_id = runner_id_list[0]
@@ -290,5 +289,6 @@ async def test_runner_normal_lifecycle(
wait=False,
)
await wait_for(lambda: workflow_is_status(workflow, "completed"))
+ runner_log_dir_path = log_dir_base_path["runner_log_dir"]
metric_log_path = log_dir_base_path["metric_log"]
pytest.set_trace()
From 9a7d138966fe8bdbc444d87f89c664031ff4ec9b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 10:29:58 +0800
Subject: [PATCH 104/278] Fix variable naming
---
src/manager/runner_manager.py | 2 +-
tests/integration/test_runner_manager_openstack.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 35704929f..f4e2b779c 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -189,7 +189,7 @@ def delete_runners(
for runner in runners_list:
metrics.append(self._cloud.delete_runner(id=runner.id, remove_token=remove_token))
- return self._issue_runner_metrics(metrics=iter(metric_events))
+ return self._issue_runner_metrics(metrics=iter(metrics))
def cleanup(self) -> IssuedMetricEventsStats:
"""Run cleanup of the runners and other resources."""
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 65fd07e77..5aa2f3142 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -289,6 +289,6 @@ async def test_runner_normal_lifecycle(
wait=False,
)
await wait_for(lambda: workflow_is_status(workflow, "completed"))
- runner_log_dir_path = log_dir_base_path["runner_log_dir"]
+ runner_log_dir_path = log_dir_base_path["runner_logs_dir"]
metric_log_path = log_dir_base_path["metric_log"]
pytest.set_trace()
From 3fe7977a75997e4b58d8933df6f1596272cff20d Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 10:47:08 +0800
Subject: [PATCH 105/278] Test
---
.../test_runner_manager_openstack.py | 183 +++++++++---------
1 file changed, 91 insertions(+), 92 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 5aa2f3142..ad79d202a 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -45,14 +45,11 @@ def log_dir_base_path_fixture(tmp_path_factory: Path) -> Iterator[dict[str, Path
with pytest.MonkeyPatch.context() as monkeypatch:
temp_log_dir = tmp_path_factory.mktemp("log")
- runner_log_dir_path = temp_log_dir / "runner_log"
metric_log_path = temp_log_dir / "metric_log"
- monkeypatch.setattr(runner_logs, "RUNNER_LOGS_DIR_PATH", runner_log_dir_path)
monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path)
yield {
- "runner_logs_dir": runner_log_dir_path,
"metric_log": metric_log_path,
}
@@ -150,63 +147,63 @@ def workflow_is_status(workflow: Workflow, status: str) -> bool:
return workflow.status == status
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_get_no_runner(runner_manager: RunnerManager) -> None:
- """
- Arrange: RunnerManager instance with no runners.
- Act: Get runners.
- Assert: Empty tuple returned.
- """
- runner_list = runner_manager.get_runners()
- assert isinstance(runner_list, tuple)
- assert not runner_list
-
-
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_runner_normal_idle_lifecycle(
- runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
-) -> None:
- """
- Arrange: RunnerManager instance with no runners.
- Act:
- 1. Create one runner.
- 2. Run health check on the runner.
- 3. Delete all idle runner.
- Assert:
- 1. An active idle runner.
- 2. Health check passes.
- 3. No runners.
- """
- # 1.
- runner_id_list = runner_manager.create_runners(1)
- assert isinstance(runner_id_list, tuple)
- assert len(runner_id_list) == 1
- runner_id = runner_id_list[0]
-
- runner_list = runner_manager.get_runners()
- assert isinstance(runner_list, tuple)
- assert len(runner_list) == 1
- runner = runner_list[0]
- assert runner.id == runner_id
- assert runner.cloud_state == CloudRunnerState.ACTIVE
- assert runner.github_state == GithubRunnerState.IDLE
-
- # 2.
- openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
- assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
- runner = openstack_instances[0]
-
- assert openstack_runner_manager._health_check(runner)
-
- # 3.
- runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
- runner_list = runner_manager.get_runners()
- assert isinstance(runner_list, tuple)
- assert len(runner_list) == 0
+# @pytest.mark.openstack
+# @pytest.mark.asyncio
+# @pytest.mark.abort_on_fail
+# async def test_get_no_runner(runner_manager: RunnerManager) -> None:
+# """
+# Arrange: RunnerManager instance with no runners.
+# Act: Get runners.
+# Assert: Empty tuple returned.
+# """
+# runner_list = runner_manager.get_runners()
+# assert isinstance(runner_list, tuple)
+# assert not runner_list
+
+
+# @pytest.mark.openstack
+# @pytest.mark.asyncio
+# @pytest.mark.abort_on_fail
+# async def test_runner_normal_idle_lifecycle(
+# runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+# ) -> None:
+# """
+# Arrange: RunnerManager instance with no runners.
+# Act:
+# 1. Create one runner.
+# 2. Run health check on the runner.
+# 3. Delete all idle runner.
+# Assert:
+# 1. An active idle runner.
+# 2. Health check passes.
+# 3. No runners.
+# """
+# # 1.
+# runner_id_list = runner_manager.create_runners(1)
+# assert isinstance(runner_id_list, tuple)
+# assert len(runner_id_list) == 1
+# runner_id = runner_id_list[0]
+
+# runner_list = runner_manager.get_runners()
+# assert isinstance(runner_list, tuple)
+# assert len(runner_list) == 1
+# runner = runner_list[0]
+# assert runner.id == runner_id
+# assert runner.cloud_state == CloudRunnerState.ACTIVE
+# assert runner.github_state == GithubRunnerState.IDLE
+
+# # 2.
+# openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
+# assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
+# runner = openstack_instances[0]
+
+# assert openstack_runner_manager._health_check(runner)
+
+# # 3.
+# runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+# runner_list = runner_manager.get_runners()
+# assert isinstance(runner_list, tuple)
+# assert len(runner_list) == 0
@pytest.mark.openstack
@@ -260,35 +257,37 @@ async def test_runner_flush_busy_lifecycle(
runner_list = runner_manager_with_one_runner.get_runners()
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_runner_normal_lifecycle(
- runner_manager_with_one_runner: RunnerManager,
- test_github_branch: Branch,
- github_repository: Repository,
- runner_label: str,
- log_dir_base_path: dict[str, Path],
-):
- """
- Arrange: RunnerManager with one runner.
- Act:
- 1. Start a test workflow for the runner.
- 2. Run cleanup.
- Assert:
- 1. The workflow complete successfully.
- 2. The runner should be deleted. The metrics should be recorded.
- """
- workflow = await dispatch_workflow(
- app=None,
- branch=test_github_branch,
- github_repository=github_repository,
- conclusion="success",
- workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
- dispatch_input={"runner": runner_label, "minutes": "0"},
- wait=False,
- )
- await wait_for(lambda: workflow_is_status(workflow, "completed"))
- runner_log_dir_path = log_dir_base_path["runner_logs_dir"]
- metric_log_path = log_dir_base_path["metric_log"]
- pytest.set_trace()
+# @pytest.mark.openstack
+# @pytest.mark.asyncio
+# @pytest.mark.abort_on_fail
+# async def test_runner_normal_lifecycle(
+# runner_manager_with_one_runner: RunnerManager,
+# test_github_branch: Branch,
+# github_repository: Repository,
+# runner_label: str,
+# log_dir_base_path: dict[str, Path],
+# ):
+# """
+# Arrange: RunnerManager with one runner. Clean metric logs.
+# Act:
+# 1. Start a test workflow for the runner.
+# 2. Run cleanup.
+# Assert:
+# 1. The workflow complete successfully.
+# 2. The runner should be deleted. The metrics should be recorded.
+# """
+# metric_log_path = log_dir_base_path["metric_log"]
+# metric_log_path.write_text("")
+
+# workflow = await dispatch_workflow(
+# app=None,
+# branch=test_github_branch,
+# github_repository=github_repository,
+# conclusion="success",
+# workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+# dispatch_input={"runner": runner_label, "minutes": "0"},
+# wait=False,
+# )
+# await wait_for(lambda: workflow_is_status(workflow, "completed"))
+
+# pytest.set_trace()
From 755ddc282b9f2aa62fb6128a3273b167c23e88f3 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 10:53:55 +0800
Subject: [PATCH 106/278] Fix iterator
---
src/manager/runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index f4e2b779c..767766192 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -189,7 +189,7 @@ def delete_runners(
for runner in runners_list:
metrics.append(self._cloud.delete_runner(id=runner.id, remove_token=remove_token))
- return self._issue_runner_metrics(metrics=iter(metrics))
+ return self._issue_runner_metrics(metrics=metrics)
def cleanup(self) -> IssuedMetricEventsStats:
"""Run cleanup of the runners and other resources."""
From c8b5021e5d19114fd891ef0bc8267ebc93b9e186 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 10:58:08 +0800
Subject: [PATCH 107/278] Debug
---
src/manager/runner_manager.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 767766192..aa826a9eb 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -188,6 +188,10 @@ def delete_runners(
metrics = []
for runner in runners_list:
metrics.append(self._cloud.delete_runner(id=runner.id, remove_token=remove_token))
+
+ # TODO: DEBUG
+ import pytest
+ pytest.set_trace()
return self._issue_runner_metrics(metrics=metrics)
@@ -202,6 +206,9 @@ def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetri
total_stats: IssuedMetricEventsStats = {}
for extracted_metrics in metrics:
+ # TODO: DEBUG
+ import pytest
+ pytest.set_trace()
try:
job_metrics = github_metrics.job(
github_client=self._github.github,
From 6eb469a0208ddb0b49ba67e8990286fcc8feab3d Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 11:03:08 +0800
Subject: [PATCH 108/278] Debug
---
src/manager/runner_manager.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index aa826a9eb..d70a517f1 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -185,22 +185,22 @@ def delete_runners(
logger.info("Deleting runners: %s", runner_names)
remove_token = self._github.get_removal_token()
- metrics = []
+ runner_metrics = []
for runner in runners_list:
- metrics.append(self._cloud.delete_runner(id=runner.id, remove_token=remove_token))
+ runner_metrics.append(self._cloud.delete_runner(id=runner.id, remove_token=remove_token))
# TODO: DEBUG
import pytest
pytest.set_trace()
- return self._issue_runner_metrics(metrics=metrics)
+ return self._issue_runner_metrics(metrics=iter(runner_metrics))
def cleanup(self) -> IssuedMetricEventsStats:
"""Run cleanup of the runners and other resources."""
self._github.delete_runners([GithubRunnerState.OFFLINE, GithubRunnerState.UNKNOWN])
remove_token = self._github.get_removal_token()
- metrics = self._cloud.cleanup_runner(remove_token)
- return self._issue_runner_metrics(metrics=metrics)
+ runner_metrics = self._cloud.cleanup_runner(remove_token)
+ return self._issue_runner_metrics(metrics=runner_metrics)
def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetricEventsStats:
total_stats: IssuedMetricEventsStats = {}
From 5e27cc3fe6f4e6005b8ea13094e74234d1b0ff8e Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 11:08:28 +0800
Subject: [PATCH 109/278] Fix for iterator return value
---
src/openstack_cloud/openstack_runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 3e2b8265c..e0b9608d9 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -207,7 +207,7 @@ def delete_runner(self, id: InstanceId, remove_token: str) -> runner_metrics.Run
metrics_storage_manager=shared_fs, runners=instance.server_name
)
self._delete_runner(instance, remove_token)
- return metric
+ return next(metric)
def cleanup(self, remove_token: str) -> runner_metrics.RunnerMetrics:
"""Cleanup runner and resource on the cloud.
From 26eb6b78878db070b1099e5dc9b25ca74370c59a Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 11:25:01 +0800
Subject: [PATCH 110/278] Add more log path patching
---
src/openstack_cloud/openstack_runner_manager.py | 2 ++
.../integration/test_runner_manager_openstack.py | 15 ++++++++++++++-
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index e0b9608d9..eafbdad0a 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -241,7 +241,9 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
except SshError:
logger.exception("Failed SSH connection while removing %s", instance.server_name)
raise RunnerRemoveError(f"Failed SSH connection for {instance.server_name}")
+
self._pull_runner_metrics(instance.server_name, ssh_conn)
+
try:
OpenstackRunnerManager._run_github_runner_removal_script(
instance.server_name, ssh_conn, remove_token
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index ad79d202a..769ff6c10 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -20,13 +20,14 @@
from manager.cloud_runner_manager import CloudRunnerState
from manager.github_runner_manager import GithubRunnerState
from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
-from metrics import events, runner_logs
+from metrics import events, runner_logs, storage
from openstack_cloud import openstack_runner_manager
from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
from openstack_cloud.openstack_runner_manager import (
OpenstackRunnerManager,
OpenstackRunnerManagerConfig,
)
+import shared_fs
from tests.integration.helpers.common import (
DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
dispatch_workflow,
@@ -45,11 +46,23 @@ def log_dir_base_path_fixture(tmp_path_factory: Path) -> Iterator[dict[str, Path
with pytest.MonkeyPatch.context() as monkeypatch:
temp_log_dir = tmp_path_factory.mktemp("log")
+ filesystem_base_path = temp_log_dir / "runner-fs"
+ filesystem_quarantine_path = temp_log_dir / "runner-fs-quarantine"
+ filesystem_images_path = temp_log_dir / "runner-fs-images"
+ metrics_exchange_path = temp_log_dir / "metrics-exchange"
metric_log_path = temp_log_dir / "metric_log"
+ monkeypatch.setattr(storage, "FILESYSTEM_BASE_PATH", filesystem_base_path)
+ monkeypatch.setattr(storage, "FILESYSTEM_QUARANTINE_PATH", filesystem_quarantine_path)
+ monkeypatch.setattr(shared_fs, "FILESYSTEM_IMAGES_PATH" , filesystem_images_path)
+ monkeypatch.setattr(openstack_runner_manager, "METRICS_EXCHANGE" , metrics_exchange_path)
monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path)
yield {
+ "filesystem_base_path": filesystem_base_path,
+ "filesystem_quarantine_path": filesystem_quarantine_path,
+ "filesystem_images_path": filesystem_images_path,
+ "metrics_exchange": metrics_exchange_path,
"metric_log": metric_log_path,
}
From e620c153c73cb988e4aa627054918df0e2377e02 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 12:26:00 +0800
Subject: [PATCH 111/278] Fix path naming
---
tests/integration/test_runner_manager_openstack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 769ff6c10..b6c940837 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -55,7 +55,7 @@ def log_dir_base_path_fixture(tmp_path_factory: Path) -> Iterator[dict[str, Path
monkeypatch.setattr(storage, "FILESYSTEM_BASE_PATH", filesystem_base_path)
monkeypatch.setattr(storage, "FILESYSTEM_QUARANTINE_PATH", filesystem_quarantine_path)
monkeypatch.setattr(shared_fs, "FILESYSTEM_IMAGES_PATH" , filesystem_images_path)
- monkeypatch.setattr(openstack_runner_manager, "METRICS_EXCHANGE" , metrics_exchange_path)
+ monkeypatch.setattr(openstack_runner_manager, "METRICS_EXCHANGE_PATH" , metrics_exchange_path)
monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path)
yield {
From a65258297f076591d43b766cc9bf7f7f7ffa254a Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 12:34:39 +0800
Subject: [PATCH 112/278] Fix monkey patch
---
tests/integration/test_runner_manager_openstack.py | 3 ---
1 file changed, 3 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index b6c940837..d471cbb5a 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -49,20 +49,17 @@ def log_dir_base_path_fixture(tmp_path_factory: Path) -> Iterator[dict[str, Path
filesystem_base_path = temp_log_dir / "runner-fs"
filesystem_quarantine_path = temp_log_dir / "runner-fs-quarantine"
filesystem_images_path = temp_log_dir / "runner-fs-images"
- metrics_exchange_path = temp_log_dir / "metrics-exchange"
metric_log_path = temp_log_dir / "metric_log"
monkeypatch.setattr(storage, "FILESYSTEM_BASE_PATH", filesystem_base_path)
monkeypatch.setattr(storage, "FILESYSTEM_QUARANTINE_PATH", filesystem_quarantine_path)
monkeypatch.setattr(shared_fs, "FILESYSTEM_IMAGES_PATH" , filesystem_images_path)
- monkeypatch.setattr(openstack_runner_manager, "METRICS_EXCHANGE_PATH" , metrics_exchange_path)
monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path)
yield {
"filesystem_base_path": filesystem_base_path,
"filesystem_quarantine_path": filesystem_quarantine_path,
"filesystem_images_path": filesystem_images_path,
- "metrics_exchange": metrics_exchange_path,
"metric_log": metric_log_path,
}
From 6ab206a24e0575539617e7657cbb2043ff0b0d16 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 12:56:52 +0800
Subject: [PATCH 113/278] Start an arm64 manual test env
---
.github/workflows/manual_test_env.yaml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index c1060f3fb..ab1faa838 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -2,13 +2,13 @@ name: Manual test env
on:
# TODO: Uncomment
- # pull_request:
+ pull_request:
workflow_dispatch:
jobs:
manual-test-env:
name: manual-test-env
- runs-on: ["self-hosted", "stg-private-endpoint", "X64"]
+ runs-on: ["self-hosted", "stg-private-endpoint"]
steps:
- run: sudo apt update -yq
- run: sudo apt install pipx -yq
From 0f010e657208e71ed0b60f4aed17fe7d75bfd491 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 12:57:19 +0800
Subject: [PATCH 114/278] Not spawning manual test env
---
.github/workflows/manual_test_env.yaml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index ab1faa838..c1060f3fb 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -2,13 +2,13 @@ name: Manual test env
on:
# TODO: Uncomment
- pull_request:
+ # pull_request:
workflow_dispatch:
jobs:
manual-test-env:
name: manual-test-env
- runs-on: ["self-hosted", "stg-private-endpoint"]
+ runs-on: ["self-hosted", "stg-private-endpoint", "X64"]
steps:
- run: sudo apt update -yq
- run: sudo apt install pipx -yq
From 0a3869493322009dda16d5dcf45a98e6970a5062 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 13:38:57 +0800
Subject: [PATCH 115/278] Update fmt
---
...penstack_cloud.openstack_runner_manager.md | 25 ++++++++++++-------
src/manager/runner_manager.py | 25 +++++++++++++------
.../openstack_runner_manager.py | 7 ++++++
.../test_runner_manager_openstack.py | 4 +--
4 files changed, 43 insertions(+), 18 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index f486aa6c5..699d48c3b 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -56,7 +56,14 @@ __init__(
## class `RunnerHealth`
-RunnerHealth(healthy: tuple[openstack_cloud.openstack_cloud.OpenstackInstance], unhealthy: tuple[openstack_cloud.openstack_cloud.OpenstackInstance])
+Runners with health state.
+
+
+
+**Attributes:**
+
+ - `healthy`: The list of healthy runners.
+ - `unhealthy`: The list of unhealthy runners.
@@ -79,12 +86,12 @@ __init__(
---
-
+
## class `OpenstackRunnerManager`
Manage self-hosted runner on OpenStack cloud.
-
+
### method `__init__`
@@ -106,7 +113,7 @@ Construct the object.
---
-
+
### method `cleanup`
@@ -129,7 +136,7 @@ Cleanup runner and resource on the cloud.
---
-
+
### method `create_runner`
@@ -152,7 +159,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -171,7 +178,7 @@ Delete self-hosted runners.
---
-
+
### method `get_name_prefix`
@@ -188,7 +195,7 @@ Get the name prefix of the self-hosted runners.
---
-
+
### method `get_runner`
@@ -211,7 +218,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index d70a517f1..5c01f06ed 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -63,9 +63,11 @@ def __init__(self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedR
cloud_instance: Information on the cloud instance.
github_info: Information on the GitHub of the runner.
"""
- self.name =cloud_instance.name
+ self.name = cloud_instance.name
self.id = cloud_instance.id
- self.github_state = GithubRunnerState.from_runner(github_info) if github_info is not None else None
+ self.github_state = (
+ GithubRunnerState.from_runner(github_info) if github_info is not None else None
+ )
self.cloud_state = cloud_instance.state
@@ -122,7 +124,7 @@ def get_runners(
cloud_runner_state: Sequence[CloudRunnerState] | None = None,
) -> tuple[RunnerInstance]:
"""Get information on runner filter by state.
-
+
Only runners that has cloud instance are returned.
Args:
@@ -139,7 +141,9 @@ def get_runners(
cloud_infos = self._cloud.get_runners(cloud_runner_state)
github_infos_map = {info.name: info for info in github_infos}
cloud_infos_map = {info.name: info for info in cloud_infos}
- logger.info("Found following runners: %s", cloud_infos_map.keys() | github_infos_map.keys())
+ logger.info(
+ "Found following runners: %s", cloud_infos_map.keys() | github_infos_map.keys()
+ )
runner_names = cloud_infos_map.keys() & github_infos_map.keys()
cloud_only = cloud_infos_map.keys() - runner_names
@@ -155,7 +159,10 @@ def get_runners(
)
return tuple(
- RunnerInstance(cloud_infos_map[name], github_infos_map[name] if name in github_infos_map else None) for name in cloud_infos_map.keys()
+ RunnerInstance(
+ cloud_infos_map[name], github_infos_map[name] if name in github_infos_map else None
+ )
+ for name in cloud_infos_map.keys()
)
def delete_runners(
@@ -187,10 +194,13 @@ def delete_runners(
runner_metrics = []
for runner in runners_list:
- runner_metrics.append(self._cloud.delete_runner(id=runner.id, remove_token=remove_token))
-
+ runner_metrics.append(
+ self._cloud.delete_runner(id=runner.id, remove_token=remove_token)
+ )
+
# TODO: DEBUG
import pytest
+
pytest.set_trace()
return self._issue_runner_metrics(metrics=iter(runner_metrics))
@@ -208,6 +218,7 @@ def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetri
for extracted_metrics in metrics:
# TODO: DEBUG
import pytest
+
pytest.set_trace()
try:
job_metrics = github_metrics.job(
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index eafbdad0a..9c4485f54 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -83,6 +83,13 @@ class OpenstackRunnerManagerConfig:
@dataclass
class RunnerHealth:
+ """Runners with health state.
+
+ Attributes:
+ healthy: The list of healthy runners.
+ unhealthy: The list of unhealthy runners.
+ """
+
healthy: tuple[OpenstackInstance]
unhealthy: tuple[OpenstackInstance]
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index d471cbb5a..e9692d033 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -16,6 +16,7 @@
from github.Workflow import Workflow
from openstack.connection import Connection as OpenstackConnection
+import shared_fs
from charm_state import GithubPath, ProxyConfig, parse_github_path
from manager.cloud_runner_manager import CloudRunnerState
from manager.github_runner_manager import GithubRunnerState
@@ -27,7 +28,6 @@
OpenstackRunnerManager,
OpenstackRunnerManagerConfig,
)
-import shared_fs
from tests.integration.helpers.common import (
DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
dispatch_workflow,
@@ -53,7 +53,7 @@ def log_dir_base_path_fixture(tmp_path_factory: Path) -> Iterator[dict[str, Path
monkeypatch.setattr(storage, "FILESYSTEM_BASE_PATH", filesystem_base_path)
monkeypatch.setattr(storage, "FILESYSTEM_QUARANTINE_PATH", filesystem_quarantine_path)
- monkeypatch.setattr(shared_fs, "FILESYSTEM_IMAGES_PATH" , filesystem_images_path)
+ monkeypatch.setattr(shared_fs, "FILESYSTEM_IMAGES_PATH", filesystem_images_path)
monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path)
yield {
From ceae00704f517670b3d3c0a5f0d104e50bcdb779 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 13:45:05 +0800
Subject: [PATCH 116/278] Fix metric storage implementation for openstack
---
src/openstack_cloud/openstack_runner_manager.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 9c4485f54..8ec2ac73c 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -84,7 +84,7 @@ class OpenstackRunnerManagerConfig:
@dataclass
class RunnerHealth:
"""Runners with health state.
-
+
Attributes:
healthy: The list of healthy runners.
unhealthy: The list of unhealthy runners.
@@ -228,7 +228,7 @@ def cleanup(self, remove_token: str) -> runner_metrics.RunnerMetrics:
runners = self._get_runner_health()
healthy_runner_names = [runner.server_name for runner in runners.healthy]
metrics = runner_metrics.extract(
- metrics_storage_manager=shared_fs, runners=set(healthy_runner_names)
+ metrics_storage_manager=metrics_storage, runners=set(healthy_runner_names)
)
for runner in runners.unhealthy:
self._delete_runner(runner, remove_token)
From cd96114786845b05b24aba04c215a2bb72c83732 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 13:51:26 +0800
Subject: [PATCH 117/278] Fix metric storage provider usage in openstack runner
manager
---
src/openstack_cloud/openstack_runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 8ec2ac73c..4515edc84 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -211,7 +211,7 @@ def delete_runner(self, id: InstanceId, remove_token: str) -> runner_metrics.Run
"""
instance = self._openstack_cloud.get_instance(id)
metric = runner_metrics.extract(
- metrics_storage_manager=shared_fs, runners=instance.server_name
+ metrics_storage_manager=metrics_storage, runners=instance.server_name
)
self._delete_runner(instance, remove_token)
return next(metric)
From 71da31ec758c7099281409f8b20c6c9ee93a7d58 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 13:59:00 +0800
Subject: [PATCH 118/278] Debug
---
src-docs/openstack_cloud.openstack_runner_manager.md | 2 +-
src/openstack_cloud/openstack_runner_manager.py | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 699d48c3b..0c53ddc25 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -113,7 +113,7 @@ Construct the object.
---
-
+
### method `cleanup`
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 4515edc84..7d09a664c 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -214,6 +214,9 @@ def delete_runner(self, id: InstanceId, remove_token: str) -> runner_metrics.Run
metrics_storage_manager=metrics_storage, runners=instance.server_name
)
self._delete_runner(instance, remove_token)
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
return next(metric)
def cleanup(self, remove_token: str) -> runner_metrics.RunnerMetrics:
From 12aa0b8caf43e938a6b7dbb98dca941bb8181811 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 14:05:38 +0800
Subject: [PATCH 119/278] Fix iterator
---
src-docs/openstack_cloud.openstack_runner_manager.md | 6 +++---
src/manager/cloud_runner_manager.py | 4 ++--
src/openstack_cloud/openstack_runner_manager.py | 11 ++++-------
3 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 0c53ddc25..f3be9a576 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -113,12 +113,12 @@ Construct the object.
---
-
+
### method `cleanup`
```python
-cleanup(remove_token: str) → RunnerMetrics
+cleanup(remove_token: str) → Iterator[RunnerMetrics]
```
Cleanup runner and resource on the cloud.
@@ -164,7 +164,7 @@ Create a self-hosted runner.
### method `delete_runner`
```python
-delete_runner(id: str, remove_token: str) → RunnerMetrics
+delete_runner(id: str, remove_token: str) → RunnerMetrics | None
```
Delete self-hosted runners.
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 2a25717a5..0847ea82b 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -114,7 +114,7 @@ def get_runners(self, states: Sequence[CloudRunnerState]) -> Tuple[CloudRunnerIn
"""
...
- def delete_runner(self, id: InstanceId, remove_token: str) -> RunnerMetrics:
+ def delete_runner(self, id: InstanceId, remove_token: str) -> RunnerMetrics | None:
"""Delete self-hosted runners.
Args:
@@ -122,7 +122,7 @@ def delete_runner(self, id: InstanceId, remove_token: str) -> RunnerMetrics:
remove_token: The GitHub remove token.
Returns:
- Metrics of the runner deleted.
+ Metrics of the runner deleted if any.
"""
...
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 7d09a664c..c86082551 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -8,7 +8,7 @@
import time
from dataclasses import dataclass
from pathlib import Path
-from typing import Sequence, Tuple
+from typing import Iterator, Sequence, Tuple
import invoke
import jinja2
@@ -202,7 +202,7 @@ def get_runners(
return instances_list
return [instance for instance in instances_list if instance.state in states]
- def delete_runner(self, id: InstanceId, remove_token: str) -> runner_metrics.RunnerMetrics:
+ def delete_runner(self, id: InstanceId, remove_token: str) -> runner_metrics.RunnerMetrics | None:
"""Delete self-hosted runners.
Args:
@@ -214,12 +214,9 @@ def delete_runner(self, id: InstanceId, remove_token: str) -> runner_metrics.Run
metrics_storage_manager=metrics_storage, runners=instance.server_name
)
self._delete_runner(instance, remove_token)
- # TODO: debug
- import pytest
- pytest.set_trace()
- return next(metric)
+ return next(metric, None)
- def cleanup(self, remove_token: str) -> runner_metrics.RunnerMetrics:
+ def cleanup(self, remove_token: str) -> Iterator[runner_metrics.RunnerMetrics]:
"""Cleanup runner and resource on the cloud.
Args:
From 4d19b1d84671bcfe9aa1bd49688060f037b7cd59 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 14:19:27 +0800
Subject: [PATCH 120/278] Remove debug
---
src/manager/runner_manager.py | 6 ------
1 file changed, 6 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 5c01f06ed..22cd272fc 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -197,12 +197,6 @@ def delete_runners(
runner_metrics.append(
self._cloud.delete_runner(id=runner.id, remove_token=remove_token)
)
-
- # TODO: DEBUG
- import pytest
-
- pytest.set_trace()
-
return self._issue_runner_metrics(metrics=iter(runner_metrics))
def cleanup(self) -> IssuedMetricEventsStats:
From 6c286bf77fe1d59065b536f62166347c250a4bc6 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 14:23:15 +0800
Subject: [PATCH 121/278] Fix None in iterator
---
src/manager/runner_manager.py | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 22cd272fc..3479c5f7b 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -192,12 +192,12 @@ def delete_runners(
logger.info("Deleting runners: %s", runner_names)
remove_token = self._github.get_removal_token()
- runner_metrics = []
+ runner_metrics_list = []
for runner in runners_list:
- runner_metrics.append(
- self._cloud.delete_runner(id=runner.id, remove_token=remove_token)
- )
- return self._issue_runner_metrics(metrics=iter(runner_metrics))
+ runner_metrics = self._cloud.delete_runner(id=runner.id, remove_token=remove_token)
+ if runner_metrics is not None:
+ runner_metrics_list.append(runner_metrics)
+ return self._issue_runner_metrics(metrics=iter(runner_metrics_list))
def cleanup(self) -> IssuedMetricEventsStats:
"""Run cleanup of the runners and other resources."""
@@ -210,10 +210,6 @@ def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetri
total_stats: IssuedMetricEventsStats = {}
for extracted_metrics in metrics:
- # TODO: DEBUG
- import pytest
-
- pytest.set_trace()
try:
job_metrics = github_metrics.job(
github_client=self._github.github,
From e7e2811c5fa8381e43f72ade749ecdf2b6a12a23 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 14:30:44 +0800
Subject: [PATCH 122/278] Add debug
---
src/manager/runner_manager.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 3479c5f7b..07260b0bb 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -188,6 +188,9 @@ def delete_runners(
states.append(GithubRunnerState.BUSY)
runners_list = self.get_runners(github_runner_state=states)
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
runner_names = [runner.name for runner in runners_list]
logger.info("Deleting runners: %s", runner_names)
remove_token = self._github.get_removal_token()
From 2a0272816c83dd009e809d8369f6c85e83b1e8d0 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 14:38:38 +0800
Subject: [PATCH 123/278] Trying fix for get runner filter
---
src/manager/runner_manager.py | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 07260b0bb..c4f4c7403 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -158,12 +158,20 @@ def get_runners(
github_only,
)
- return tuple(
+ runner_instances = tuple(
RunnerInstance(
cloud_infos_map[name], github_infos_map[name] if name in github_infos_map else None
)
for name in cloud_infos_map.keys()
)
+ if cloud_runner_state is not None:
+ runner_instances = [runner for runner in runner_instances if runner.cloud_state in cloud_runner_state]
+ if github_runner_state is not None:
+ runner_instances = [runner for runner in runner_instances if runner.github_state is not None and runner.github_state in github_runner_state]
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
+ return runner_instances
def delete_runners(
self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE
@@ -188,9 +196,6 @@ def delete_runners(
states.append(GithubRunnerState.BUSY)
runners_list = self.get_runners(github_runner_state=states)
- # TODO: debug
- import pytest
- pytest.set_trace()
runner_names = [runner.name for runner in runners_list]
logger.info("Deleting runners: %s", runner_names)
remove_token = self._github.get_removal_token()
From f6d1ef788e56fc1f9387003d2d0bf97a2c54802c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 14:49:36 +0800
Subject: [PATCH 124/278] Add test
---
src/manager/runner_manager.py | 3 -
.../test_runner_manager_openstack.py | 80 +++++++++----------
2 files changed, 37 insertions(+), 46 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index c4f4c7403..4eef09817 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -168,9 +168,6 @@ def get_runners(
runner_instances = [runner for runner in runner_instances if runner.cloud_state in cloud_runner_state]
if github_runner_state is not None:
runner_instances = [runner for runner in runner_instances if runner.github_state is not None and runner.github_state in github_runner_state]
- # TODO: debug
- import pytest
- pytest.set_trace()
return runner_instances
def delete_runners(
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index e9692d033..22fa7b8fb 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -46,20 +46,14 @@ def log_dir_base_path_fixture(tmp_path_factory: Path) -> Iterator[dict[str, Path
with pytest.MonkeyPatch.context() as monkeypatch:
temp_log_dir = tmp_path_factory.mktemp("log")
- filesystem_base_path = temp_log_dir / "runner-fs"
- filesystem_quarantine_path = temp_log_dir / "runner-fs-quarantine"
- filesystem_images_path = temp_log_dir / "runner-fs-images"
+ metric_exchange_path = temp_log_dir / "metric_exchange"
metric_log_path = temp_log_dir / "metric_log"
- monkeypatch.setattr(storage, "FILESYSTEM_BASE_PATH", filesystem_base_path)
- monkeypatch.setattr(storage, "FILESYSTEM_QUARANTINE_PATH", filesystem_quarantine_path)
- monkeypatch.setattr(shared_fs, "FILESYSTEM_IMAGES_PATH", filesystem_images_path)
+ monkeypatch.setattr(openstack_runner_manager, "METRICS_EXCHANGE_PATH", metric_exchange_path)
monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path)
yield {
- "filesystem_base_path": filesystem_base_path,
- "filesystem_quarantine_path": filesystem_quarantine_path,
- "filesystem_images_path": filesystem_images_path,
+ "metric_exchange": metric_exchange_path,
"metric_log": metric_log_path,
}
@@ -267,37 +261,37 @@ async def test_runner_flush_busy_lifecycle(
runner_list = runner_manager_with_one_runner.get_runners()
-# @pytest.mark.openstack
-# @pytest.mark.asyncio
-# @pytest.mark.abort_on_fail
-# async def test_runner_normal_lifecycle(
-# runner_manager_with_one_runner: RunnerManager,
-# test_github_branch: Branch,
-# github_repository: Repository,
-# runner_label: str,
-# log_dir_base_path: dict[str, Path],
-# ):
-# """
-# Arrange: RunnerManager with one runner. Clean metric logs.
-# Act:
-# 1. Start a test workflow for the runner.
-# 2. Run cleanup.
-# Assert:
-# 1. The workflow complete successfully.
-# 2. The runner should be deleted. The metrics should be recorded.
-# """
-# metric_log_path = log_dir_base_path["metric_log"]
-# metric_log_path.write_text("")
-
-# workflow = await dispatch_workflow(
-# app=None,
-# branch=test_github_branch,
-# github_repository=github_repository,
-# conclusion="success",
-# workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
-# dispatch_input={"runner": runner_label, "minutes": "0"},
-# wait=False,
-# )
-# await wait_for(lambda: workflow_is_status(workflow, "completed"))
-
-# pytest.set_trace()
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_runner_normal_lifecycle(
+ runner_manager_with_one_runner: RunnerManager,
+ test_github_branch: Branch,
+ github_repository: Repository,
+ runner_label: str,
+ log_dir_base_path: dict[str, Path],
+):
+ """
+ Arrange: RunnerManager with one runner. Clean metric logs.
+ Act:
+ 1. Start a test workflow for the runner.
+ 2. Run cleanup.
+ Assert:
+ 1. The workflow complete successfully.
+ 2. The runner should be deleted. The metrics should be recorded.
+ """
+ metric_log_path = log_dir_base_path["metric_log"]
+ metric_log_path.write_text("")
+
+ workflow = await dispatch_workflow(
+ app=None,
+ branch=test_github_branch,
+ github_repository=github_repository,
+ conclusion="success",
+ workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+ dispatch_input={"runner": runner_label, "minutes": "0"},
+ wait=False,
+ )
+ await wait_for(lambda: workflow_is_status(workflow, "completed"))
+
+ pytest.set_trace()
From b281a8ad42fb9c28dfd46ce952169a38673053bf Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 15:00:41 +0800
Subject: [PATCH 125/278] Patch the path for logs
---
tests/integration/test_runner_manager_openstack.py | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 22fa7b8fb..119d6dddc 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -46,14 +46,17 @@ def log_dir_base_path_fixture(tmp_path_factory: Path) -> Iterator[dict[str, Path
with pytest.MonkeyPatch.context() as monkeypatch:
temp_log_dir = tmp_path_factory.mktemp("log")
- metric_exchange_path = temp_log_dir / "metric_exchange"
+ filesystem_base_path = temp_log_dir / "runner-fs"
+ filesystem_quarantine_path = temp_log_dir / "runner-fs-quarantine"
metric_log_path = temp_log_dir / "metric_log"
- monkeypatch.setattr(openstack_runner_manager, "METRICS_EXCHANGE_PATH", metric_exchange_path)
+ monkeypatch.setattr(storage, "FILESYSTEM_BASE_PATH", filesystem_base_path)
+ monkeypatch.setattr(storage, "FILESYSTEM_QUARANTINE_PATH", filesystem_quarantine_path)
monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path)
yield {
- "metric_exchange": metric_exchange_path,
+ "filesystem_base_path": filesystem_base_path,
+ "filesystem_quarantine_path": filesystem_quarantine_path,
"metric_log": metric_log_path,
}
From 01d62c19ccfd25d9dc4d7a73599d430970ef0bdf Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 15:16:17 +0800
Subject: [PATCH 126/278] Add cleanup test
---
tests/integration/test_runner_manager_openstack.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 119d6dddc..77ebce76e 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -284,7 +284,8 @@ async def test_runner_normal_lifecycle(
2. The runner should be deleted. The metrics should be recorded.
"""
metric_log_path = log_dir_base_path["metric_log"]
- metric_log_path.write_text("")
+ filesystem_base_path = log_dir_base_path["filesystem_base_path"]
+ filesystem_quarantine_path = log_dir_base_path["filesystem_quarantine_path"]
workflow = await dispatch_workflow(
app=None,
@@ -296,5 +297,7 @@ async def test_runner_normal_lifecycle(
wait=False,
)
await wait_for(lambda: workflow_is_status(workflow, "completed"))
+
+ issue_metrics_events = runner_manager_with_one_runner.cleanup()
pytest.set_trace()
From 7333a762fbc5e045d4fbbf1aa8920e937f84e65f Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 15:38:17 +0800
Subject: [PATCH 127/278] Debug
---
src-docs/openstack_cloud.openstack_cloud.md | 24 +++++++----
...penstack_cloud.openstack_runner_manager.md | 2 +-
src/manager/cloud_runner_manager.py | 41 ++++++++-----------
src/manager/github_runner_manager.py | 18 +++++---
src/manager/runner_manager.py | 29 +++++++++++--
src/openstack_cloud/openstack_cloud.py | 3 ++
.../openstack_runner_manager.py | 4 +-
.../test_runner_manager_openstack.py | 2 +-
8 files changed, 78 insertions(+), 45 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 70e9d7f0e..7cb5fe9a1 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -43,18 +43,24 @@ Construct the object.
+**Raises:**
+
+ - `ValueError`: Provided server should not be managed under this prefix.
+
+
+
---
-
+
## class `OpenstackCloud`
Client to interact with OpenStack cloud.
The OpenStack server name is managed by this cloud. Caller refers to the instances via instance_id. If the caller needs the server name, e.g., for logging, it can be queried with get_server_name.
-
+
### method `__init__`
@@ -77,7 +83,7 @@ Create the object.
---
-
+
### method `cleanup`
@@ -89,7 +95,7 @@ Cleanup unused openstack resources.
---
-
+
### method `delete_instance`
@@ -107,7 +113,7 @@ Delete a openstack instance.
---
-
+
### method `get_instance`
@@ -130,7 +136,7 @@ Get OpenStack instance by instance ID.
---
-
+
### method `get_instances`
@@ -147,7 +153,7 @@ Get all OpenStack instances.
---
-
+
### method `get_server_name`
@@ -170,7 +176,7 @@ Get server name on OpenStack.
---
-
+
### method `get_ssh_connection`
@@ -193,7 +199,7 @@ Get SSH connection to an OpenStack instance.
---
-
+
### method `launch_instance`
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index f3be9a576..c06da20c1 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -113,7 +113,7 @@ Construct the object.
---
-
+
### method `cleanup`
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 0847ea82b..ef031b2be 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -6,16 +6,25 @@
from abc import ABC
from dataclasses import dataclass
from enum import Enum
-from typing import Iterator, Sequence, Tuple, Type
+from typing import Iterator, Sequence, Tuple
-from metrics import events as metric_events
from metrics.runner import RunnerMetrics
InstanceId = str
class CloudRunnerState(str, Enum):
- """Represent state of the instance hosting the runner."""
+ """Represent state of the instance hosting the runner.
+
+ Attributes:
+ CREATED: The instance is created.
+ ACTIVE: The instance is active and running.
+ DELETED: The instance is deleted.
+ ERROR: The instance has encountered error and not running.
+ STOPPED: The instance has stopped.
+ UNKNOWN: The state of the instance is not known.
+ UNEXPECTED: An unknown state not accounted by the developer is encountered.
+ """
CREATED = "created"
ACTIVE = "active"
@@ -26,7 +35,7 @@ class CloudRunnerState(str, Enum):
UNEXPECTED = "unexpected"
@staticmethod
- def from_openstack_server_status(openstack_server_status: str) -> None:
+ def from_openstack_server_status(openstack_server_status: str) -> "CloudRunnerState":
"""Create from openstack server status.
The openstack server status are documented here:
@@ -34,6 +43,9 @@ def from_openstack_server_status(openstack_server_status: str) -> None:
Args:
openstack_server_status: Openstack server status.
+
+ Returns:
+ The state of the runner.
"""
match openstack_server_status:
case "BUILD":
@@ -73,11 +85,7 @@ class CloudRunnerManager(ABC):
"""Manage runner instance on cloud."""
def get_name_prefix(self) -> str:
- """Get the name prefix of the self-hosted runners.
-
- Returns:
- The name prefix.
- """
+ """Get the name prefix of the self-hosted runners."""
...
def create_runner(self, registration_token: str) -> InstanceId:
@@ -85,9 +93,6 @@ def create_runner(self, registration_token: str) -> InstanceId:
Args:
registration_token: The GitHub registration token for registering runners.
-
- Returns:
- Instance ID of the runner.
"""
...
@@ -96,9 +101,6 @@ def get_runner(self, id: InstanceId) -> CloudRunnerInstance:
Args:
id: The instance id.
-
- Returns:
- Information on the runner instance.
"""
...
@@ -108,9 +110,6 @@ def get_runners(self, states: Sequence[CloudRunnerState]) -> Tuple[CloudRunnerIn
Args:
states: Filter for the runners with these github states. If None all states will be
included.
-
- Returns:
- Information on the runner instances.
"""
...
@@ -120,9 +119,6 @@ def delete_runner(self, id: InstanceId, remove_token: str) -> RunnerMetrics | No
Args:
id: The instance id of the runner to delete.
remove_token: The GitHub remove token.
-
- Returns:
- Metrics of the runner deleted if any.
"""
...
@@ -131,8 +127,5 @@ def cleanup_runner(self, remove_token: str) -> Iterator[RunnerMetrics]:
Args:
remove_token: The GitHub remove token.
-
- Returns:
- Metrics of the runners that was cleanup.
"""
...
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 6985405b0..85c4a8ffd 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -12,12 +12,17 @@
class GithubRunnerState(str, Enum):
- """State of the runner on GitHub."""
+ """State of the self-hosted runner on GitHub.
+
+ Attributes:
+ BUSY: Runner is working on a job assigned by GitHub.
+ IDLE: Runner is waiting to take a job.
+ OFFLINE: Runner is not connected to GitHub.
+ """
BUSY = "busy"
IDLE = "idle"
OFFLINE = "offline"
- UNKNOWN = "unknown"
@staticmethod
def from_runner(runner: SelfHostedRunner) -> "GithubRunnerState":
@@ -77,11 +82,13 @@ def delete_runners(self, states: Sequence[GithubRunnerState] | None = None) -> N
Args:
states: Filter the runners for these states. If None, all runners are deleted.
-
- Returns:
- Information on the runners.
"""
runner_list = self.get_runners(states)
+
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
+
for runner in runner_list:
self.github.delete_runner(self._path, runner.id)
@@ -112,6 +119,7 @@ def _filter_runner_state(
"""Filter the runner by the state.
Args:
+ runner: Runner to filter.
states: Filter the runners for these states. If None, return true.
Returns:
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 4eef09817..056ab6bf7 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -165,9 +165,15 @@ def get_runners(
for name in cloud_infos_map.keys()
)
if cloud_runner_state is not None:
- runner_instances = [runner for runner in runner_instances if runner.cloud_state in cloud_runner_state]
+ runner_instances = [
+ runner for runner in runner_instances if runner.cloud_state in cloud_runner_state
+ ]
if github_runner_state is not None:
- runner_instances = [runner for runner in runner_instances if runner.github_state is not None and runner.github_state in github_runner_state]
+ runner_instances = [
+ runner
+ for runner in runner_instances
+ if runner.github_state is not None and runner.github_state in github_runner_state
+ ]
return runner_instances
def delete_runners(
@@ -177,6 +183,9 @@ def delete_runners(
Args:
flush_mode: The type of runners affect by the deletion.
+
+ Returns:
+ Stats on metrics events issued during the deletion of runners.
"""
match flush_mode:
case FlushMode.FLUSH_IDLE:
@@ -205,13 +214,25 @@ def delete_runners(
return self._issue_runner_metrics(metrics=iter(runner_metrics_list))
def cleanup(self) -> IssuedMetricEventsStats:
- """Run cleanup of the runners and other resources."""
- self._github.delete_runners([GithubRunnerState.OFFLINE, GithubRunnerState.UNKNOWN])
+ """Run cleanup of the runners and other resources.
+
+ Returns:
+ Stats on metrics events issued during the cleanup of runners.
+ """
+ self._github.delete_runners([GithubRunnerState.OFFLINE])
remove_token = self._github.get_removal_token()
runner_metrics = self._cloud.cleanup_runner(remove_token)
return self._issue_runner_metrics(metrics=runner_metrics)
def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetricEventsStats:
+ """Issue runner metrics.
+
+ Args:
+ metrics: Runner metrics to issue.
+
+ Returns:
+ Stats on runner metrics issued.
+ """
total_stats: IssuedMetricEventsStats = {}
for extracted_metrics in metrics:
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index cd1843333..9559a405f 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -62,6 +62,9 @@ def __init__(self, server: OpenstackServer, prefix: str):
Args:
server: The OpenStack server.
prefix: The name prefix for the servers.
+
+ Raises:
+ ValueError: Provided server should not be managed under this prefix.
"""
self.server_id = server.id
self.server_name = server.name
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index c86082551..860ca2fde 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -202,7 +202,9 @@ def get_runners(
return instances_list
return [instance for instance in instances_list if instance.state in states]
- def delete_runner(self, id: InstanceId, remove_token: str) -> runner_metrics.RunnerMetrics | None:
+ def delete_runner(
+ self, id: InstanceId, remove_token: str
+ ) -> runner_metrics.RunnerMetrics | None:
"""Delete self-hosted runners.
Args:
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 77ebce76e..2242e881d 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -297,7 +297,7 @@ async def test_runner_normal_lifecycle(
wait=False,
)
await wait_for(lambda: workflow_is_status(workflow, "completed"))
-
+
issue_metrics_events = runner_manager_with_one_runner.cleanup()
pytest.set_trace()
From 163474d0f726824e6861754350de930ac59b41b4 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 15:51:15 +0800
Subject: [PATCH 128/278] Fix github state determining busy runner
---
src/manager/github_runner_manager.py | 8 +++++---
.../openstack_runner_manager.py | 19 ++++++++++++++++++-
.../test_runner_manager_openstack.py | 7 +++----
3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 85c4a8ffd..eae395ad0 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -35,9 +35,10 @@ def from_runner(runner: SelfHostedRunner) -> "GithubRunnerState":
The state of runner.
"""
state = GithubRunnerState.OFFLINE
+ # A runner that is busy and offline is possible.
+ if runner.busy:
+ state = GithubRunnerState.BUSY
if runner.status == GitHubRunnerStatus.ONLINE:
- if runner.busy:
- state = GithubRunnerState.BUSY
if not runner.busy:
state = GithubRunnerState.IDLE
return state
@@ -84,9 +85,10 @@ def delete_runners(self, states: Sequence[GithubRunnerState] | None = None) -> N
states: Filter the runners for these states. If None, all runners are deleted.
"""
runner_list = self.get_runners(states)
-
+
# TODO: debug
import pytest
+
pytest.set_trace()
for runner in runner_list:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 860ca2fde..748b02bfd 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -293,6 +293,17 @@ def _get_runner_health(self) -> RunnerHealth:
return RunnerHealth(healthy=healthy, unhealthy=unhealthy)
def _generate_userdata(self, instance_name: str, registration_token: str) -> str:
+ """Generate cloud init userdata.
+
+ This is the script the openstack server runs on startup.
+
+ Args:
+ instance_name: The name of the instance.
+ registration_token: The GitHub runner registration token.
+
+ Returns:
+ The userdata for openstack instance.
+ """
jinja = jinja2.Environment(loader=jinja2.FileSystemLoader("templates"), autoescape=True)
env_contents = jinja.get_template("env.j2").render(
@@ -442,6 +453,12 @@ def _issue_runner_installed_metric(
@staticmethod
def _pull_runner_metrics(name: str, ssh_conn: SshConnection) -> None:
+ """Pull metrics from runner.
+
+ Args:
+ name: The name of the runner.
+ ssh_conn: The SSH connection to the runner.
+ """
try:
storage = metrics_storage.get(name)
except GetMetricsStorageError:
@@ -486,7 +503,7 @@ def _ssh_pull_file(
Raises:
_PullFileError: Unable to pull the file from the runner instance.
- _SSHError: Issue with SSH connection.
+ SSHError: Issue with SSH connection.
"""
try:
result = ssh_conn.run(f"stat -c %s {remote_path}", warn=True)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 2242e881d..2e9ce0108 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -16,13 +16,11 @@
from github.Workflow import Workflow
from openstack.connection import Connection as OpenstackConnection
-import shared_fs
from charm_state import GithubPath, ProxyConfig, parse_github_path
from manager.cloud_runner_manager import CloudRunnerState
from manager.github_runner_manager import GithubRunnerState
from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
-from metrics import events, runner_logs, storage
-from openstack_cloud import openstack_runner_manager
+from metrics import events, storage
from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
from openstack_cloud.openstack_runner_manager import (
OpenstackRunnerManager,
@@ -97,7 +95,8 @@ async def openstack_runner_manager_fixture(
) -> OpenstackRunnerManager:
"""Create OpenstackRunnerManager instance.
- The prefix args of OpenstackRunnerManager set to app_name to let openstack_connection_fixture perform the cleanup of openstack resources.
+ The prefix args of OpenstackRunnerManager set to app_name to let openstack_connection_fixture
+ perform the cleanup of openstack resources.
"""
_CLOUDS_YAML_PATH.unlink(missing_ok=True)
clouds_config = yaml.safe_load(private_endpoint_clouds_yaml)
From 85a5268009bf6a110161b3b316c7b66f916f42fc Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 16:12:47 +0800
Subject: [PATCH 129/278] Fix wrong naming for method in ABC
---
src/manager/cloud_runner_manager.py | 2 +-
src/manager/runner_manager.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index ef031b2be..ca28d8a8a 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -122,7 +122,7 @@ def delete_runner(self, id: InstanceId, remove_token: str) -> RunnerMetrics | No
"""
...
- def cleanup_runner(self, remove_token: str) -> Iterator[RunnerMetrics]:
+ def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
"""Cleanup runner and resource on the cloud.
Args:
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 056ab6bf7..d2a3f8b24 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -221,7 +221,7 @@ def cleanup(self) -> IssuedMetricEventsStats:
"""
self._github.delete_runners([GithubRunnerState.OFFLINE])
remove_token = self._github.get_removal_token()
- runner_metrics = self._cloud.cleanup_runner(remove_token)
+ runner_metrics = self._cloud.cleanup(remove_token)
return self._issue_runner_metrics(metrics=runner_metrics)
def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetricEventsStats:
From 512101a574b740987964958f242ac91d2ff23c8e Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 16:13:57 +0800
Subject: [PATCH 130/278] Remove debugging
---
src/manager/github_runner_manager.py | 6 ------
1 file changed, 6 deletions(-)
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index eae395ad0..f48330fa1 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -85,12 +85,6 @@ def delete_runners(self, states: Sequence[GithubRunnerState] | None = None) -> N
states: Filter the runners for these states. If None, all runners are deleted.
"""
runner_list = self.get_runners(states)
-
- # TODO: debug
- import pytest
-
- pytest.set_trace()
-
for runner in runner_list:
self.github.delete_runner(self._path, runner.id)
From f7fee4869e3fe57f7b00038bacd6fdcc9347d07b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 16:22:50 +0800
Subject: [PATCH 131/278] Add more docstrings
---
.../openstack_runner_manager.py | 42 ++++++++++++++++---
tests/integration/test_openstack_cloud.py | 1 -
.../test_runner_manager_openstack.py | 9 ++++
3 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 748b02bfd..c87f7e1b4 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -130,7 +130,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
Instance ID of the runner.
"""
start_timestamp = time.time()
- id = OpenstackRunnerManager._generate_runner_id()
+ id = OpenstackRunnerManager._generate_instance_id()
instance_name = self._openstack_cloud.get_server_name(instance_id=id)
userdata = self._generate_userdata(
instance_name=instance_name, registration_token=registration_token
@@ -294,13 +294,13 @@ def _get_runner_health(self) -> RunnerHealth:
def _generate_userdata(self, instance_name: str, registration_token: str) -> str:
"""Generate cloud init userdata.
-
+
This is the script the openstack server runs on startup.
Args:
instance_name: The name of the instance.
registration_token: The GitHub runner registration token.
-
+
Returns:
The userdata for openstack instance.
"""
@@ -357,6 +357,11 @@ def _generate_userdata(self, instance_name: str, registration_token: str) -> str
)
def _get_repo_policy_compliance_client(self) -> RepoPolicyComplianceClient | None:
+ """Get repo policy compliance client.
+
+ Returns:
+ The repo policy compliance client.
+ """
if self.config.repo_policy_url and self.config.repo_policy_token:
return RepoPolicyComplianceClient(
self.config.repo_policy_url, self.config.repo_policy_token
@@ -379,7 +384,13 @@ def _health_check(self, instance: OpenstackInstance) -> bool:
@retry(tries=3, delay=60, local_logger=logger)
@staticmethod
- def _run_health_check(ssh_conn: SshConnection, name: str):
+ def _run_health_check(ssh_conn: SshConnection, name: str) -> None:
+ """Run a health check for runner process.
+
+ Args:
+ ssh_conn: The SSH connection to the runner.
+ name: The name of the runner.
+ """
result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
if not result.ok:
logger.warning("SSH run of `ps aux` failed on %s", name)
@@ -393,6 +404,11 @@ def _run_health_check(ssh_conn: SshConnection, name: str):
@retry(tries=10, delay=60, local_logger=logger)
def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
+ """Wait until runner is startup.
+
+ Args:
+ instance: The runner instance.
+ """
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
except SshError as err:
@@ -410,7 +426,12 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
logger.info("Runner startup process found to be healthy on %s", instance.server_name)
@staticmethod
- def _generate_runner_id() -> InstanceId:
+ def _generate_instance_id() -> InstanceId:
+ """Generate a instance id.
+
+ Return:
+ The id.
+ """
return secrets.token_hex(12)
@staticmethod
@@ -420,6 +441,14 @@ def _issue_runner_installed_metric(
install_start_timestamp: float,
install_end_timestamp: float,
) -> None:
+ """Issue metric for runner installed event.
+
+ Args:
+ name: The name of the runner.
+ flavor: The flavor of the runner.
+ install_start_timestamp: The timestamp of installation start.
+ install_end_timestamp: The timestamp of installation end.
+ """
try:
metric_events.issue_event(
event=metric_events.RunnerInstalled(
@@ -503,7 +532,7 @@ def _ssh_pull_file(
Raises:
_PullFileError: Unable to pull the file from the runner instance.
- SSHError: Issue with SSH connection.
+ SshError: Issue with SSH connection.
"""
try:
result = ssh_conn.run(f"stat -c %s {remote_path}", warn=True)
@@ -554,6 +583,7 @@ def _run_github_runner_removal_script(
"""Run Github runner removal script.
Args:
+ instance_name: The name of the runner instance.
ssh_conn: The SSH connection to the runner instance.
remove_token: The GitHub instance removal token.
diff --git a/tests/integration/test_openstack_cloud.py b/tests/integration/test_openstack_cloud.py
index 926e545bb..f0dd8f148 100644
--- a/tests/integration/test_openstack_cloud.py
+++ b/tests/integration/test_openstack_cloud.py
@@ -4,7 +4,6 @@
"""Test for OpenstackCloud class integration with OpenStack."""
from secrets import token_hex
-from typing import AsyncIterator
import pytest
import pytest_asyncio
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 2e9ce0108..7bbfa2424 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -149,6 +149,15 @@ async def runner_manager_with_one_runner_fixture(runner_manager: RunnerManager)
def workflow_is_status(workflow: Workflow, status: str) -> bool:
+ """Check if workflow in provided status.
+
+ Args:
+ workflow: The workflow to check.
+ status: The status to check for.
+
+ Returns:
+ Whether the workflow is in the status.
+ """
workflow.update()
return workflow.status == status
From 535c520aafb27b873d0abbeb0f487f1a382db0d2 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 16:27:06 +0800
Subject: [PATCH 132/278] Fix runner deletion
---
.../openstack_runner_manager.py | 40 ++++++++++---------
1 file changed, 21 insertions(+), 19 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index c87f7e1b4..004cf2dbc 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -247,22 +247,21 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
"""
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
- except SshError:
- logger.exception("Failed SSH connection while removing %s", instance.server_name)
- raise RunnerRemoveError(f"Failed SSH connection for {instance.server_name}")
-
- self._pull_runner_metrics(instance.server_name, ssh_conn)
+ self._pull_runner_metrics(instance.server_name, ssh_conn)
- try:
- OpenstackRunnerManager._run_github_runner_removal_script(
- instance.server_name, ssh_conn, remove_token
- )
- except GithubRunnerRemoveError:
- logger.warning(
- "Unable to run github runner removal script for %s",
- instance.server_name,
- stack_info=True,
- )
+ try:
+ OpenstackRunnerManager._run_github_runner_removal_script(
+ instance.server_name, ssh_conn, remove_token
+ )
+ except GithubRunnerRemoveError:
+ logger.warning(
+ "Unable to run github runner removal script for %s",
+ instance.server_name,
+ stack_info=True,
+ )
+ except SshError:
+ logger.exception("Failed to get SSH connection while removing %s", instance.server_name)
+ logger.warning("Skipping runner remove script for %s due to SSH issues", instance.server_name)
try:
self._openstack_cloud.delete_instance(instance.instance_id)
@@ -386,7 +385,7 @@ def _health_check(self, instance: OpenstackInstance) -> bool:
@staticmethod
def _run_health_check(ssh_conn: SshConnection, name: str) -> None:
"""Run a health check for runner process.
-
+
Args:
ssh_conn: The SSH connection to the runner.
name: The name of the runner.
@@ -408,11 +407,14 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
Args:
instance: The runner instance.
+
+ Raises:
+ RunnerStartError: The runner process was not found on the runner.
"""
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
except SshError as err:
- raise RunnerCreateError(
+ raise RunnerStartError(
f"Failed to SSH connect to {instance.server_name} openstack runner"
) from err
@@ -428,8 +430,8 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
@staticmethod
def _generate_instance_id() -> InstanceId:
"""Generate a instance id.
-
- Return:
+
+ Return:
The id.
"""
return secrets.token_hex(12)
From eb08ff7ccdd0a2fed14d7cce1950ed6867a981b6 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 16:44:36 +0800
Subject: [PATCH 133/278] Add more docs
---
src-docs/openstack_cloud.openstack_cloud.md | 18 +++++---
...penstack_cloud.openstack_runner_manager.md | 43 ++++++++++++++-----
src/openstack_cloud/openstack_cloud.py | 5 ++-
.../openstack_runner_manager.py | 35 ++++++++++++---
4 files changed, 78 insertions(+), 23 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 7cb5fe9a1..c9271e0fa 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -83,7 +83,7 @@ Create the object.
---
-
+
### method `cleanup`
@@ -95,7 +95,7 @@ Cleanup unused openstack resources.
---
-
+
### method `delete_instance`
@@ -113,7 +113,7 @@ Delete a openstack instance.
---
-
+
### method `get_instance`
@@ -136,7 +136,7 @@ Get OpenStack instance by instance ID.
---
-
+
### method `get_instances`
@@ -153,7 +153,7 @@ Get all OpenStack instances.
---
-
+
### method `get_server_name`
@@ -176,7 +176,7 @@ Get server name on OpenStack.
---
-
+
### method `get_ssh_connection`
@@ -227,6 +227,12 @@ Create an OpenStack instance.
+**Raises:**
+
+ - `OpenstackError`: Unable to create OpenStack server for runner.
+
+
+
**Returns:**
The OpenStack instance created.
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index c06da20c1..b07c74043 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -17,11 +17,28 @@ Manager for self-hosted runner on OpenStack.
---
-
+
## class `OpenstackRunnerManagerConfig`
Configuration for OpenstackRunnerManager.
+
+
+**Attributes:**
+
+ - `clouds_config`: The clouds.yaml.
+ - `cloud`: The cloud name to connect to.
+ - `image`: The image name for runners to use.
+ - `flavor`: The flavor name for runners to use.
+ - `network`: The network name for runners to use.
+ - `github_path`: The GitHub organization or repository for runners to connect to.
+ - `labels`: The labels to add to runners.
+ - `proxy_config`: The proxy configuration.
+ - `dockerhub_mirror`: The dockerhub mirror to use for runners.
+ - `ssh_debug_connections`: The information on the ssh debug services.
+ - `repo_policy_url`: The URL of the repo policy service.
+ - `repo_policy_token`: The token to access the repo policy service.
+
### method `__init__`
@@ -53,7 +70,7 @@ __init__(
---
-
+
## class `RunnerHealth`
Runners with health state.
@@ -86,12 +103,12 @@ __init__(
---
-
+
## class `OpenstackRunnerManager`
Manage self-hosted runner on OpenStack cloud.
-
+
### method `__init__`
@@ -113,7 +130,7 @@ Construct the object.
---
-
+
### method `cleanup`
@@ -136,7 +153,7 @@ Cleanup runner and resource on the cloud.
---
-
+
### method `create_runner`
@@ -154,12 +171,18 @@ Create a self-hosted runner.
+**Raises:**
+
+ - `RunnerCreateError`: Unable to create runner due to OpenStack issues.
+
+
+
**Returns:**
Instance ID of the runner.
---
-
+
### method `delete_runner`
@@ -178,7 +201,7 @@ Delete self-hosted runners.
---
-
+
### method `get_name_prefix`
@@ -195,7 +218,7 @@ Get the name prefix of the self-hosted runners.
---
-
+
### method `get_runner`
@@ -218,7 +241,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 9559a405f..6c5a87bfb 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -149,6 +149,9 @@ def launch_instance(
flavor: The flavor used to create the instance.
network: The network used to create the instance.
userdata: The cloud init userdata to startup the instance.
+
+ Raises:
+ OpenstackError: Unable to create OpenStack server for runner.
Returns:
The OpenStack instance created.
@@ -175,7 +178,7 @@ def launch_instance(
timeout=_CREATE_SERVER_TIMEOUT,
wait=True,
)
- except openstack.exceptions.ResourceTimeout as err:
+ except openstack.exceptions.ResourceTimeout:
logger.exception("Timeout creating openstack server %s", full_name)
logger.info(
"Attempting clean up of openstack server %s that timeout during creation",
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 004cf2dbc..929bd696b 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -16,7 +16,6 @@
import paramiko.ssh_exception
from fabric import Connection as SshConnection
-import shared_fs
from charm_state import GithubOrg, GithubPath, ProxyConfig, SSHDebugConnection
from errors import (
CreateMetricsStorageError,
@@ -25,7 +24,6 @@
OpenStackError,
RunnerCreateError,
RunnerError,
- RunnerRemoveError,
RunnerStartError,
SshError,
)
@@ -65,7 +63,22 @@ class _PullFileError(Exception):
@dataclass
class OpenstackRunnerManagerConfig:
- """Configuration for OpenstackRunnerManager."""
+ """Configuration for OpenstackRunnerManager.
+
+ Attributes:
+ clouds_config: The clouds.yaml.
+ cloud: The cloud name to connect to.
+ image: The image name for runners to use.
+ flavor: The flavor name for runners to use.
+ network: The network name for runners to use.
+ github_path: The GitHub organization or repository for runners to connect to.
+ labels: The labels to add to runners.
+ proxy_config: The proxy configuration.
+ dockerhub_mirror: The dockerhub mirror to use for runners.
+ ssh_debug_connections: The information on the ssh debug services.
+ repo_policy_url: The URL of the repo policy service.
+ repo_policy_token: The token to access the repo policy service.
+ """
clouds_config: dict[str, dict]
cloud: str
@@ -125,6 +138,9 @@ def create_runner(self, registration_token: str) -> InstanceId:
Args:
registration_token: The GitHub registration token for registering runners.
+
+ Raises:
+ RunnerCreateError: Unable to create runner due to OpenStack issues.
Returns:
Instance ID of the runner.
@@ -260,8 +276,12 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
stack_info=True,
)
except SshError:
- logger.exception("Failed to get SSH connection while removing %s", instance.server_name)
- logger.warning("Skipping runner remove script for %s due to SSH issues", instance.server_name)
+ logger.exception(
+ "Failed to get SSH connection while removing %s", instance.server_name
+ )
+ logger.warning(
+ "Skipping runner remove script for %s due to SSH issues", instance.server_name
+ )
try:
self._openstack_cloud.delete_instance(instance.instance_id)
@@ -389,6 +409,9 @@ def _run_health_check(ssh_conn: SshConnection, name: str) -> None:
Args:
ssh_conn: The SSH connection to the runner.
name: The name of the runner.
+
+ Raises:
+ RunnerError: Unable to SSH and find the runner process on the runner.
"""
result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
if not result.ok:
@@ -407,7 +430,7 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
Args:
instance: The runner instance.
-
+
Raises:
RunnerStartError: The runner process was not found on the runner.
"""
From fe7951dc18ef7e026bf189acad174b8beeec7a55 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 16:47:57 +0800
Subject: [PATCH 134/278] Fix typing
---
src/manager/cloud_runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index ca28d8a8a..2b6010cf1 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -35,7 +35,7 @@ class CloudRunnerState(str, Enum):
UNEXPECTED = "unexpected"
@staticmethod
- def from_openstack_server_status(openstack_server_status: str) -> "CloudRunnerState":
+ def from_openstack_server_status(openstack_server_status: str) -> str:
"""Create from openstack server status.
The openstack server status are documented here:
From 75bdd915cb1996a9723bf84f7484a4ff4901719e Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 16:49:29 +0800
Subject: [PATCH 135/278] Debug
---
src-docs/openstack_cloud.openstack_runner_manager.md | 4 ++--
src/manager/cloud_runner_manager.py | 2 +-
src/openstack_cloud/openstack_runner_manager.py | 5 +++++
3 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index b07c74043..778ba0611 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -130,7 +130,7 @@ Construct the object.
---
-
+
### method `cleanup`
@@ -182,7 +182,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 2b6010cf1..ca28d8a8a 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -35,7 +35,7 @@ class CloudRunnerState(str, Enum):
UNEXPECTED = "unexpected"
@staticmethod
- def from_openstack_server_status(openstack_server_status: str) -> str:
+ def from_openstack_server_status(openstack_server_status: str) -> "CloudRunnerState":
"""Create from openstack server status.
The openstack server status are documented here:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 929bd696b..fa4318ec2 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -206,6 +206,11 @@ def get_runners(
Information on the runner instances.
"""
instances_list = self._openstack_cloud.get_instances()
+
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
+
instances_list = [
CloudRunnerInstance(
name=instance.server_name,
From b70f1faf363e28eb1e842be5c684e34d46f223e1 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 16:55:40 +0800
Subject: [PATCH 136/278] Update SSH health check
---
src-docs/openstack_cloud.openstack_manager.md | 2 +-
.../openstack_runner_manager.py | 34 ++++++++-----------
2 files changed, 15 insertions(+), 21 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_manager.md b/src-docs/openstack_cloud.openstack_manager.md
index 0a39a9a37..f87a1b8b4 100644
--- a/src-docs/openstack_cloud.openstack_manager.md
+++ b/src-docs/openstack_cloud.openstack_manager.md
@@ -146,7 +146,7 @@ Construct OpenstackRunnerManager object.
---
-
+
### method `flush`
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index fa4318ec2..d13c176c1 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -205,23 +205,23 @@ def get_runners(
Returns:
Information on the runner instances.
"""
- instances_list = self._openstack_cloud.get_instances()
+ instance_list = self._openstack_cloud.get_instances()
# TODO: debug
import pytest
pytest.set_trace()
- instances_list = [
+ instance_list = [
CloudRunnerInstance(
name=instance.server_name,
id=instance.instance_id,
state=CloudRunnerState.from_openstack_server_status(instance.status),
)
- for instance in instances_list
+ for instance in instance_list
]
if states is None:
- return instances_list
- return [instance for instance in instances_list if instance.state in states]
+ return instance_list
+ return [instance for instance in instance_list if instance.state in states]
def delete_runner(
self, id: InstanceId, remove_token: str
@@ -392,42 +392,36 @@ def _get_repo_policy_compliance_client(self) -> RepoPolicyComplianceClient | Non
)
return None
+ @retry(tries=3, delay=5, backoff=2, local_logger=logger)
def _health_check(self, instance: OpenstackInstance) -> bool:
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
except SshError:
- logger.exception("SSH connection failure with %s", instance.server_name)
- return False
- try:
- OpenstackRunnerManager._run_health_check(ssh_conn, instance.server_name)
- except RunnerError:
- logger.exception("Health check failure for %s", instance.server_name)
- return False
- logger.info("Health check success for %s", instance.server_name)
- return True
+ logger.exception("SSH connection failure with %s during health check", instance.server_name)
+ raise
+ return OpenstackRunnerManager._run_health_check(ssh_conn, instance.server_name)
- @retry(tries=3, delay=60, local_logger=logger)
@staticmethod
- def _run_health_check(ssh_conn: SshConnection, name: str) -> None:
+ def _run_health_check(ssh_conn: SshConnection, name: str) -> bool:
"""Run a health check for runner process.
Args:
ssh_conn: The SSH connection to the runner.
name: The name of the runner.
- Raises:
- RunnerError: Unable to SSH and find the runner process on the runner.
+ Returns:
+ Whether the health succeed.
"""
result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
if not result.ok:
logger.warning("SSH run of `ps aux` failed on %s", name)
- raise RunnerError(f"Unable to SSH run `ps aux` on {name}")
+ return False
if (
RUNNER_WORKER_PROCESS not in result.stdout
and RUNNER_LISTENER_PROCESS not in result.stdout
):
logger.warning("Runner process not found on %s", name)
- raise RunnerError(f"Runner process not found on {name}")
+ return False
@retry(tries=10, delay=60, local_logger=logger)
def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
From 1b8fa8145f21a24e7df10c5c40ae6fbb5453f09f Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 17:06:49 +0800
Subject: [PATCH 137/278] Tmp disable a passing test
---
.../test_runner_manager_openstack.py | 98 +++++++++----------
1 file changed, 49 insertions(+), 49 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 7bbfa2424..4b64a8098 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -221,55 +221,55 @@ def workflow_is_status(workflow: Workflow, status: str) -> bool:
# assert len(runner_list) == 0
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_runner_flush_busy_lifecycle(
- runner_manager_with_one_runner: RunnerManager,
- test_github_branch: Branch,
- github_repository: Repository,
- runner_label: str,
-):
- """
- Arrange: RunnerManager with one idle runner.
- Act:
- 1. Run a long workflow.
- 2. Run flush idle runner.
- 3. Run flush busy runner.
- Assert:
- 1. Runner takes the job and become busy.
- 2. Busy runner still exists.
- 3. No runners exists.
- """
- # 1.
- workflow = await dispatch_workflow(
- app=None,
- branch=test_github_branch,
- github_repository=github_repository,
- conclusion="success",
- workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
- dispatch_input={"runner": runner_label, "minutes": "10"},
- wait=False,
- )
- await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
-
- runner_list = runner_manager_with_one_runner.get_runners()
- assert len(runner_list) == 1
- busy_runner = runner_list[0]
- assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
- assert busy_runner.github_state == GithubRunnerState.BUSY
-
- # 2.
- runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
- runner_list = runner_manager_with_one_runner.get_runners()
- assert len(runner_list) == 1
- busy_runner = runner_list[0]
- assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
- assert busy_runner.github_state == GithubRunnerState.BUSY
-
- # 3.
- runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
- runner_list = runner_manager_with_one_runner.get_runners()
+# @pytest.mark.openstack
+# @pytest.mark.asyncio
+# @pytest.mark.abort_on_fail
+# async def test_runner_flush_busy_lifecycle(
+# runner_manager_with_one_runner: RunnerManager,
+# test_github_branch: Branch,
+# github_repository: Repository,
+# runner_label: str,
+# ):
+# """
+# Arrange: RunnerManager with one idle runner.
+# Act:
+# 1. Run a long workflow.
+# 2. Run flush idle runner.
+# 3. Run flush busy runner.
+# Assert:
+# 1. Runner takes the job and become busy.
+# 2. Busy runner still exists.
+# 3. No runners exists.
+# """
+# # 1.
+# workflow = await dispatch_workflow(
+# app=None,
+# branch=test_github_branch,
+# github_repository=github_repository,
+# conclusion="success",
+# workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+# dispatch_input={"runner": runner_label, "minutes": "10"},
+# wait=False,
+# )
+# await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
+
+# runner_list = runner_manager_with_one_runner.get_runners()
+# assert len(runner_list) == 1
+# busy_runner = runner_list[0]
+# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+# assert busy_runner.github_state == GithubRunnerState.BUSY
+
+# # 2.
+# runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+# runner_list = runner_manager_with_one_runner.get_runners()
+# assert len(runner_list) == 1
+# busy_runner = runner_list[0]
+# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+# assert busy_runner.github_state == GithubRunnerState.BUSY
+
+# # 3.
+# runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
+# runner_list = runner_manager_with_one_runner.get_runners()
@pytest.mark.openstack
From b3dbc572570d4903eac90ab9f46fab609b616673 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 17:30:12 +0800
Subject: [PATCH 138/278] Add debug
---
src/openstack_cloud/openstack_runner_manager.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index d13c176c1..7a6eaecc0 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -305,6 +305,9 @@ def _get_runner_health(self) -> RunnerHealth:
healthy, unhealthy = [], []
for runner in runner_list:
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
cloud_state = CloudRunnerState(runner.status)
if cloud_state in (
CloudRunnerState.DELETED,
From 80b3d0ab360ca7bc2670458d32d66090650c3ae2 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 17:38:12 +0800
Subject: [PATCH 139/278] Remove a debug
---
src-docs/openstack_cloud.openstack_runner_manager.md | 4 ++--
src/openstack_cloud/openstack_runner_manager.py | 5 -----
2 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 778ba0611..b07c74043 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -130,7 +130,7 @@ Construct the object.
---
-
+
### method `cleanup`
@@ -182,7 +182,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 7a6eaecc0..2f12f739d 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -206,11 +206,6 @@ def get_runners(
Information on the runner instances.
"""
instance_list = self._openstack_cloud.get_instances()
-
- # TODO: debug
- import pytest
- pytest.set_trace()
-
instance_list = [
CloudRunnerInstance(
name=instance.server_name,
From 5a4655f56fd174db0d85bf5da95ed57cca1a181a Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 9 Aug 2024 17:39:04 +0800
Subject: [PATCH 140/278] Fix Cloud runner state init
---
src/openstack_cloud/openstack_runner_manager.py | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 2f12f739d..01b1fa0fa 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -300,10 +300,7 @@ def _get_runner_health(self) -> RunnerHealth:
healthy, unhealthy = [], []
for runner in runner_list:
- # TODO: debug
- import pytest
- pytest.set_trace()
- cloud_state = CloudRunnerState(runner.status)
+ cloud_state = CloudRunnerState.from_openstack_server_status(runner.status)
if cloud_state in (
CloudRunnerState.DELETED,
CloudRunnerState.ERROR,
From cb8c4262eb90f0f32b8a608c2370c6f4af41a313 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 11:00:32 +0800
Subject: [PATCH 141/278] Change clean up to cleanup
---
src/openstack_cloud/openstack_cloud.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 6c5a87bfb..1ae8a2897 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -324,7 +324,7 @@ def cleanup(self) -> None:
server_list = self._get_openstack_instances(conn)
exclude_list = [server.name for server in server_list]
self._cleanup_key_files(conn, exclude_list)
- self._clean_up_openstack_keypairs(conn, exclude_list)
+ self._cleanup_openstack_keypairs(conn, exclude_list)
def get_server_name(self, instance_id: str) -> str:
"""Get server name on OpenStack.
@@ -377,7 +377,7 @@ def _cleanup_key_files(
deleted += 1
logger.info("Found %s key files, clean up %s key files", total, deleted)
- def _clean_up_openstack_keypairs(
+ def _cleanup_openstack_keypairs(
self, conn: OpenstackConnection, exclude_instances: Iterable[str]
) -> None:
"""Delete all OpenStack keypairs except the specified instances.
From 6c5788ea1aad0d9b63046bfc7d0de6df009a7e91 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 11:01:31 +0800
Subject: [PATCH 142/278] Fix attr naming issue in openstack cloud
---
src/openstack_cloud/openstack_cloud.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 1ae8a2897..0aeb47873 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -357,7 +357,7 @@ def _cleanup_key_files(
# Find key file from this application.
if (
path.is_file()
- and path.name.startswith(self.instance_name)
+ and path.name.startswith(self.prefix)
and path.name.endswith(".key")
):
total += 1
From d58cc6d9c2b5a0b7cfd7e7eae0ab15e2f4a21890 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 11:05:21 +0800
Subject: [PATCH 143/278] Fix reference to non-existing instance_name in
openstack cloud
---
src/openstack_cloud/openstack_cloud.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 0aeb47873..e1618e447 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -390,7 +390,7 @@ def _cleanup_openstack_keypairs(
keypairs = conn.list_keypairs()
for key in keypairs:
# The `name` attribute is of resource.Body type.
- if key.name and str(key.name).startswith(self.instance_name):
+ if key.name and str(key.name).startswith(self.prefix):
if str(key.name) in exclude_instances:
continue
From 0cbd90a7494e2b5ad20492a9b89a7ebbb5f44c37 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 12:45:38 +0800
Subject: [PATCH 144/278] Add metric log processing to test
---
tests/integration/test_runner_manager_openstack.py | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 4b64a8098..ad4e54c67 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -4,6 +4,7 @@
"""Testing the RunnerManager class with OpenStackRunnerManager as CloudManager."""
+import json
from pathlib import Path
from secrets import token_hex
from typing import Iterator
@@ -292,8 +293,7 @@ async def test_runner_normal_lifecycle(
2. The runner should be deleted. The metrics should be recorded.
"""
metric_log_path = log_dir_base_path["metric_log"]
- filesystem_base_path = log_dir_base_path["filesystem_base_path"]
- filesystem_quarantine_path = log_dir_base_path["filesystem_quarantine_path"]
+ metric_log_existing_content = metric_log_path.read_text(encoding='utf-8')
workflow = await dispatch_workflow(
app=None,
@@ -307,5 +307,12 @@ async def test_runner_normal_lifecycle(
await wait_for(lambda: workflow_is_status(workflow, "completed"))
issue_metrics_events = runner_manager_with_one_runner.cleanup()
+ assert issue_metrics_events[events.RunnerStart] == 1
+ assert issue_metrics_events[events.RunnerStop] == 1
+
+ metric_log_full_content = metric_log_path.read_text(encoding='utf-8')
+ assert metric_log_full_content.startswith(metric_log_existing_content), "The metric log was modified in ways other than appending"
+ metric_log_new_content = metric_log_full_content[len(metric_log_existing_content):]
+ metric_logs = [json.loads(metric) for metric in metric_log_new_content.splitlines()]
pytest.set_trace()
From 11c45b19afca68fd87cbad4db033788e853b6d93 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 12:53:11 +0800
Subject: [PATCH 145/278] Enable all tests
---
.../test_runner_manager_openstack.py | 223 +++++++++---------
1 file changed, 113 insertions(+), 110 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index ad4e54c67..f378ee634 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -163,114 +163,114 @@ def workflow_is_status(workflow: Workflow, status: str) -> bool:
return workflow.status == status
-# @pytest.mark.openstack
-# @pytest.mark.asyncio
-# @pytest.mark.abort_on_fail
-# async def test_get_no_runner(runner_manager: RunnerManager) -> None:
-# """
-# Arrange: RunnerManager instance with no runners.
-# Act: Get runners.
-# Assert: Empty tuple returned.
-# """
-# runner_list = runner_manager.get_runners()
-# assert isinstance(runner_list, tuple)
-# assert not runner_list
-
-
-# @pytest.mark.openstack
-# @pytest.mark.asyncio
-# @pytest.mark.abort_on_fail
-# async def test_runner_normal_idle_lifecycle(
-# runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
-# ) -> None:
-# """
-# Arrange: RunnerManager instance with no runners.
-# Act:
-# 1. Create one runner.
-# 2. Run health check on the runner.
-# 3. Delete all idle runner.
-# Assert:
-# 1. An active idle runner.
-# 2. Health check passes.
-# 3. No runners.
-# """
-# # 1.
-# runner_id_list = runner_manager.create_runners(1)
-# assert isinstance(runner_id_list, tuple)
-# assert len(runner_id_list) == 1
-# runner_id = runner_id_list[0]
-
-# runner_list = runner_manager.get_runners()
-# assert isinstance(runner_list, tuple)
-# assert len(runner_list) == 1
-# runner = runner_list[0]
-# assert runner.id == runner_id
-# assert runner.cloud_state == CloudRunnerState.ACTIVE
-# assert runner.github_state == GithubRunnerState.IDLE
-
-# # 2.
-# openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
-# assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
-# runner = openstack_instances[0]
-
-# assert openstack_runner_manager._health_check(runner)
-
-# # 3.
-# runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
-# runner_list = runner_manager.get_runners()
-# assert isinstance(runner_list, tuple)
-# assert len(runner_list) == 0
-
-
-# @pytest.mark.openstack
-# @pytest.mark.asyncio
-# @pytest.mark.abort_on_fail
-# async def test_runner_flush_busy_lifecycle(
-# runner_manager_with_one_runner: RunnerManager,
-# test_github_branch: Branch,
-# github_repository: Repository,
-# runner_label: str,
-# ):
-# """
-# Arrange: RunnerManager with one idle runner.
-# Act:
-# 1. Run a long workflow.
-# 2. Run flush idle runner.
-# 3. Run flush busy runner.
-# Assert:
-# 1. Runner takes the job and become busy.
-# 2. Busy runner still exists.
-# 3. No runners exists.
-# """
-# # 1.
-# workflow = await dispatch_workflow(
-# app=None,
-# branch=test_github_branch,
-# github_repository=github_repository,
-# conclusion="success",
-# workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
-# dispatch_input={"runner": runner_label, "minutes": "10"},
-# wait=False,
-# )
-# await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
-
-# runner_list = runner_manager_with_one_runner.get_runners()
-# assert len(runner_list) == 1
-# busy_runner = runner_list[0]
-# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
-# assert busy_runner.github_state == GithubRunnerState.BUSY
-
-# # 2.
-# runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
-# runner_list = runner_manager_with_one_runner.get_runners()
-# assert len(runner_list) == 1
-# busy_runner = runner_list[0]
-# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
-# assert busy_runner.github_state == GithubRunnerState.BUSY
-
-# # 3.
-# runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
-# runner_list = runner_manager_with_one_runner.get_runners()
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_get_no_runner(runner_manager: RunnerManager) -> None:
+ """
+ Arrange: RunnerManager instance with no runners.
+ Act: Get runners.
+ Assert: Empty tuple returned.
+ """
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert not runner_list
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_runner_normal_idle_lifecycle(
+ runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+) -> None:
+ """
+ Arrange: RunnerManager instance with no runners.
+ Act:
+ 1. Create one runner.
+ 2. Run health check on the runner.
+ 3. Delete all idle runner.
+ Assert:
+ 1. An active idle runner.
+ 2. Health check passes.
+ 3. No runners.
+ """
+ # 1.
+ runner_id_list = runner_manager.create_runners(1)
+ assert isinstance(runner_id_list, tuple)
+ assert len(runner_id_list) == 1
+ runner_id = runner_id_list[0]
+
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 1
+ runner = runner_list[0]
+ assert runner.id == runner_id
+ assert runner.cloud_state == CloudRunnerState.ACTIVE
+ assert runner.github_state == GithubRunnerState.IDLE
+
+ # 2.
+ openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
+ assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
+ runner = openstack_instances[0]
+
+ assert openstack_runner_manager._health_check(runner)
+
+ # 3.
+ runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 0
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_runner_flush_busy_lifecycle(
+ runner_manager_with_one_runner: RunnerManager,
+ test_github_branch: Branch,
+ github_repository: Repository,
+ runner_label: str,
+):
+ """
+ Arrange: RunnerManager with one idle runner.
+ Act:
+ 1. Run a long workflow.
+ 2. Run flush idle runner.
+ 3. Run flush busy runner.
+ Assert:
+ 1. Runner takes the job and become busy.
+ 2. Busy runner still exists.
+ 3. No runners exists.
+ """
+ # 1.
+ workflow = await dispatch_workflow(
+ app=None,
+ branch=test_github_branch,
+ github_repository=github_repository,
+ conclusion="success",
+ workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+ dispatch_input={"runner": runner_label, "minutes": "10"},
+ wait=False,
+ )
+ await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
+
+ runner_list = runner_manager_with_one_runner.get_runners()
+ assert len(runner_list) == 1
+ busy_runner = runner_list[0]
+ assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+ assert busy_runner.github_state == GithubRunnerState.BUSY
+
+ # 2.
+ runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+ runner_list = runner_manager_with_one_runner.get_runners()
+ assert len(runner_list) == 1
+ busy_runner = runner_list[0]
+ assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+ assert busy_runner.github_state == GithubRunnerState.BUSY
+
+ # 3.
+ runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
+ runner_list = runner_manager_with_one_runner.get_runners()
@pytest.mark.openstack
@@ -314,5 +314,8 @@ async def test_runner_normal_lifecycle(
assert metric_log_full_content.startswith(metric_log_existing_content), "The metric log was modified in ways other than appending"
metric_log_new_content = metric_log_full_content[len(metric_log_existing_content):]
metric_logs = [json.loads(metric) for metric in metric_log_new_content.splitlines()]
-
- pytest.set_trace()
+ assert len(metric_logs) == 2, "Assuming two events should be runner_start and runner_stop, modify this if new events are added"
+ assert metric_logs[0]['event'] == "runner_start"
+ assert metric_logs[0]['workflow'] == "Workflow Dispatch Wait Tests"
+ assert metric_logs[1]['event'] == "runner_stop"
+ assert metric_logs[1]['workflow'] == "Workflow Dispatch Wait Tests"
From f5551b676aad24bbb04f24f67e1ad0c574b2039c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 12:59:31 +0800
Subject: [PATCH 146/278] Fix health check return value
---
src/openstack_cloud/openstack_runner_manager.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 01b1fa0fa..ceb0edfe5 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -417,6 +417,7 @@ def _run_health_check(ssh_conn: SshConnection, name: str) -> bool:
):
logger.warning("Runner process not found on %s", name)
return False
+ return True
@retry(tries=10, delay=60, local_logger=logger)
def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
From 97e56857972cb367904e506634cecf26e27f0f56 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 13:21:02 +0800
Subject: [PATCH 147/278] Fix all flake8 lints
---
src-docs/errors.md | 2 +-
src/errors.py | 6 ++--
src/openstack_cloud/openstack_cloud.py | 32 ++++++++++++-------
src/openstack_cloud/openstack_manager.py | 6 ++--
.../openstack_runner_manager.py | 31 +++++++++++++-----
.../test_runner_manager_openstack.py | 27 ++++++++++------
tests/unit/test_openstack_manager.py | 4 +--
7 files changed, 70 insertions(+), 38 deletions(-)
diff --git a/src-docs/errors.md b/src-docs/errors.md
index d091b72f9..1a6316046 100644
--- a/src-docs/errors.md
+++ b/src-docs/errors.md
@@ -374,7 +374,7 @@ Base class for all runner logs errors.
-## class `OpenStackError`
+## class `OpenstackError`
Base class for OpenStack errors.
diff --git a/src/errors.py b/src/errors.py
index 0dab2a54a..204877cd5 100644
--- a/src/errors.py
+++ b/src/errors.py
@@ -156,15 +156,15 @@ class RunnerLogsError(Exception):
"""Base class for all runner logs errors."""
-class OpenStackError(Exception):
+class OpenstackError(Exception):
"""Base class for OpenStack errors."""
-class OpenStackInvalidConfigError(OpenStackError):
+class OpenStackInvalidConfigError(OpenstackError):
"""Represents an invalid OpenStack configuration."""
-class OpenStackUnauthorizedError(OpenStackError):
+class OpenStackUnauthorizedError(OpenstackError):
"""Represents an unauthorized connection to OpenStack."""
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index e1618e447..39cb12ea4 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -80,7 +80,8 @@ def __init__(self, server: OpenstackServer, prefix: str):
raise ValueError(
f"Found openstack server {server.name} managed under prefix {prefix}, contact devs"
)
- self.instance_id = self.server_name[len(prefix) + 1 :]
+ # Disable E203 (space before :) as it conflicts with the formatter (black).
+ self.instance_id = self.server_name[len(prefix) + 1 :] # noqa: E203
@contextmanager
@@ -92,7 +93,7 @@ def _get_openstack_connection(
The file of _CLOUDS_YAML_PATH should only be modified by this function.
Args:
- cloud_config: The configuration in clouds.yaml format to apply.
+ clouds_config: The configuration in clouds.yaml format to apply.
cloud: The name of cloud to use in the clouds.yaml.
Raises:
@@ -149,9 +150,9 @@ def launch_instance(
flavor: The flavor used to create the instance.
network: The network used to create the instance.
userdata: The cloud init userdata to startup the instance.
-
+
Raises:
- OpenstackError: Unable to create OpenStack server for runner.
+ OpenStackError: Unable to create OpenStack server.
Returns:
The OpenStack instance created.
@@ -178,7 +179,7 @@ def launch_instance(
timeout=_CREATE_SERVER_TIMEOUT,
wait=True,
)
- except openstack.exceptions.ResourceTimeout:
+ except openstack.exceptions.ResourceTimeout as err:
logger.exception("Timeout creating openstack server %s", full_name)
logger.info(
"Attempting clean up of openstack server %s that timeout during creation",
@@ -189,7 +190,7 @@ def launch_instance(
except (
openstack.exceptions.SDKException,
openstack.exceptions.ResourceTimeout,
- ) as err:
+ ):
logger.exception(
"Failed to cleanup openstack server %s that timeout during creation",
full_name,
@@ -225,6 +226,9 @@ def get_instance(self, instance_id: str) -> OpenstackInstance:
def delete_instance(self, instance_id: str) -> None:
"""Delete a openstack instance.
+ Raises:
+ OpenStackError: Unable to delete OpenStack server.
+
Args:
instance_id: The instance ID of the instance to delete.
"""
@@ -250,6 +254,9 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
Args:
instance: The OpenStack instance to connect to.
+ Raises:
+ SshError: Unable to get a working SSH connection to the instance.
+
Returns:
SSH connection object.
"""
@@ -355,11 +362,7 @@ def _cleanup_key_files(
deleted = 0
for path in _SSH_KEY_PATH.iterdir():
# Find key file from this application.
- if (
- path.is_file()
- and path.name.startswith(self.prefix)
- and path.name.endswith(".key")
- ):
+ if path.is_file() and path.name.startswith(self.prefix) and path.name.endswith(".key"):
total += 1
if path.name in exclude_filename:
continue
@@ -425,6 +428,13 @@ def _get_and_ensure_unique_server(
If multiple servers with the same name is found, the latest server in creation time is
returned. Other servers is deleted.
+
+ Args:
+ conn: The connection to OpenStack.
+ name: The name of the OpenStack server.
+
+ Returns:
+ A server with the name.
"""
servers: list[OpenstackServer] = conn.search_servers(name)
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index 35799d8bb..c5d2bce44 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -48,7 +48,7 @@
GithubClientError,
GithubMetricsError,
IssueMetricEventError,
- OpenStackError,
+ OpenstackError,
RunnerCreateError,
RunnerStartError,
)
@@ -161,7 +161,7 @@ def _create_connection(cloud_config: dict[str, dict]) -> Iterator[openstack.conn
cloud_config: The configuration in clouds.yaml format to apply.
Raises:
- OpenStackError: if the credentials provided is not authorized.
+ OpenstackError: if the credentials provided is not authorized.
Yields:
An openstack.connection.Connection object.
@@ -180,7 +180,7 @@ def _create_connection(cloud_config: dict[str, dict]) -> Iterator[openstack.conn
# pylint thinks this isn't an exception, but does inherit from Exception class.
except openstack.exceptions.HttpException as exc: # pylint: disable=bad-exception-cause
logger.exception("OpenStack API call failure")
- raise OpenStackError("Failed OpenStack API call") from exc
+ raise OpenstackError("Failed OpenStack API call") from exc
# Disable too many arguments, as they are needed to create the dataclass.
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index ceb0edfe5..4c069ed64 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -21,9 +21,8 @@
CreateMetricsStorageError,
GetMetricsStorageError,
IssueMetricEventError,
- OpenStackError,
+ OpenstackError,
RunnerCreateError,
- RunnerError,
RunnerStartError,
SshError,
)
@@ -64,7 +63,7 @@ class _PullFileError(Exception):
@dataclass
class OpenstackRunnerManagerConfig:
"""Configuration for OpenstackRunnerManager.
-
+
Attributes:
clouds_config: The clouds.yaml.
cloud: The cloud name to connect to.
@@ -138,7 +137,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
Args:
registration_token: The GitHub registration token for registering runners.
-
+
Raises:
RunnerCreateError: Unable to create runner due to OpenStack issues.
@@ -159,7 +158,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
network=self.config.network,
userdata=userdata,
)
- except OpenStackError as err:
+ except OpenstackError as err:
raise RunnerCreateError(f"Failed to create {instance_name} openstack runner") from err
self._wait_runner_startup(instance)
@@ -226,6 +225,9 @@ def delete_runner(
Args:
id: The instance id of the runner to delete.
remove_token: The GitHub remove token.
+
+ Returns:
+ Any metrics collected during the deletion of the runner.
"""
instance = self._openstack_cloud.get_instance(id)
metric = runner_metrics.extract(
@@ -285,7 +287,7 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
try:
self._openstack_cloud.delete_instance(instance.instance_id)
- except OpenStackError:
+ except OpenstackError:
logger.exception(
"Unable to delete openstack instance for runner %s", instance.server_name
)
@@ -389,10 +391,23 @@ def _get_repo_policy_compliance_client(self) -> RepoPolicyComplianceClient | Non
@retry(tries=3, delay=5, backoff=2, local_logger=logger)
def _health_check(self, instance: OpenstackInstance) -> bool:
+ """Check whether runner is healthy.
+
+ Args:
+ instance: The OpenStack instance to conduct the health check.
+
+ Raises:
+ SshError: Unable to get a SSH connection to the instance.
+
+ Returns:
+ Whether the runner is healthy.
+ """
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
except SshError:
- logger.exception("SSH connection failure with %s during health check", instance.server_name)
+ logger.exception(
+ "SSH connection failure with %s during health check", instance.server_name
+ )
raise
return OpenstackRunnerManager._run_health_check(ssh_conn, instance.server_name)
@@ -403,7 +418,7 @@ def _run_health_check(ssh_conn: SshConnection, name: str) -> bool:
Args:
ssh_conn: The SSH connection to the runner.
name: The name of the runner.
-
+
Returns:
Whether the health succeed.
"""
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index f378ee634..2cb1814ae 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -293,7 +293,7 @@ async def test_runner_normal_lifecycle(
2. The runner should be deleted. The metrics should be recorded.
"""
metric_log_path = log_dir_base_path["metric_log"]
- metric_log_existing_content = metric_log_path.read_text(encoding='utf-8')
+ metric_log_existing_content = metric_log_path.read_text(encoding="utf-8")
workflow = await dispatch_workflow(
app=None,
@@ -309,13 +309,20 @@ async def test_runner_normal_lifecycle(
issue_metrics_events = runner_manager_with_one_runner.cleanup()
assert issue_metrics_events[events.RunnerStart] == 1
assert issue_metrics_events[events.RunnerStop] == 1
-
- metric_log_full_content = metric_log_path.read_text(encoding='utf-8')
- assert metric_log_full_content.startswith(metric_log_existing_content), "The metric log was modified in ways other than appending"
- metric_log_new_content = metric_log_full_content[len(metric_log_existing_content):]
+
+ metric_log_full_content = metric_log_path.read_text(encoding="utf-8")
+ assert metric_log_full_content.startswith(
+ metric_log_existing_content
+ ), "The metric log was modified in ways other than appending"
+ # Disable E203 (space before :) as it conflicts with the formatter (black).
+ metric_log_new_content = metric_log_full_content[
+ len(metric_log_existing_content) : # noqa: E203
+ ]
metric_logs = [json.loads(metric) for metric in metric_log_new_content.splitlines()]
- assert len(metric_logs) == 2, "Assuming two events should be runner_start and runner_stop, modify this if new events are added"
- assert metric_logs[0]['event'] == "runner_start"
- assert metric_logs[0]['workflow'] == "Workflow Dispatch Wait Tests"
- assert metric_logs[1]['event'] == "runner_stop"
- assert metric_logs[1]['workflow'] == "Workflow Dispatch Wait Tests"
+ assert (
+ len(metric_logs) == 2
+ ), "Assuming two events should be runner_start and runner_stop, modify this if new events are added"
+ assert metric_logs[0]["event"] == "runner_start"
+ assert metric_logs[0]["workflow"] == "Workflow Dispatch Wait Tests"
+ assert metric_logs[1]["event"] == "runner_stop"
+ assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests"
diff --git a/tests/unit/test_openstack_manager.py b/tests/unit/test_openstack_manager.py
index 445a0b8d3..5349b1570 100644
--- a/tests/unit/test_openstack_manager.py
+++ b/tests/unit/test_openstack_manager.py
@@ -19,7 +19,7 @@
import metrics.storage
import reactive.runner_manager
from charm_state import CharmState, ProxyConfig, ReactiveConfig, RepoPolicyComplianceConfig
-from errors import OpenStackError, RunnerStartError
+from errors import OpenstackError, RunnerStartError
from github_type import GitHubRunnerStatus, RunnerApplication, SelfHostedRunner
from metrics import events as metric_events
from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
@@ -262,7 +262,7 @@ def test__create_connection_error(clouds_yaml: dict, openstack_connect_mock: Mag
connection_mock.__enter__.return_value = connection_context
openstack_connect_mock.return_value = connection_mock
- with pytest.raises(OpenStackError) as exc:
+ with pytest.raises(OpenstackError) as exc:
with openstack_manager._create_connection(cloud_config=clouds_yaml):
pass
From 3ce240e98531ad6f432b53a4054d1afec5f87e78 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 13:28:19 +0800
Subject: [PATCH 148/278] Fix test
---
tests/integration/test_runner_manager_openstack.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 2cb1814ae..f40845a2a 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -272,6 +272,8 @@ async def test_runner_flush_busy_lifecycle(
runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
runner_list = runner_manager_with_one_runner.get_runners()
+ issue_metrics_events = runner_manager_with_one_runner.cleanup()
+ assert issue_metrics_events[events.RunnerStart] == 1
@pytest.mark.openstack
@pytest.mark.asyncio
From 233741889d1def6ecb1369f2a6ba0fb6220e3a07 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 14:21:21 +0800
Subject: [PATCH 149/278] Fix all lints
---
src-docs/openstack_cloud.openstack_cloud.md | 36 ++++++++----
...penstack_cloud.openstack_runner_manager.md | 15 +++--
src/manager/cloud_runner_manager.py | 30 +++++-----
src/manager/runner_manager.py | 20 ++++---
src/openstack_cloud/openstack_cloud.py | 56 +++++++++++--------
.../openstack_runner_manager.py | 38 ++++++++-----
.../test_runner_manager_openstack.py | 13 +++--
7 files changed, 126 insertions(+), 82 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index c9271e0fa..c431d5f28 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -53,14 +53,14 @@ Construct the object.
---
-
+
## class `OpenstackCloud`
Client to interact with OpenStack cloud.
The OpenStack server name is managed by this cloud. Caller refers to the instances via instance_id. If the caller needs the server name, e.g., for logging, it can be queried with get_server_name.
-
+
### method `__init__`
@@ -83,7 +83,7 @@ Create the object.
---
-
+
### method `cleanup`
@@ -95,7 +95,7 @@ Cleanup unused openstack resources.
---
-
+
### method `delete_instance`
@@ -107,18 +107,24 @@ Delete a openstack instance.
+**Raises:**
+
+ - `OpenstackError`: Unable to delete OpenStack server.
+
+
+
**Args:**
- `instance_id`: The instance ID of the instance to delete.
---
-
+
### method `get_instance`
```python
-get_instance(instance_id: str) → OpenstackInstance
+get_instance(instance_id: str) → OpenstackInstance | None
```
Get OpenStack instance by instance ID.
@@ -132,11 +138,11 @@ Get OpenStack instance by instance ID.
**Returns:**
- The OpenStack instance.
+ The OpenStack instance if found.
---
-
+
### method `get_instances`
@@ -153,7 +159,7 @@ Get all OpenStack instances.
---
-
+
### method `get_server_name`
@@ -176,7 +182,7 @@ Get server name on OpenStack.
---
-
+
### method `get_ssh_connection`
@@ -194,12 +200,18 @@ Get SSH connection to an OpenStack instance.
+**Raises:**
+
+ - `SshError`: Unable to get a working SSH connection to the instance.
+
+
+
**Returns:**
SSH connection object.
---
-
+
### method `launch_instance`
@@ -229,7 +241,7 @@ Create an OpenStack instance.
**Raises:**
- - `OpenstackError`: Unable to create OpenStack server for runner.
+ - `OpenstackError`: Unable to create OpenStack server.
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index b07c74043..029b39268 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -130,7 +130,7 @@ Construct the object.
---
-
+
### method `cleanup`
@@ -187,7 +187,7 @@ Create a self-hosted runner.
### method `delete_runner`
```python
-delete_runner(id: str, remove_token: str) → RunnerMetrics | None
+delete_runner(instance_id: str, remove_token: str) → RunnerMetrics | None
```
Delete self-hosted runners.
@@ -196,9 +196,14 @@ Delete self-hosted runners.
**Args:**
- - `id`: The instance id of the runner to delete.
+ - `instance_id`: The instance id of the runner to delete.
- `remove_token`: The GitHub remove token.
+
+
+**Returns:**
+ Any metrics collected during the deletion of the runner.
+
---
@@ -223,7 +228,7 @@ Get the name prefix of the self-hosted runners.
### method `get_runner`
```python
-get_runner(id: str) → CloudRunnerInstance | None
+get_runner(instance_id: str) → CloudRunnerInstance | None
```
Get a self-hosted runner by instance id.
@@ -232,7 +237,7 @@ Get a self-hosted runner by instance id.
**Args:**
- - `id`: The instance id.
+ - `instance_id`: The instance id.
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index ca28d8a8a..654e5663b 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -3,7 +3,7 @@
"""Interface of manager of runner instance on clouds."""
-from abc import ABC
+import abc
from dataclasses import dataclass
from enum import Enum
from typing import Iterator, Sequence, Tuple
@@ -34,8 +34,12 @@ class CloudRunnerState(str, Enum):
UNKNOWN = "unknown"
UNEXPECTED = "unexpected"
+ # Disable "Too many return statements" as this method is using case statement for converting
+ # the states, which does not cause a complexity issue.
@staticmethod
- def from_openstack_server_status(openstack_server_status: str) -> "CloudRunnerState":
+ def from_openstack_server_status( # pylint: disable=R0911
+ openstack_server_status: str,
+ ) -> "CloudRunnerState":
"""Create from openstack server status.
The openstack server status are documented here:
@@ -81,29 +85,30 @@ class CloudRunnerInstance:
state: CloudRunnerState
-class CloudRunnerManager(ABC):
+class CloudRunnerManager(abc.ABC):
"""Manage runner instance on cloud."""
+ @abc.abstractmethod
def get_name_prefix(self) -> str:
"""Get the name prefix of the self-hosted runners."""
- ...
+ @abc.abstractmethod
def create_runner(self, registration_token: str) -> InstanceId:
"""Create a self-hosted runner.
Args:
registration_token: The GitHub registration token for registering runners.
"""
- ...
- def get_runner(self, id: InstanceId) -> CloudRunnerInstance:
+ @abc.abstractmethod
+ def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance:
"""Get a self-hosted runner by instance id.
Args:
- id: The instance id.
+ instance_id: The instance id.
"""
- ...
+ @abc.abstractmethod
def get_runners(self, states: Sequence[CloudRunnerState]) -> Tuple[CloudRunnerInstance]:
"""Get self-hosted runners by state.
@@ -111,21 +116,20 @@ def get_runners(self, states: Sequence[CloudRunnerState]) -> Tuple[CloudRunnerIn
states: Filter for the runners with these github states. If None all states will be
included.
"""
- ...
- def delete_runner(self, id: InstanceId, remove_token: str) -> RunnerMetrics | None:
+ @abc.abstractmethod
+ def delete_runner(self, instance_id: InstanceId, remove_token: str) -> RunnerMetrics | None:
"""Delete self-hosted runners.
Args:
- id: The instance id of the runner to delete.
+ instance_id: The instance id of the runner to delete.
remove_token: The GitHub remove token.
"""
- ...
+ @abc.abstractmethod
def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
"""Cleanup runner and resource on the cloud.
Args:
remove_token: The GitHub remove token.
"""
- ...
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index d2a3f8b24..ad5354166 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -6,7 +6,7 @@
import logging
from dataclasses import dataclass
from enum import Enum, auto
-from typing import Iterator, Sequence, Type
+from typing import Iterator, Sequence, Type, cast
from charm_state import GithubPath
from errors import GithubMetricsError
@@ -158,12 +158,12 @@ def get_runners(
github_only,
)
- runner_instances = tuple(
+ runner_instances: list[RunnerInstance] = [
RunnerInstance(
cloud_infos_map[name], github_infos_map[name] if name in github_infos_map else None
)
for name in cloud_infos_map.keys()
- )
+ ]
if cloud_runner_state is not None:
runner_instances = [
runner for runner in runner_instances if runner.cloud_state in cloud_runner_state
@@ -174,7 +174,7 @@ def get_runners(
for runner in runner_instances
if runner.github_state is not None and runner.github_state in github_runner_state
]
- return runner_instances
+ return cast(tuple[RunnerInstance], tuple(runner_instances))
def delete_runners(
self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE
@@ -208,9 +208,11 @@ def delete_runners(
runner_metrics_list = []
for runner in runners_list:
- runner_metrics = self._cloud.delete_runner(id=runner.id, remove_token=remove_token)
- if runner_metrics is not None:
- runner_metrics_list.append(runner_metrics)
+ deleted_runner_metrics = self._cloud.delete_runner(
+ instance_id=runner.id, remove_token=remove_token
+ )
+ if deleted_runner_metrics is not None:
+ runner_metrics_list.append(deleted_runner_metrics)
return self._issue_runner_metrics(metrics=iter(runner_metrics_list))
def cleanup(self) -> IssuedMetricEventsStats:
@@ -221,8 +223,8 @@ def cleanup(self) -> IssuedMetricEventsStats:
"""
self._github.delete_runners([GithubRunnerState.OFFLINE])
remove_token = self._github.get_removal_token()
- runner_metrics = self._cloud.cleanup(remove_token)
- return self._issue_runner_metrics(metrics=runner_metrics)
+ deleted_runner_metrics = self._cloud.cleanup(remove_token)
+ return self._issue_runner_metrics(metrics=deleted_runner_metrics)
def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetricEventsStats:
"""Issue runner metrics.
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 39cb12ea4..888897bdc 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -3,10 +3,10 @@
"""Class for accessing OpenStack API for managing servers."""
-import datetime
import logging
from contextlib import contextmanager
from dataclasses import dataclass
+from datetime import datetime
from functools import reduce
from pathlib import Path
from typing import Iterable, Iterator, cast
@@ -22,7 +22,7 @@
from openstack.network.v2.security_group import SecurityGroup as OpenstackSecurityGroup
from paramiko.ssh_exception import NoValidConnectionsError
-from errors import OpenStackError, SshError
+from errors import OpenstackError, SshError
logger = logging.getLogger(__name__)
@@ -97,7 +97,7 @@ def _get_openstack_connection(
cloud: The name of cloud to use in the clouds.yaml.
Raises:
- OpenStackError: if the credentials provided is not authorized.
+ OpenstackError: if the credentials provided is not authorized.
Yields:
An openstack.connection.Connection object.
@@ -115,7 +115,7 @@ def _get_openstack_connection(
# pylint thinks this isn't an exception, but does inherit from Exception class.
except openstack.exceptions.HttpException as exc: # pylint: disable=bad-exception-cause
logger.exception("OpenStack API call failure")
- raise OpenStackError("Failed OpenStack API call") from exc
+ raise OpenstackError("Failed OpenStack API call") from exc
class OpenstackCloud:
@@ -139,7 +139,9 @@ def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
self._cloud = cloud
self.prefix = prefix
- def launch_instance(
+ # Ignore "Too many arguments" as 6 args should be fine. Move to a dataclass if new args are
+ # added.
+ def launch_instance( # pylint: disable=R0913
self, instance_id: str, image: str, flavor: str, network: str, userdata: str
) -> OpenstackInstance:
"""Create an OpenStack instance.
@@ -152,7 +154,7 @@ def launch_instance(
userdata: The cloud init userdata to startup the instance.
Raises:
- OpenStackError: Unable to create OpenStack server.
+ OpenstackError: Unable to create OpenStack server.
Returns:
The OpenStack instance created.
@@ -196,22 +198,22 @@ def launch_instance(
full_name,
)
self._delete_keypair(conn, instance_id)
- raise OpenStackError(f"Timeout creating openstack server {full_name}") from err
+ raise OpenstackError(f"Timeout creating openstack server {full_name}") from err
except openstack.exceptions.SDKException as err:
logger.exception("Failed to create openstack server %s", full_name)
self._delete_keypair(conn, instance_id)
- raise OpenStackError(f"Failed to create openstack server {full_name}") from err
+ raise OpenstackError(f"Failed to create openstack server {full_name}") from err
return OpenstackInstance(server, self.prefix)
- def get_instance(self, instance_id: str) -> OpenstackInstance:
+ def get_instance(self, instance_id: str) -> OpenstackInstance | None:
"""Get OpenStack instance by instance ID.
Args:
instance_id: The instance ID.
Returns:
- The OpenStack instance.
+ The OpenStack instance if found.
"""
full_name = self.get_server_name(instance_id)
logger.info("Getting openstack server with %s", full_name)
@@ -219,15 +221,16 @@ def get_instance(self, instance_id: str) -> OpenstackInstance:
with _get_openstack_connection(
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
- return OpenstackInstance(
- OpenstackCloud._get_and_ensure_unique_server(conn, full_name), self.prefix
- )
+ server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
+ if server is not None:
+ return OpenstackInstance(server, self.prefix)
+ return None
def delete_instance(self, instance_id: str) -> None:
"""Delete a openstack instance.
Raises:
- OpenStackError: Unable to delete OpenStack server.
+ OpenstackError: Unable to delete OpenStack server.
Args:
instance_id: The instance ID of the instance to delete.
@@ -240,13 +243,14 @@ def delete_instance(self, instance_id: str) -> None:
) as conn:
try:
server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
- conn.delete_server(name_or_id=server.id)
+ if server is not None:
+ conn.delete_server(name_or_id=server.id)
OpenstackCloud._delete_keypair(conn, full_name)
except (
openstack.exceptions.SDKException,
openstack.exceptions.ResourceTimeout,
) as err:
- raise OpenStackError(f"Failed to remove openstack runner {full_name}") from err
+ raise OpenstackError(f"Failed to remove openstack runner {full_name}") from err
def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
"""Get SSH connection to an OpenStack instance.
@@ -321,7 +325,7 @@ def get_instances(self) -> tuple[OpenstackInstance]:
server = OpenstackCloud._get_and_ensure_unique_server(conn, name)
if server is not None:
instances.append(OpenstackInstance(server, self.prefix))
- return instances
+ return cast(tuple[OpenstackInstance], tuple(instances))
def cleanup(self) -> None:
"""Cleanup unused openstack resources."""
@@ -414,11 +418,14 @@ def _get_openstack_instances(self, conn: OpenstackConnection) -> tuple[Openstack
Returns:
List of OpenStack instances.
"""
- return [
- server
- for server in cast(list[OpenstackServer], conn.list_servers())
- if server.name.startswith(f"{self.prefix}-")
- ]
+ return cast(
+ tuple[OpenstackServer],
+ tuple(
+ server
+ for server in cast(list[OpenstackServer], conn.list_servers())
+ if server.name.startswith(f"{self.prefix}-")
+ ),
+ )
@staticmethod
def _get_and_ensure_unique_server(
@@ -443,7 +450,10 @@ def _get_and_ensure_unique_server(
latest_server = reduce(
lambda a, b: (
- a if datetime.strptime(a.created_at) < datetime.strptime(b.create_at) else b
+ a
+ if datetime.strptime(a.created_at, "a %b %d %H:%M:%S %Y")
+ < datetime.strptime(b.create_at, "a %b %d %H:%M:%S %Y")
+ else b
),
servers,
)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 4c069ed64..f89ca7367 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -60,8 +60,9 @@ class _PullFileError(Exception):
"""Represents an error while pulling a file from the runner instance."""
+# Ignore "Too many instance attributes" as this dataclass is for passing arguments.
@dataclass
-class OpenstackRunnerManagerConfig:
+class OpenstackRunnerManagerConfig: # pylint: disable=R0902
"""Configuration for OpenstackRunnerManager.
Attributes:
@@ -145,14 +146,14 @@ def create_runner(self, registration_token: str) -> InstanceId:
Instance ID of the runner.
"""
start_timestamp = time.time()
- id = OpenstackRunnerManager._generate_instance_id()
- instance_name = self._openstack_cloud.get_server_name(instance_id=id)
+ instance_id = OpenstackRunnerManager._generate_instance_id()
+ instance_name = self._openstack_cloud.get_server_name(instance_id=instance_id)
userdata = self._generate_userdata(
instance_name=instance_name, registration_token=registration_token
)
try:
instance = self._openstack_cloud.launch_instance(
- instance_id=id,
+ instance_id=instance_id,
image=self.config.image,
flavor=self.config.flavor,
network=self.config.network,
@@ -170,24 +171,24 @@ def create_runner(self, registration_token: str) -> InstanceId:
install_start_timestamp=start_timestamp,
install_end_timestamp=end_timestamp,
)
- return id
+ return instance_id
- def get_runner(self, id: InstanceId) -> CloudRunnerInstance | None:
+ def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
"""Get a self-hosted runner by instance id.
Args:
- id: The instance id.
+ instance_id: The instance id.
Returns:
Information on the runner instance.
"""
- name = self._openstack_cloud.get_server_name(id)
+ name = self._openstack_cloud.get_server_name(instance_id)
instances_list = self._openstack_cloud.get_instances()
for instance in instances_list:
if instance.server_name == name:
return CloudRunnerInstance(
name=name,
- id=id,
+ id=instance_id,
state=CloudRunnerState.from_openstack_server_status(instance.status),
)
return None
@@ -215,21 +216,28 @@ def get_runners(
]
if states is None:
return instance_list
- return [instance for instance in instance_list if instance.state in states]
+ return tuple(instance for instance in instance_list if instance.state in states)
def delete_runner(
- self, id: InstanceId, remove_token: str
+ self, instance_id: InstanceId, remove_token: str
) -> runner_metrics.RunnerMetrics | None:
"""Delete self-hosted runners.
Args:
- id: The instance id of the runner to delete.
+ instance_id: The instance id of the runner to delete.
remove_token: The GitHub remove token.
Returns:
Any metrics collected during the deletion of the runner.
"""
- instance = self._openstack_cloud.get_instance(id)
+ instance = self._openstack_cloud.get_instance(instance_id)
+ if instance is None:
+ logger.warning(
+ "Unable to delete instance %s as it is not found",
+ self._openstack_cloud.get_server_name(instance_id),
+ )
+ return None
+
metric = runner_metrics.extract(
metrics_storage_manager=metrics_storage, runners=instance.server_name
)
@@ -256,7 +264,7 @@ def cleanup(self, remove_token: str) -> Iterator[runner_metrics.RunnerMetrics]:
self._openstack_cloud.cleanup()
return metrics
- def _delete_runner(self, instance: OpenstackInstance, remove_token) -> None:
+ def _delete_runner(self, instance: OpenstackInstance, remove_token: str) -> None:
"""Delete self-hosted runners by openstack instance.
Args:
@@ -311,7 +319,7 @@ def _get_runner_health(self) -> RunnerHealth:
unhealthy.append(runner)
else:
healthy.append(runner)
- return RunnerHealth(healthy=healthy, unhealthy=unhealthy)
+ return RunnerHealth(healthy=tuple(healthy), unhealthy=tuple(unhealthy))
def _generate_userdata(self, instance_name: str, registration_token: str) -> str:
"""Generate cloud init userdata.
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index f40845a2a..041bcad3a 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -40,7 +40,9 @@ def runner_label():
@pytest.fixture(scope="module", name="log_dir_base_path")
-def log_dir_base_path_fixture(tmp_path_factory: Path) -> Iterator[dict[str, Path]]:
+def log_dir_base_path_fixture(
+ tmp_path_factory: pytest.TempPathFactory,
+) -> Iterator[dict[str, Path]]:
"""Mock the log directory path and return it."""
with pytest.MonkeyPatch.context() as monkeypatch:
temp_log_dir = tmp_path_factory.mktemp("log")
@@ -72,11 +74,11 @@ def openstack_proxy_config_fixture(
use_aproxy = False
if openstack_http_proxy or openstack_https_proxy:
use_aproxy = True
- openstack_http_proxy = openstack_http_proxy if openstack_http_proxy else None
- openstack_https_proxy = openstack_https_proxy if openstack_https_proxy else None
+ http_proxy = openstack_http_proxy if openstack_http_proxy else None
+ https_proxy = openstack_https_proxy if openstack_https_proxy else None
return ProxyConfig(
- http=openstack_http_proxy,
- https=openstack_https_proxy,
+ http=http_proxy,
+ https=https_proxy,
no_proxy=openstack_no_proxy,
use_aproxy=use_aproxy,
)
@@ -275,6 +277,7 @@ async def test_runner_flush_busy_lifecycle(
issue_metrics_events = runner_manager_with_one_runner.cleanup()
assert issue_metrics_events[events.RunnerStart] == 1
+
@pytest.mark.openstack
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
From 5224cd28ad41d557e88babee7791cdeb9bbd4e49 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 14:38:10 +0800
Subject: [PATCH 150/278] Fix unit test issue due to method sig change
---
tests/unit/test_openstack_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/unit/test_openstack_manager.py b/tests/unit/test_openstack_manager.py
index 5349b1570..373f656f0 100644
--- a/tests/unit/test_openstack_manager.py
+++ b/tests/unit/test_openstack_manager.py
@@ -692,7 +692,7 @@ def test_reconcile_ignores_metrics_for_openstack_online_runners(
openstack_manager.runner_metrics.extract.assert_called_once_with(
metrics_storage_manager=metrics.storage,
- ignore_runners=set(openstack_online_runner_names),
+ runners=set(openstack_online_runner_names),
)
From 739d75b39ac82a0a52ca2d17b04671860bef7928 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 14:59:28 +0800
Subject: [PATCH 151/278] Ignore openstack cloud from coverage because the test
requires private endpoint
---
pyproject.toml | 3 +++
1 file changed, 3 insertions(+)
diff --git a/pyproject.toml b/pyproject.toml
index e7d0f789f..458b72d93 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,9 @@ skips = ["*/*test.py", "*/test_*.py", "*tests/*.py"]
[tool.coverage.run]
branch = true
omit = [
+ # These are covered by `tests/integration/test_runner_manager_openstack.py`.
+ "src/openstack_cloud/openstack_cloud.py",
+ "src/openstack_cloud/openstack_runner_manager.py",
# Contains interface for calling LXD. Tested in integration tests and end to end tests.
"src/lxd.py",
# Contains interface for calling repo policy compliance service. Tested in integration test
From 94a60c2b103a3b20bb3adbd646a6092fe1e58af3 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 15:01:49 +0800
Subject: [PATCH 152/278] Enable all tests
---
.github/workflows/e2e_test.yaml | 4 +--
.github/workflows/integration_test.yaml | 4 +--
.github/workflows/manual_test_env.yaml | 35 -------------------------
.github/workflows/openstack_test.yaml | 19 ++++++++++++++
4 files changed, 21 insertions(+), 41 deletions(-)
delete mode 100644 .github/workflows/manual_test_env.yaml
create mode 100644 .github/workflows/openstack_test.yaml
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index bb1dada46..5933451ee 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,9 +1,7 @@
name: End-to-End tests
on:
- # TODO: Uncomment
- #pull_request:
- workflow_dispatch:
+ pull_request:
jobs:
# test option values defined at test/conftest.py are passed on via repository secret
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 1edd98aca..349bc302c 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,9 +1,7 @@
name: integration-tests
on:
- # TODO: Uncomment
- #pull_request:
- workflow_dispatch:
+ pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
deleted file mode 100644
index c1060f3fb..000000000
--- a/.github/workflows/manual_test_env.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: Manual test env
-
-on:
- # TODO: Uncomment
- # pull_request:
- workflow_dispatch:
-
-jobs:
- manual-test-env:
- name: manual-test-env
- runs-on: ["self-hosted", "stg-private-endpoint", "X64"]
- steps:
- - run: sudo apt update -yq
- - run: sudo apt install pipx -yq
- - run: pipx ensurepath
- - run: pipx install tox
- - uses: actions/checkout@v4
- - name: Tmate debugging session (self-hosted)
- uses: canonical/action-tmate@main
- timeout-minutes: ${{ inputs.tmate-timeout }}
- # openstack-integration-tests-private-endpoint:
- # name: Integration test using private-endpoint
- # uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
- # secrets: inherit
- # with:
- # juju-channel: 3.2/stable
- # pre-run-script: scripts/setup-lxd.sh
- # provider: lxd
- # test-tox-env: integration-juju3.2
- # modules: '["test_runner_manager_openstack"]'
- # extra-arguments: "-m openstack"
- # self-hosted-runner: true
- # self-hosted-runner-label: stg-private-endpoint
- # tmate-debug: true
- # tmate-timeout: 300
diff --git a/.github/workflows/openstack_test.yaml b/.github/workflows/openstack_test.yaml
new file mode 100644
index 000000000..a7521c472
--- /dev/null
+++ b/.github/workflows/openstack_test.yaml
@@ -0,0 +1,19 @@
+name: Openstack tests
+
+on:
+ pull_request:
+
+jobs:
+ openstack-integration-tests-private-endpoint:
+ name: Openstack integration test using private-endpoint
+ uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
+ secrets: inherit
+ with:
+ juju-channel: 3.2/stable
+ pre-run-script: scripts/setup-lxd.sh
+ provider: lxd
+ test-tox-env: integration-juju3.2
+ modules: '["test_runner_manager_openstack"]'
+ extra-arguments: "-m openstack"
+ self-hosted-runner: true
+ self-hosted-runner-label: stg-private-endpoint
From fa1eda367a9c92855ae994b18228512f2e254f65 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 16:42:09 +0800
Subject: [PATCH 153/278] Remove a repeated test
---
.github/workflows/openstack_test.yaml | 19 -------------------
1 file changed, 19 deletions(-)
delete mode 100644 .github/workflows/openstack_test.yaml
diff --git a/.github/workflows/openstack_test.yaml b/.github/workflows/openstack_test.yaml
deleted file mode 100644
index a7521c472..000000000
--- a/.github/workflows/openstack_test.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-name: Openstack tests
-
-on:
- pull_request:
-
-jobs:
- openstack-integration-tests-private-endpoint:
- name: Openstack integration test using private-endpoint
- uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
- secrets: inherit
- with:
- juju-channel: 3.2/stable
- pre-run-script: scripts/setup-lxd.sh
- provider: lxd
- test-tox-env: integration-juju3.2
- modules: '["test_runner_manager_openstack"]'
- extra-arguments: "-m openstack"
- self-hosted-runner: true
- self-hosted-runner-label: stg-private-endpoint
From 098f58e2b08e55be33c661f77405926b6dc306c8 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 12 Aug 2024 16:45:35 +0800
Subject: [PATCH 154/278] Re-enable test.yaml
---
.github/workflows/test.yaml | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 34803b2fb..99e540d31 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,9 +1,7 @@
name: Tests
on:
- # TODO: Uncomment
- #pull_request:
- workflow_dispatch:
+ pull_request:
jobs:
unit-tests:
From 6344c7aa589118b367908739a33ba29d0ae8e997 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 13 Aug 2024 12:09:10 +0800
Subject: [PATCH 155/278] Fix integration tests workflow
---
.github/workflows/integration_test.yaml | 15 ++-
tests/integration/test_openstack_cloud.py | 130 ----------------------
2 files changed, 14 insertions(+), 131 deletions(-)
delete mode 100644 tests/integration/test_openstack_cloud.py
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 349bc302c..8e0bc700a 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -24,16 +24,29 @@ jobs:
# test_debug_ssh ensures tmate SSH actions works.
# TODO: Add OpenStack integration versions of these tests.
modules: '["test_charm_scheduled_events", "test_debug_ssh"]'
+ openstack-interface-tests-private-endpoint:
+ name: openstack interface test using private-endpoint
+ uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
+ secrets: inherit
+ with:
+ juju-channel: 3.2/stable
+ pre-run-script: scripts/setup-lxd.sh
+ provider: lxd
+ test-tox-env: integration-juju3.2
+ modules: '["test_runner_manager_openstack"]'
+ self-hosted-runner: true
+ self-hosted-runner-label: stg-private-endpoint
openstack-integration-tests-private-endpoint:
name: Integration test using private-endpoint
uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
+ needs: openstack-interface-tests-private-endpoint
secrets: inherit
with:
juju-channel: 3.2/stable
pre-run-script: scripts/setup-lxd.sh
provider: lxd
test-tox-env: integration-juju3.2
- modules: '["test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_repo", "test_charm_runner", "test_reactive", "test_openstack_cloud"]'
+ modules: '["test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_repo", "test_charm_runner", "test_reactive"]'
extra-arguments: "-m openstack"
self-hosted-runner: true
self-hosted-runner-label: stg-private-endpoint
diff --git a/tests/integration/test_openstack_cloud.py b/tests/integration/test_openstack_cloud.py
deleted file mode 100644
index f0dd8f148..000000000
--- a/tests/integration/test_openstack_cloud.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Test for OpenstackCloud class integration with OpenStack."""
-
-from secrets import token_hex
-
-import pytest
-import pytest_asyncio
-import yaml
-from openstack.connection import Connection as OpenstackConnection
-
-from openstack_cloud.openstack_cloud import OpenstackCloud
-
-
-@pytest_asyncio.fixture(scope="function", name="base_openstack_cloud")
-async def base_openstack_cloud_fixture(private_endpoint_clouds_yaml: str) -> OpenstackCloud:
- """Setup a OpenstackCloud object with connection to openstack."""
- clouds_yaml = yaml.safe_load(private_endpoint_clouds_yaml)
- return OpenstackCloud(clouds_yaml, "testcloud", f"test-{token_hex(4)}")
-
-
-@pytest_asyncio.fixture(scope="function", name="openstack_cloud")
-async def openstack_cloud_fixture(base_openstack_cloud: OpenstackCloud) -> OpenstackCloud:
- """Ensures the OpenstackCloud object has no openstack servers."""
- instances = base_openstack_cloud.get_instances()
- for instance in instances:
- base_openstack_cloud.delete_instance(instance_id=instance.instance_id)
- return base_openstack_cloud
-
-
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_get_no_instances(base_openstack_cloud: OpenstackCloud) -> None:
- """
- arrange: No instance on OpenStack.
- act: Get instances on OpenStack.
- assert: An empty list returned.
-
- Uses base_openstack_cloud as openstack_cloud_fixture relies on this test.
- """
- instances = base_openstack_cloud.get_instances()
- assert not instances
-
-
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_launch_instance_and_delete(
- base_openstack_cloud: OpenstackCloud,
- openstack_connection: OpenstackConnection,
- openstack_test_image: str,
- openstack_test_flavor: str,
- network_name: str,
-) -> None:
- """
- arrange: No instance on OpenStack.
- act:
- 1. Create an openstack instance.
- 2. Delete openstack instance.
- assert:
- 1. Instance returned.
- 2. No instance exists.
-
- Uses base_openstack_cloud as openstack_cloud_fixture relies on this test.
- """
- instances = base_openstack_cloud.get_instances()
- assert not instances, "Test arrange failure: found existing openstack instance."
-
- instance_name = f"{token_hex(2)}"
-
- # 1.
- instance = base_openstack_cloud.launch_instance(
- instance_id=instance_name,
- image=openstack_test_image,
- flavor=openstack_test_flavor,
- network=network_name,
- userdata="",
- )
-
- assert instance is not None
- assert instance.instance_id is not None
- assert instance.server_name is not None
- assert instance.id is not None
-
- servers = openstack_connection.list_servers()
- for server in servers:
- if instance_name in server.name:
- break
- else:
- assert False, f"OpenStack server with {instance_name} in the name not found"
-
- # 2.
- base_openstack_cloud.delete_instance(instance_id=instance_name)
- instances = base_openstack_cloud.get_instances()
- assert not instances, "Test failure: openstack instance should be deleted."
-
-
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_instance_ssh_connection(
- openstack_cloud: OpenstackCloud,
- openstack_test_image: str,
- openstack_test_flavor: str,
- network_name: str,
-) -> None:
- """
- arrange: One instance on OpenStack.
- act: Get SSH connection of instance and execute command.
- assert: Test SSH command executed successfully.
-
- This tests whether the network rules (security group) are in place.
- """
- rand_chars = f"{token_hex(10)}"
- instance_name = f"{token_hex(2)}"
- instance = openstack_cloud.launch_instance(
- instance_id=instance_name,
- image=openstack_test_image,
- flavor=openstack_test_flavor,
- network=network_name,
- userdata="",
- )
-
- ssh_conn = openstack_cloud.get_ssh_connection(instance)
- result = ssh_conn.run(f"echo {rand_chars}")
-
- assert result.ok
- assert rand_chars in result.stdout
From addf218c9a5992e64f24a82c49467cf1a00b11ad Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 12:13:04 +0800
Subject: [PATCH 156/278] Add docs on cleanup method of cloud runner manager
---
src/manager/cloud_runner_manager.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 654e5663b..922368faf 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -129,6 +129,8 @@ def delete_runner(self, instance_id: InstanceId, remove_token: str) -> RunnerMet
@abc.abstractmethod
def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
"""Cleanup runner and resource on the cloud.
+
+ Perform health check on runner and delete the runner if it fails.
Args:
remove_token: The GitHub remove token.
From c90cdb3e120bee4d59339cc3ab1b0db6f30cc15a Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 12:35:35 +0800
Subject: [PATCH 157/278] Add parallel spawning of runners.
---
src/manager/runner_manager.py | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index ad5354166..a15ad0d80 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -6,6 +6,7 @@
import logging
from dataclasses import dataclass
from enum import Enum, auto
+from multiprocessing import Pool
from typing import Iterator, Sequence, Type, cast
from charm_state import GithubPath
@@ -112,11 +113,16 @@ def create_runners(self, num: int) -> tuple[InstanceId]:
logger.info("Creating %s runners", num)
registration_token = self._github.get_registration_token()
- runner_ids = []
- for _ in range(num):
- runner_ids.append(self._cloud.create_runner(registration_token=registration_token))
+ instance_ids = []
+ create_runner_args = [self._cloud, registration_token] * num
+ with Pool(processes=min(num, 10)) as pool:
+ pool.map(func=RunnerManager._create_runner, iterable=create_runner_args)
+
+ return tuple(instance_ids)
- return tuple(runner_ids)
+ @staticmethod
+ def _create_runner(cloud_runner_manager: CloudRunnerManager, registration_token: str) -> InstanceId:
+ return cloud_runner_manager.create_runner(registration_token=registration_token)
def get_runners(
self,
From a76e528fb02b3032313e85fd7e952571da700245 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 12:38:51 +0800
Subject: [PATCH 158/278] Enable dev testing
---
.github/workflows/integration_test.yaml | 4 +++-
.github/workflows/manual_test_env.yaml | 18 ++++++++++++++++++
2 files changed, 21 insertions(+), 1 deletion(-)
create mode 100644 .github/workflows/manual_test_env.yaml
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 8e0bc700a..dfdfdf590 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,7 +1,9 @@
name: integration-tests
on:
- pull_request:
+ # TODO: Debug
+ #pull_request:
+ workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
new file mode 100644
index 000000000..6b65cbfce
--- /dev/null
+++ b/.github/workflows/manual_test_env.yaml
@@ -0,0 +1,18 @@
+name: Manual test env
+
+on:
+ pull_request:
+
+jobs:
+ manual-test-env:
+ name: manual-test-env
+ runs-on: ["self-hosted", "stg-private-endpoint"]
+ steps:
+ - run: sudo apt update -yq
+ - run: sudo apt install pipx -yq
+ - run: pipx ensurepath
+ - run: pipx install tox
+ - uses: actions/checkout@v4
+ - name: Tmate debugging session (self-hosted)
+ uses: canonical/action-tmate@main
+ timeout-minutes: 300
From 91e0a0f758c3ce40bd96a6038d31eb52b124a3a3 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 12:54:13 +0800
Subject: [PATCH 159/278] Fix parallel spawn
---
.github/workflows/e2e_test.yaml | 4 +++-
src/manager/runner_manager.py | 19 +++++++++++++------
2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index 5933451ee..bca9e4476 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,7 +1,9 @@
name: End-to-End tests
on:
- pull_request:
+ # TODO: Debug
+ #pull_request:
+ workflow_dispatch:
jobs:
# test option values defined at test/conftest.py are passed on via repository secret
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index a15ad0d80..0c7a546cf 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -113,16 +113,12 @@ def create_runners(self, num: int) -> tuple[InstanceId]:
logger.info("Creating %s runners", num)
registration_token = self._github.get_registration_token()
- instance_ids = []
- create_runner_args = [self._cloud, registration_token] * num
+ create_runner_args = [RunnerManager._CreateRunnerArgs(self._cloud, registration_token) for _ in range(num)]
with Pool(processes=min(num, 10)) as pool:
- pool.map(func=RunnerManager._create_runner, iterable=create_runner_args)
+ instance_ids = pool.map(func=RunnerManager._create_runner, iterable=create_runner_args)
return tuple(instance_ids)
- @staticmethod
- def _create_runner(cloud_runner_manager: CloudRunnerManager, registration_token: str) -> InstanceId:
- return cloud_runner_manager.create_runner(registration_token=registration_token)
def get_runners(
self,
@@ -266,3 +262,14 @@ def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetri
total_stats[event_type] = total_stats.get(event_type, 0) + 1
return total_stats
+
+ @dataclass
+ class _CreateRunnerArgs:
+ cloud_runner_manager: CloudRunnerManager
+ registration_token: str
+
+ @staticmethod
+ def _create_runner(args: _CreateRunnerArgs) -> InstanceId:
+ return args.cloud_runner_manager.create_runner(registration_token=args.registration_token)
+
+
From 7f16bdcc4b33cbda904ba63ab1edefb620712119 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 13:15:19 +0800
Subject: [PATCH 160/278] Allow openstack server to take a bit of time on
deletion
---
src/manager/cloud_runner_manager.py | 2 +-
src/manager/runner_manager.py | 11 +++++------
tests/integration/test_runner_manager_openstack.py | 8 ++++++++
3 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 922368faf..5191a934b 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -129,7 +129,7 @@ def delete_runner(self, instance_id: InstanceId, remove_token: str) -> RunnerMet
@abc.abstractmethod
def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
"""Cleanup runner and resource on the cloud.
-
+
Perform health check on runner and delete the runner if it fails.
Args:
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 0c7a546cf..24284fd10 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -113,12 +113,13 @@ def create_runners(self, num: int) -> tuple[InstanceId]:
logger.info("Creating %s runners", num)
registration_token = self._github.get_registration_token()
- create_runner_args = [RunnerManager._CreateRunnerArgs(self._cloud, registration_token) for _ in range(num)]
+ create_runner_args = [
+ RunnerManager._CreateRunnerArgs(self._cloud, registration_token) for _ in range(num)
+ ]
with Pool(processes=min(num, 10)) as pool:
instance_ids = pool.map(func=RunnerManager._create_runner, iterable=create_runner_args)
-
- return tuple(instance_ids)
+ return tuple(instance_ids)
def get_runners(
self,
@@ -262,7 +263,7 @@ def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetri
total_stats[event_type] = total_stats.get(event_type, 0) + 1
return total_stats
-
+
@dataclass
class _CreateRunnerArgs:
cloud_runner_manager: CloudRunnerManager
@@ -271,5 +272,3 @@ class _CreateRunnerArgs:
@staticmethod
def _create_runner(args: _CreateRunnerArgs) -> InstanceId:
return args.cloud_runner_manager.create_runner(registration_token=args.registration_token)
-
-
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 041bcad3a..e7172ba41 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -221,6 +221,14 @@ async def test_runner_normal_idle_lifecycle(
runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
runner_list = runner_manager.get_runners()
assert isinstance(runner_list, tuple)
+ if len(runner_list) == 1:
+ runner = runner_list[0]
+ assert runner.github_state == None
+
+ # The openstack server can take sometime to fully clean up.
+ await wait_for(lambda: len(runner_manager.get_runners()) == 0, timeout=60)
+ return
+
assert len(runner_list) == 0
From ad05ac5c7a910bca92851239d0e6b40d6f350635 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 13:29:38 +0800
Subject: [PATCH 161/278] Refactor test detection of no runners
---
.../test_runner_manager_openstack.py | 35 ++++++++++++-------
1 file changed, 22 insertions(+), 13 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index e7172ba41..034c836f8 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -164,6 +164,24 @@ def workflow_is_status(workflow: Workflow, status: str) -> bool:
workflow.update()
return workflow.status == status
+async def assert_no_runner(runner_manager: RunnerManager):
+ """Assert the runner manager has no runners.
+
+ Retry are performed if the number of runner is not 0. Due to it may take some time for
+ openstack to delete the servers.
+
+ A TimeoutError will be thrown if runners are still found after timeout.
+
+ Args:
+ runner_manager: The RunnerManager to check.
+ """
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ if len(runner_list) == 0:
+ return
+
+ # The openstack server can take sometime to fully clean up.
+ await wait_for(lambda: len(runner_manager.get_runners()) == 0, timeout=60)
@pytest.mark.openstack
@pytest.mark.asyncio
@@ -219,18 +237,7 @@ async def test_runner_normal_idle_lifecycle(
# 3.
runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
- runner_list = runner_manager.get_runners()
- assert isinstance(runner_list, tuple)
- if len(runner_list) == 1:
- runner = runner_list[0]
- assert runner.github_state == None
-
- # The openstack server can take sometime to fully clean up.
- await wait_for(lambda: len(runner_manager.get_runners()) == 0, timeout=60)
- return
-
- assert len(runner_list) == 0
-
+ assert_no_runner(runner_manager)
@pytest.mark.openstack
@pytest.mark.asyncio
@@ -280,7 +287,7 @@ async def test_runner_flush_busy_lifecycle(
# 3.
runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
- runner_list = runner_manager_with_one_runner.get_runners()
+ assert_no_runner(runner_manager_with_one_runner)
issue_metrics_events = runner_manager_with_one_runner.cleanup()
assert issue_metrics_events[events.RunnerStart] == 1
@@ -339,3 +346,5 @@ async def test_runner_normal_lifecycle(
assert metric_logs[0]["workflow"] == "Workflow Dispatch Wait Tests"
assert metric_logs[1]["event"] == "runner_stop"
assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests"
+
+ assert_no_runner(runner_manager_with_one_runner)
From 23bac2f1b286a66a4ee261e1940df0e8d1ce894d Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 13:37:16 +0800
Subject: [PATCH 162/278] Re-enable the tests
---
.github/workflows/e2e_test.yaml | 4 +---
.github/workflows/integration_test.yaml | 4 +---
.github/workflows/manual_test_env.yaml | 18 ------------------
3 files changed, 2 insertions(+), 24 deletions(-)
delete mode 100644 .github/workflows/manual_test_env.yaml
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index bca9e4476..5933451ee 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,9 +1,7 @@
name: End-to-End tests
on:
- # TODO: Debug
- #pull_request:
- workflow_dispatch:
+ pull_request:
jobs:
# test option values defined at test/conftest.py are passed on via repository secret
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index dfdfdf590..8e0bc700a 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,9 +1,7 @@
name: integration-tests
on:
- # TODO: Debug
- #pull_request:
- workflow_dispatch:
+ pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
deleted file mode 100644
index 6b65cbfce..000000000
--- a/.github/workflows/manual_test_env.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-name: Manual test env
-
-on:
- pull_request:
-
-jobs:
- manual-test-env:
- name: manual-test-env
- runs-on: ["self-hosted", "stg-private-endpoint"]
- steps:
- - run: sudo apt update -yq
- - run: sudo apt install pipx -yq
- - run: pipx ensurepath
- - run: pipx install tox
- - uses: actions/checkout@v4
- - name: Tmate debugging session (self-hosted)
- uses: canonical/action-tmate@main
- timeout-minutes: 300
From 325be1fb4b3f4730335e405e66d31f1f17297b6c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 13:52:09 +0800
Subject: [PATCH 163/278] Fix lints
---
src/manager/runner_manager.py | 17 +++++++++++++++++
.../test_runner_manager_openstack.py | 17 ++++++++++-------
2 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 24284fd10..98ee9ed9c 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -266,9 +266,26 @@ def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetri
@dataclass
class _CreateRunnerArgs:
+ """Arguments for the _create_runner function.
+
+ Attrs:
+ cloud_runner_manager: For managing the cloud instance of the runner.
+ registration_token: The GitHub provided-token for registering runners.
+ """
+
cloud_runner_manager: CloudRunnerManager
registration_token: str
@staticmethod
def _create_runner(args: _CreateRunnerArgs) -> InstanceId:
+ """Create a single runner.
+
+ This is a staticmethod for usage with multiprocess.Pool.
+
+ Args:
+ args: The arguments.
+
+ Returns:
+ The instance ID of the runner created.
+ """
return args.cloud_runner_manager.create_runner(registration_token=args.registration_token)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 034c836f8..b884fc55a 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -164,14 +164,15 @@ def workflow_is_status(workflow: Workflow, status: str) -> bool:
workflow.update()
return workflow.status == status
+
async def assert_no_runner(runner_manager: RunnerManager):
"""Assert the runner manager has no runners.
- Retry are performed if the number of runner is not 0. Due to it may take some time for
+ Retry are performed if the number of runner is not 0. Due to it may take some time for
openstack to delete the servers.
-
+
A TimeoutError will be thrown if runners are still found after timeout.
-
+
Args:
runner_manager: The RunnerManager to check.
"""
@@ -179,10 +180,11 @@ async def assert_no_runner(runner_manager: RunnerManager):
assert isinstance(runner_list, tuple)
if len(runner_list) == 0:
return
-
+
# The openstack server can take sometime to fully clean up.
await wait_for(lambda: len(runner_manager.get_runners()) == 0, timeout=60)
+
@pytest.mark.openstack
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
@@ -237,7 +239,8 @@ async def test_runner_normal_idle_lifecycle(
# 3.
runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
- assert_no_runner(runner_manager)
+ await assert_no_runner(runner_manager)
+
@pytest.mark.openstack
@pytest.mark.asyncio
@@ -287,7 +290,7 @@ async def test_runner_flush_busy_lifecycle(
# 3.
runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
- assert_no_runner(runner_manager_with_one_runner)
+ await assert_no_runner(runner_manager_with_one_runner)
issue_metrics_events = runner_manager_with_one_runner.cleanup()
assert issue_metrics_events[events.RunnerStart] == 1
@@ -347,4 +350,4 @@ async def test_runner_normal_lifecycle(
assert metric_logs[1]["event"] == "runner_stop"
assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests"
- assert_no_runner(runner_manager_with_one_runner)
+ await assert_no_runner(runner_manager_with_one_runner)
From 4a91bc0d581be662756dee7413b14aabecfff450 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 14:01:49 +0800
Subject: [PATCH 164/278] Disable tests again
---
.github/workflows/e2e_test.yaml | 4 +++-
.github/workflows/integration_test.yaml | 4 +++-
.github/workflows/test.yaml | 4 +++-
3 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index 5933451ee..c7636df33 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,7 +1,9 @@
name: End-to-End tests
on:
- pull_request:
+ #TODO: Debug
+ # pull_request:
+ workflow_dispatch:
jobs:
# test option values defined at test/conftest.py are passed on via repository secret
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 8e0bc700a..000aa5d2b 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,7 +1,9 @@
name: integration-tests
on:
- pull_request:
+ #TODO: Debug
+ # pull_request:
+ workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 99e540d31..876d3f2df 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,7 +1,9 @@
name: Tests
on:
- pull_request:
+ #TODO: Debug
+ # pull_request:
+ workflow_dispatch:
jobs:
unit-tests:
From d6b50a4d13c07d2cd4c02b2b8b01129c717b1f02 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 14:02:50 +0800
Subject: [PATCH 165/278] Disable some test
---
.../test_runner_manager_openstack.py | 218 +++++++++---------
1 file changed, 109 insertions(+), 109 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index b884fc55a..897272d21 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -242,112 +242,112 @@ async def test_runner_normal_idle_lifecycle(
await assert_no_runner(runner_manager)
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_runner_flush_busy_lifecycle(
- runner_manager_with_one_runner: RunnerManager,
- test_github_branch: Branch,
- github_repository: Repository,
- runner_label: str,
-):
- """
- Arrange: RunnerManager with one idle runner.
- Act:
- 1. Run a long workflow.
- 2. Run flush idle runner.
- 3. Run flush busy runner.
- Assert:
- 1. Runner takes the job and become busy.
- 2. Busy runner still exists.
- 3. No runners exists.
- """
- # 1.
- workflow = await dispatch_workflow(
- app=None,
- branch=test_github_branch,
- github_repository=github_repository,
- conclusion="success",
- workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
- dispatch_input={"runner": runner_label, "minutes": "10"},
- wait=False,
- )
- await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
-
- runner_list = runner_manager_with_one_runner.get_runners()
- assert len(runner_list) == 1
- busy_runner = runner_list[0]
- assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
- assert busy_runner.github_state == GithubRunnerState.BUSY
-
- # 2.
- runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
- runner_list = runner_manager_with_one_runner.get_runners()
- assert len(runner_list) == 1
- busy_runner = runner_list[0]
- assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
- assert busy_runner.github_state == GithubRunnerState.BUSY
-
- # 3.
- runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
- await assert_no_runner(runner_manager_with_one_runner)
-
- issue_metrics_events = runner_manager_with_one_runner.cleanup()
- assert issue_metrics_events[events.RunnerStart] == 1
-
-
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_runner_normal_lifecycle(
- runner_manager_with_one_runner: RunnerManager,
- test_github_branch: Branch,
- github_repository: Repository,
- runner_label: str,
- log_dir_base_path: dict[str, Path],
-):
- """
- Arrange: RunnerManager with one runner. Clean metric logs.
- Act:
- 1. Start a test workflow for the runner.
- 2. Run cleanup.
- Assert:
- 1. The workflow complete successfully.
- 2. The runner should be deleted. The metrics should be recorded.
- """
- metric_log_path = log_dir_base_path["metric_log"]
- metric_log_existing_content = metric_log_path.read_text(encoding="utf-8")
-
- workflow = await dispatch_workflow(
- app=None,
- branch=test_github_branch,
- github_repository=github_repository,
- conclusion="success",
- workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
- dispatch_input={"runner": runner_label, "minutes": "0"},
- wait=False,
- )
- await wait_for(lambda: workflow_is_status(workflow, "completed"))
-
- issue_metrics_events = runner_manager_with_one_runner.cleanup()
- assert issue_metrics_events[events.RunnerStart] == 1
- assert issue_metrics_events[events.RunnerStop] == 1
-
- metric_log_full_content = metric_log_path.read_text(encoding="utf-8")
- assert metric_log_full_content.startswith(
- metric_log_existing_content
- ), "The metric log was modified in ways other than appending"
- # Disable E203 (space before :) as it conflicts with the formatter (black).
- metric_log_new_content = metric_log_full_content[
- len(metric_log_existing_content) : # noqa: E203
- ]
- metric_logs = [json.loads(metric) for metric in metric_log_new_content.splitlines()]
- assert (
- len(metric_logs) == 2
- ), "Assuming two events should be runner_start and runner_stop, modify this if new events are added"
- assert metric_logs[0]["event"] == "runner_start"
- assert metric_logs[0]["workflow"] == "Workflow Dispatch Wait Tests"
- assert metric_logs[1]["event"] == "runner_stop"
- assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests"
-
- await assert_no_runner(runner_manager_with_one_runner)
+# @pytest.mark.openstack
+# @pytest.mark.asyncio
+# @pytest.mark.abort_on_fail
+# async def test_runner_flush_busy_lifecycle(
+# runner_manager_with_one_runner: RunnerManager,
+# test_github_branch: Branch,
+# github_repository: Repository,
+# runner_label: str,
+# ):
+# """
+# Arrange: RunnerManager with one idle runner.
+# Act:
+# 1. Run a long workflow.
+# 2. Run flush idle runner.
+# 3. Run flush busy runner.
+# Assert:
+# 1. Runner takes the job and become busy.
+# 2. Busy runner still exists.
+# 3. No runners exists.
+# """
+# # 1.
+# workflow = await dispatch_workflow(
+# app=None,
+# branch=test_github_branch,
+# github_repository=github_repository,
+# conclusion="success",
+# workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+# dispatch_input={"runner": runner_label, "minutes": "10"},
+# wait=False,
+# )
+# await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
+
+# runner_list = runner_manager_with_one_runner.get_runners()
+# assert len(runner_list) == 1
+# busy_runner = runner_list[0]
+# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+# assert busy_runner.github_state == GithubRunnerState.BUSY
+
+# # 2.
+# runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+# runner_list = runner_manager_with_one_runner.get_runners()
+# assert len(runner_list) == 1
+# busy_runner = runner_list[0]
+# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+# assert busy_runner.github_state == GithubRunnerState.BUSY
+
+# # 3.
+# runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
+# await assert_no_runner(runner_manager_with_one_runner)
+
+# issue_metrics_events = runner_manager_with_one_runner.cleanup()
+# assert issue_metrics_events[events.RunnerStart] == 1
+
+
+# @pytest.mark.openstack
+# @pytest.mark.asyncio
+# @pytest.mark.abort_on_fail
+# async def test_runner_normal_lifecycle(
+# runner_manager_with_one_runner: RunnerManager,
+# test_github_branch: Branch,
+# github_repository: Repository,
+# runner_label: str,
+# log_dir_base_path: dict[str, Path],
+# ):
+# """
+# Arrange: RunnerManager with one runner. Clean metric logs.
+# Act:
+# 1. Start a test workflow for the runner.
+# 2. Run cleanup.
+# Assert:
+# 1. The workflow complete successfully.
+# 2. The runner should be deleted. The metrics should be recorded.
+# """
+# metric_log_path = log_dir_base_path["metric_log"]
+# metric_log_existing_content = metric_log_path.read_text(encoding="utf-8")
+
+# workflow = await dispatch_workflow(
+# app=None,
+# branch=test_github_branch,
+# github_repository=github_repository,
+# conclusion="success",
+# workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+# dispatch_input={"runner": runner_label, "minutes": "0"},
+# wait=False,
+# )
+# await wait_for(lambda: workflow_is_status(workflow, "completed"))
+
+# issue_metrics_events = runner_manager_with_one_runner.cleanup()
+# assert issue_metrics_events[events.RunnerStart] == 1
+# assert issue_metrics_events[events.RunnerStop] == 1
+
+# metric_log_full_content = metric_log_path.read_text(encoding="utf-8")
+# assert metric_log_full_content.startswith(
+# metric_log_existing_content
+# ), "The metric log was modified in ways other than appending"
+# # Disable E203 (space before :) as it conflicts with the formatter (black).
+# metric_log_new_content = metric_log_full_content[
+# len(metric_log_existing_content) : # noqa: E203
+# ]
+# metric_logs = [json.loads(metric) for metric in metric_log_new_content.splitlines()]
+# assert (
+# len(metric_logs) == 2
+# ), "Assuming two events should be runner_start and runner_stop, modify this if new events are added"
+# assert metric_logs[0]["event"] == "runner_start"
+# assert metric_logs[0]["workflow"] == "Workflow Dispatch Wait Tests"
+# assert metric_logs[1]["event"] == "runner_stop"
+# assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests"
+
+# await assert_no_runner(runner_manager_with_one_runner)
From 87f5cafb7c2d9cc016e0fbd4f56c2c157e7ff24b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 14:15:44 +0800
Subject: [PATCH 166/278] Add wait until runner is running
---
...penstack_cloud.openstack_runner_manager.md | 8 +++---
.../openstack_runner_manager.py | 28 ++++++++++++++++++-
2 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 029b39268..3a85a76c1 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -130,7 +130,7 @@ Construct the object.
---
-
+
### method `cleanup`
@@ -182,7 +182,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -223,7 +223,7 @@ Get the name prefix of the self-hosted runners.
---
-
+
### method `get_runner`
@@ -246,7 +246,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index f89ca7367..25be0ab0d 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -163,6 +163,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
raise RunnerCreateError(f"Failed to create {instance_name} openstack runner") from err
self._wait_runner_startup(instance)
+ self._wait_runner_running(instance)
end_timestamp = time.time()
OpenstackRunnerManager._issue_runner_installed_metric(
@@ -450,7 +451,7 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
instance: The runner instance.
Raises:
- RunnerStartError: The runner process was not found on the runner.
+ RunnerStartError: The runner startup process was not found on the runner.
"""
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
@@ -468,6 +469,31 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
raise RunnerStartError(f"Runner startup process not found on {instance.server_name}")
logger.info("Runner startup process found to be healthy on %s", instance.server_name)
+ @retry(tries=5, delay=60, local_logger=logger)
+ def _wait_runner_running(self, instance: OpenstackInstance) -> None:
+ """Wait until runner is running.
+
+ Args:
+ instance: The runner instance.
+
+ Raises:
+ RunnerStartError: The runner process was not found on the runner.
+ """
+ try:
+ ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
+ except SshError as err:
+ raise RunnerStartError(
+ f"Failed to SSH connect to {instance.server_name} openstack runner"
+ ) from err
+
+ if not self._run_health_check(ssh_conn=ssh_conn, name=instance.server_name):
+ logger.info("Runner process not found on %s", instance.server_name)
+ raise RunnerStartError(
+ f"Runner process on {instance.server_name} failed to initialize on after starting"
+ )
+
+ logger.info("Runner process found to be healthy on %s", instance.server_name)
+
@staticmethod
def _generate_instance_id() -> InstanceId:
"""Generate a instance id.
From 2390478d2d4ffb66034ccf440988a993f2145695 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 14:19:43 +0800
Subject: [PATCH 167/278] Enable openstack runner manager tests
---
.../openstack_runner_manager.py | 2 +-
.../test_runner_manager_openstack.py | 218 +++++++++---------
2 files changed, 110 insertions(+), 110 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 25be0ab0d..c25b25459 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -472,7 +472,7 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
@retry(tries=5, delay=60, local_logger=logger)
def _wait_runner_running(self, instance: OpenstackInstance) -> None:
"""Wait until runner is running.
-
+
Args:
instance: The runner instance.
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 897272d21..b884fc55a 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -242,112 +242,112 @@ async def test_runner_normal_idle_lifecycle(
await assert_no_runner(runner_manager)
-# @pytest.mark.openstack
-# @pytest.mark.asyncio
-# @pytest.mark.abort_on_fail
-# async def test_runner_flush_busy_lifecycle(
-# runner_manager_with_one_runner: RunnerManager,
-# test_github_branch: Branch,
-# github_repository: Repository,
-# runner_label: str,
-# ):
-# """
-# Arrange: RunnerManager with one idle runner.
-# Act:
-# 1. Run a long workflow.
-# 2. Run flush idle runner.
-# 3. Run flush busy runner.
-# Assert:
-# 1. Runner takes the job and become busy.
-# 2. Busy runner still exists.
-# 3. No runners exists.
-# """
-# # 1.
-# workflow = await dispatch_workflow(
-# app=None,
-# branch=test_github_branch,
-# github_repository=github_repository,
-# conclusion="success",
-# workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
-# dispatch_input={"runner": runner_label, "minutes": "10"},
-# wait=False,
-# )
-# await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
-
-# runner_list = runner_manager_with_one_runner.get_runners()
-# assert len(runner_list) == 1
-# busy_runner = runner_list[0]
-# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
-# assert busy_runner.github_state == GithubRunnerState.BUSY
-
-# # 2.
-# runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
-# runner_list = runner_manager_with_one_runner.get_runners()
-# assert len(runner_list) == 1
-# busy_runner = runner_list[0]
-# assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
-# assert busy_runner.github_state == GithubRunnerState.BUSY
-
-# # 3.
-# runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
-# await assert_no_runner(runner_manager_with_one_runner)
-
-# issue_metrics_events = runner_manager_with_one_runner.cleanup()
-# assert issue_metrics_events[events.RunnerStart] == 1
-
-
-# @pytest.mark.openstack
-# @pytest.mark.asyncio
-# @pytest.mark.abort_on_fail
-# async def test_runner_normal_lifecycle(
-# runner_manager_with_one_runner: RunnerManager,
-# test_github_branch: Branch,
-# github_repository: Repository,
-# runner_label: str,
-# log_dir_base_path: dict[str, Path],
-# ):
-# """
-# Arrange: RunnerManager with one runner. Clean metric logs.
-# Act:
-# 1. Start a test workflow for the runner.
-# 2. Run cleanup.
-# Assert:
-# 1. The workflow complete successfully.
-# 2. The runner should be deleted. The metrics should be recorded.
-# """
-# metric_log_path = log_dir_base_path["metric_log"]
-# metric_log_existing_content = metric_log_path.read_text(encoding="utf-8")
-
-# workflow = await dispatch_workflow(
-# app=None,
-# branch=test_github_branch,
-# github_repository=github_repository,
-# conclusion="success",
-# workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
-# dispatch_input={"runner": runner_label, "minutes": "0"},
-# wait=False,
-# )
-# await wait_for(lambda: workflow_is_status(workflow, "completed"))
-
-# issue_metrics_events = runner_manager_with_one_runner.cleanup()
-# assert issue_metrics_events[events.RunnerStart] == 1
-# assert issue_metrics_events[events.RunnerStop] == 1
-
-# metric_log_full_content = metric_log_path.read_text(encoding="utf-8")
-# assert metric_log_full_content.startswith(
-# metric_log_existing_content
-# ), "The metric log was modified in ways other than appending"
-# # Disable E203 (space before :) as it conflicts with the formatter (black).
-# metric_log_new_content = metric_log_full_content[
-# len(metric_log_existing_content) : # noqa: E203
-# ]
-# metric_logs = [json.loads(metric) for metric in metric_log_new_content.splitlines()]
-# assert (
-# len(metric_logs) == 2
-# ), "Assuming two events should be runner_start and runner_stop, modify this if new events are added"
-# assert metric_logs[0]["event"] == "runner_start"
-# assert metric_logs[0]["workflow"] == "Workflow Dispatch Wait Tests"
-# assert metric_logs[1]["event"] == "runner_stop"
-# assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests"
-
-# await assert_no_runner(runner_manager_with_one_runner)
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_runner_flush_busy_lifecycle(
+ runner_manager_with_one_runner: RunnerManager,
+ test_github_branch: Branch,
+ github_repository: Repository,
+ runner_label: str,
+):
+ """
+ Arrange: RunnerManager with one idle runner.
+ Act:
+ 1. Run a long workflow.
+ 2. Run flush idle runner.
+ 3. Run flush busy runner.
+ Assert:
+ 1. Runner takes the job and become busy.
+ 2. Busy runner still exists.
+ 3. No runners exists.
+ """
+ # 1.
+ workflow = await dispatch_workflow(
+ app=None,
+ branch=test_github_branch,
+ github_repository=github_repository,
+ conclusion="success",
+ workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+ dispatch_input={"runner": runner_label, "minutes": "10"},
+ wait=False,
+ )
+ await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
+
+ runner_list = runner_manager_with_one_runner.get_runners()
+ assert len(runner_list) == 1
+ busy_runner = runner_list[0]
+ assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+ assert busy_runner.github_state == GithubRunnerState.BUSY
+
+ # 2.
+ runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+ runner_list = runner_manager_with_one_runner.get_runners()
+ assert len(runner_list) == 1
+ busy_runner = runner_list[0]
+ assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+ assert busy_runner.github_state == GithubRunnerState.BUSY
+
+ # 3.
+ runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
+ await assert_no_runner(runner_manager_with_one_runner)
+
+ issue_metrics_events = runner_manager_with_one_runner.cleanup()
+ assert issue_metrics_events[events.RunnerStart] == 1
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_runner_normal_lifecycle(
+ runner_manager_with_one_runner: RunnerManager,
+ test_github_branch: Branch,
+ github_repository: Repository,
+ runner_label: str,
+ log_dir_base_path: dict[str, Path],
+):
+ """
+ Arrange: RunnerManager with one runner. Clean metric logs.
+ Act:
+ 1. Start a test workflow for the runner.
+ 2. Run cleanup.
+ Assert:
+ 1. The workflow complete successfully.
+ 2. The runner should be deleted. The metrics should be recorded.
+ """
+ metric_log_path = log_dir_base_path["metric_log"]
+ metric_log_existing_content = metric_log_path.read_text(encoding="utf-8")
+
+ workflow = await dispatch_workflow(
+ app=None,
+ branch=test_github_branch,
+ github_repository=github_repository,
+ conclusion="success",
+ workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
+ dispatch_input={"runner": runner_label, "minutes": "0"},
+ wait=False,
+ )
+ await wait_for(lambda: workflow_is_status(workflow, "completed"))
+
+ issue_metrics_events = runner_manager_with_one_runner.cleanup()
+ assert issue_metrics_events[events.RunnerStart] == 1
+ assert issue_metrics_events[events.RunnerStop] == 1
+
+ metric_log_full_content = metric_log_path.read_text(encoding="utf-8")
+ assert metric_log_full_content.startswith(
+ metric_log_existing_content
+ ), "The metric log was modified in ways other than appending"
+ # Disable E203 (space before :) as it conflicts with the formatter (black).
+ metric_log_new_content = metric_log_full_content[
+ len(metric_log_existing_content) : # noqa: E203
+ ]
+ metric_logs = [json.loads(metric) for metric in metric_log_new_content.splitlines()]
+ assert (
+ len(metric_logs) == 2
+ ), "Assuming two events should be runner_start and runner_stop, modify this if new events are added"
+ assert metric_logs[0]["event"] == "runner_start"
+ assert metric_logs[0]["workflow"] == "Workflow Dispatch Wait Tests"
+ assert metric_logs[1]["event"] == "runner_stop"
+ assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests"
+
+ await assert_no_runner(runner_manager_with_one_runner)
From d9500942da210a21915401ae6ca98720fbd7eb83 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 14:33:59 +0800
Subject: [PATCH 168/278] Add debug
---
tests/integration/test_runner_manager_openstack.py | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index b884fc55a..a9e0e4da7 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -290,11 +290,15 @@ async def test_runner_flush_busy_lifecycle(
# 3.
runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
- await assert_no_runner(runner_manager_with_one_runner)
issue_metrics_events = runner_manager_with_one_runner.cleanup()
assert issue_metrics_events[events.RunnerStart] == 1
-
+
+ # TODO: Debug
+ runner_list = runner_manager_with_one_runner.get_runners()
+ pytest.set_trace()
+
+ await assert_no_runner(runner_manager_with_one_runner)
@pytest.mark.openstack
@pytest.mark.asyncio
@@ -350,4 +354,8 @@ async def test_runner_normal_lifecycle(
assert metric_logs[1]["event"] == "runner_stop"
assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests"
+ # TODO: Debug
+ runner_list = runner_manager_with_one_runner.get_runners()
+ pytest.set_trace()
+
await assert_no_runner(runner_manager_with_one_runner)
From 2f8c9fcdaa15b95bf57508c75746107b6aaad4b0 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 14:45:07 +0800
Subject: [PATCH 169/278] Wait for github state
---
.../integration/test_runner_manager_openstack.py | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index a9e0e4da7..9b38ab33a 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -228,7 +228,12 @@ async def test_runner_normal_idle_lifecycle(
runner = runner_list[0]
assert runner.id == runner_id
assert runner.cloud_state == CloudRunnerState.ACTIVE
- assert runner.github_state == GithubRunnerState.IDLE
+ # Update on GitHub-side can take a bit of time.
+ await wait_for(
+ lambda: runner_manager.get_runners()[0].github_state == GithubRunnerState.IDLE,
+ timeout=120,
+ check_interval=10,
+ )
# 2.
openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
@@ -293,13 +298,14 @@ async def test_runner_flush_busy_lifecycle(
issue_metrics_events = runner_manager_with_one_runner.cleanup()
assert issue_metrics_events[events.RunnerStart] == 1
-
+
# TODO: Debug
runner_list = runner_manager_with_one_runner.get_runners()
- pytest.set_trace()
-
+ pytest.set_trace()
+
await assert_no_runner(runner_manager_with_one_runner)
+
@pytest.mark.openstack
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
@@ -356,6 +362,6 @@ async def test_runner_normal_lifecycle(
# TODO: Debug
runner_list = runner_manager_with_one_runner.get_runners()
- pytest.set_trace()
+ pytest.set_trace()
await assert_no_runner(runner_manager_with_one_runner)
From 0f49d156dd4282e4295959e554f7300c5bc3f836 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 14:57:27 +0800
Subject: [PATCH 170/278] Refactor wait until runner spawn
---
.../test_runner_manager_openstack.py | 37 +++++++++++++------
1 file changed, 26 insertions(+), 11 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 9b38ab33a..54202b308 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -140,14 +140,24 @@ async def runner_manager_fixture(
async def runner_manager_with_one_runner_fixture(runner_manager: RunnerManager) -> RunnerManager:
runner_manager.create_runners(1)
runner_list = runner_manager.get_runners()
- assert len(runner_list) == 1, "Test arrange failed: Expect one runner"
+ try:
+ await assert_runner_amount(runner_manager, 1)
+ except TimeoutError as err:
+ raise AssertionError("Test arrange failed: Expect one runner") from err
+
+
runner = runner_list[0]
assert (
runner.cloud_state == CloudRunnerState.ACTIVE
), "Test arrange failed: Expect runner in active state"
- assert (
- runner.github_state == GithubRunnerState.IDLE
- ), "Test arrange failed: Expect runner in idle state"
+ try:
+ await wait_for(
+ lambda: runner_manager.get_runners()[0].github_state == GithubRunnerState.IDLE,
+ timeout=120,
+ check_interval=10,
+ )
+ except TimeoutError as err:
+ raise AssertionError("Test arrange failed: Expect runner in idle state") from err
return runner_manager
@@ -165,7 +175,7 @@ def workflow_is_status(workflow: Workflow, status: str) -> bool:
return workflow.status == status
-async def assert_no_runner(runner_manager: RunnerManager):
+async def assert_runner_amount(runner_manager: RunnerManager, num: int):
"""Assert the runner manager has no runners.
Retry are performed if the number of runner is not 0. Due to it may take some time for
@@ -178,11 +188,11 @@ async def assert_no_runner(runner_manager: RunnerManager):
"""
runner_list = runner_manager.get_runners()
assert isinstance(runner_list, tuple)
- if len(runner_list) == 0:
+ if len(runner_list) == num:
return
- # The openstack server can take sometime to fully clean up.
- await wait_for(lambda: len(runner_manager.get_runners()) == 0, timeout=60)
+ # The openstack server can take sometime to fully clean up or create.
+ await wait_for(lambda: len(runner_manager.get_runners()) == num)
@pytest.mark.openstack
@@ -222,6 +232,11 @@ async def test_runner_normal_idle_lifecycle(
assert len(runner_id_list) == 1
runner_id = runner_id_list[0]
+ try:
+ await assert_runner_amount(runner_manager, 1)
+ except TimeoutError as err:
+ raise AssertionError("Test arrange failed: Expect one runner") from err
+
runner_list = runner_manager.get_runners()
assert isinstance(runner_list, tuple)
assert len(runner_list) == 1
@@ -244,7 +259,7 @@ async def test_runner_normal_idle_lifecycle(
# 3.
runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
- await assert_no_runner(runner_manager)
+ await assert_runner_amount(runner_manager, 0)
@pytest.mark.openstack
@@ -303,7 +318,7 @@ async def test_runner_flush_busy_lifecycle(
runner_list = runner_manager_with_one_runner.get_runners()
pytest.set_trace()
- await assert_no_runner(runner_manager_with_one_runner)
+ await assert_runner_amount(runner_manager_with_one_runner, 0)
@pytest.mark.openstack
@@ -364,4 +379,4 @@ async def test_runner_normal_lifecycle(
runner_list = runner_manager_with_one_runner.get_runners()
pytest.set_trace()
- await assert_no_runner(runner_manager_with_one_runner)
+ await assert_runner_amount(runner_manager_with_one_runner, 0)
From bed55b1c8b80a6a1a040bd5f81275aa73eaa3b87 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 15:12:20 +0800
Subject: [PATCH 171/278] Add keyfile error
---
src-docs/errors.md | 11 ++++++++++
...penstack_cloud.openstack_runner_manager.md | 20 +++++++++----------
src/errors.py | 3 +++
src/openstack_cloud/openstack_cloud.py | 4 ++--
.../openstack_runner_manager.py | 6 ++++++
5 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/src-docs/errors.md b/src-docs/errors.md
index 1a6316046..6ca397b29 100644
--- a/src-docs/errors.md
+++ b/src-docs/errors.md
@@ -414,3 +414,14 @@ Represents an error while interacting with SSH.
+---
+
+
+
+## class `KeyfileError`
+Represents missing keyfile for SSH.
+
+
+
+
+
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 3a85a76c1..7c7acd442 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -17,7 +17,7 @@ Manager for self-hosted runner on OpenStack.
---
-
+
## class `OpenstackRunnerManagerConfig`
Configuration for OpenstackRunnerManager.
@@ -70,7 +70,7 @@ __init__(
---
-
+
## class `RunnerHealth`
Runners with health state.
@@ -103,12 +103,12 @@ __init__(
---
-
+
## class `OpenstackRunnerManager`
Manage self-hosted runner on OpenStack cloud.
-
+
### method `__init__`
@@ -130,7 +130,7 @@ Construct the object.
---
-
+
### method `cleanup`
@@ -153,7 +153,7 @@ Cleanup runner and resource on the cloud.
---
-
+
### method `create_runner`
@@ -182,7 +182,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -206,7 +206,7 @@ Delete self-hosted runners.
---
-
+
### method `get_name_prefix`
@@ -223,7 +223,7 @@ Get the name prefix of the self-hosted runners.
---
-
+
### method `get_runner`
@@ -246,7 +246,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/errors.py b/src/errors.py
index 204877cd5..3c639107e 100644
--- a/src/errors.py
+++ b/src/errors.py
@@ -170,3 +170,6 @@ class OpenStackUnauthorizedError(OpenstackError):
class SshError(Exception):
"""Represents an error while interacting with SSH."""
+
+class KeyfileError(SshError):
+ """Represents missing keyfile for SSH."""
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 888897bdc..026e51980 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -22,7 +22,7 @@
from openstack.network.v2.security_group import SecurityGroup as OpenstackSecurityGroup
from paramiko.ssh_exception import NoValidConnectionsError
-from errors import OpenstackError, SshError
+from errors import KeyfileError, OpenstackError, SshError
logger = logging.getLogger(__name__)
@@ -267,7 +267,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
key_path = OpenstackCloud._get_key_path(instance.server_name)
if not key_path.exists():
- raise SshError(
+ raise KeyfileError(
f"Missing keyfile for server: {instance.server_name}, key path: {key_path}"
)
if not instance.addresses:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index c25b25459..ae6aab863 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -21,6 +21,7 @@
CreateMetricsStorageError,
GetMetricsStorageError,
IssueMetricEventError,
+ KeyfileError,
OpenstackError,
RunnerCreateError,
RunnerStartError,
@@ -413,6 +414,11 @@ def _health_check(self, instance: OpenstackInstance) -> bool:
"""
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
+ except KeyfileError:
+ logger.exception(
+ "Health check failed due to unable to find keyfile for %s", instance.server_name
+ )
+ return False
except SshError:
logger.exception(
"SSH connection failure with %s during health check", instance.server_name
From d4e11966b4bf7f8a41dc4bda971c5bc42a5087b5 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 15:24:28 +0800
Subject: [PATCH 172/278] Remove debug statement
---
tests/integration/test_runner_manager_openstack.py | 8 --------
1 file changed, 8 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 54202b308..608e38ccb 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -314,10 +314,6 @@ async def test_runner_flush_busy_lifecycle(
issue_metrics_events = runner_manager_with_one_runner.cleanup()
assert issue_metrics_events[events.RunnerStart] == 1
- # TODO: Debug
- runner_list = runner_manager_with_one_runner.get_runners()
- pytest.set_trace()
-
await assert_runner_amount(runner_manager_with_one_runner, 0)
@@ -375,8 +371,4 @@ async def test_runner_normal_lifecycle(
assert metric_logs[1]["event"] == "runner_stop"
assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests"
- # TODO: Debug
- runner_list = runner_manager_with_one_runner.get_runners()
- pytest.set_trace()
-
await assert_runner_amount(runner_manager_with_one_runner, 0)
From be1cad21411514dcd422e2b9b1d0eb9406e0bbf2 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 15:28:15 +0800
Subject: [PATCH 173/278] Re-enable all tests
---
.github/workflows/e2e_test.yaml | 4 +---
.github/workflows/integration_test.yaml | 4 +---
.github/workflows/test.yaml | 4 +---
src-docs/errors.md | 2 +-
src-docs/openstack_cloud.openstack_cloud.md | 7 ++++---
src/errors.py | 1 +
src/openstack_cloud/openstack_cloud.py | 1 +
tests/integration/test_runner_manager_openstack.py | 7 ++-----
8 files changed, 12 insertions(+), 18 deletions(-)
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index c7636df33..5933451ee 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,9 +1,7 @@
name: End-to-End tests
on:
- #TODO: Debug
- # pull_request:
- workflow_dispatch:
+ pull_request:
jobs:
# test option values defined at test/conftest.py are passed on via repository secret
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 000aa5d2b..8e0bc700a 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,9 +1,7 @@
name: integration-tests
on:
- #TODO: Debug
- # pull_request:
- workflow_dispatch:
+ pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 876d3f2df..99e540d31 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,9 +1,7 @@
name: Tests
on:
- #TODO: Debug
- # pull_request:
- workflow_dispatch:
+ pull_request:
jobs:
unit-tests:
diff --git a/src-docs/errors.md b/src-docs/errors.md
index 6ca397b29..c0a333b42 100644
--- a/src-docs/errors.md
+++ b/src-docs/errors.md
@@ -416,7 +416,7 @@ Represents an error while interacting with SSH.
---
-
+
## class `KeyfileError`
Represents missing keyfile for SSH.
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index c431d5f28..15348333b 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -83,7 +83,7 @@ Create the object.
---
-
+
### method `cleanup`
@@ -142,7 +142,7 @@ Get OpenStack instance by instance ID.
---
-
+
### method `get_instances`
@@ -159,7 +159,7 @@ Get all OpenStack instances.
---
-
+
### method `get_server_name`
@@ -203,6 +203,7 @@ Get SSH connection to an OpenStack instance.
**Raises:**
- `SshError`: Unable to get a working SSH connection to the instance.
+ - `KeyfileError`: Unable to find the keyfile to connect to the instance.
diff --git a/src/errors.py b/src/errors.py
index 3c639107e..adb4b7e0e 100644
--- a/src/errors.py
+++ b/src/errors.py
@@ -171,5 +171,6 @@ class OpenStackUnauthorizedError(OpenstackError):
class SshError(Exception):
"""Represents an error while interacting with SSH."""
+
class KeyfileError(SshError):
"""Represents missing keyfile for SSH."""
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 026e51980..91d88145a 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -260,6 +260,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
Raises:
SshError: Unable to get a working SSH connection to the instance.
+ KeyfileError: Unable to find the keyfile to connect to the instance.
Returns:
SSH connection object.
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 608e38ccb..3818ed17f 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -144,7 +144,6 @@ async def runner_manager_with_one_runner_fixture(runner_manager: RunnerManager)
await assert_runner_amount(runner_manager, 1)
except TimeoutError as err:
raise AssertionError("Test arrange failed: Expect one runner") from err
-
runner = runner_list[0]
assert (
@@ -176,15 +175,13 @@ def workflow_is_status(workflow: Workflow, status: str) -> bool:
async def assert_runner_amount(runner_manager: RunnerManager, num: int):
- """Assert the runner manager has no runners.
-
- Retry are performed if the number of runner is not 0. Due to it may take some time for
- openstack to delete the servers.
+ """Assert the number of runner a runner manager has.
A TimeoutError will be thrown if runners are still found after timeout.
Args:
runner_manager: The RunnerManager to check.
+ num: Number of runner to check for.
"""
runner_list = runner_manager.get_runners()
assert isinstance(runner_list, tuple)
From 0c6e66831b3ecb0ac6d79c4121f73efbd404cc41 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 16:20:02 +0800
Subject: [PATCH 174/278] Update src/manager/github_runner_manager.py
Co-authored-by: Yanks Yoon <37652070+yanksyoon@users.noreply.github.com>
---
src/manager/github_runner_manager.py | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index f48330fa1..7921be7e2 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -109,15 +109,13 @@ def get_removal_token(self) -> str:
return self.github.get_runner_remove_token(self._path)
@staticmethod
- def _filter_runner_state(
- runner: SelfHostedRunner, states: Sequence[GithubRunnerState] | None
+ def _is_runner_in_state(
+ runner: SelfHostedRunner, states: Sequence[GithubRunnerState] | None
) -> bool:
- """Filter the runner by the state.
-
+ """Check that the runner is in one of the states provided.
Args:
runner: Runner to filter.
- states: Filter the runners for these states. If None, return true.
-
+ states: States in which to check the runner belongs to.
Returns:
True if the runner is in one of the state, else false.
"""
From 05f747858be7d536db727d0e94b917220f97f425 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 16:38:30 +0800
Subject: [PATCH 175/278] Update src/openstack_cloud/openstack_cloud.py
Co-authored-by: Yanks Yoon <37652070+yanksyoon@users.noreply.github.com>
---
src/openstack_cloud/openstack_cloud.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 91d88145a..2f289d0e8 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -139,7 +139,7 @@ def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
self._cloud = cloud
self.prefix = prefix
- # Ignore "Too many arguments" as 6 args should be fine. Move to a dataclass is new args are
+ # Ignore "Too many arguments" as 6 args should be fine. Move to a dataclass if new args are
# added.
def launch_instance( # pylint: disable=R0913
self, instance_id: str, image: str, flavor: str, network: str, userdata: str
From 248d5d15241c25f2bc60afbcfe646d817dd724dd Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 16:41:51 +0800
Subject: [PATCH 176/278] Suggestions
---
src-docs/errors.md | 4 +--
src-docs/openstack_cloud.openstack_cloud.md | 6 ++--
src/errors.py | 10 +++----
src/manager/cloud_runner_manager.py | 8 ++++--
src/manager/github_runner_manager.py | 2 +-
src/manager/runner_manager.py | 2 +-
src/openstack_cloud/openstack_cloud.py | 22 +++++++--------
src/openstack_cloud/openstack_manager.py | 6 ++--
.../openstack_runner_manager.py | 28 +++++++++----------
tests/unit/test_openstack_manager.py | 4 +--
10 files changed, 48 insertions(+), 44 deletions(-)
diff --git a/src-docs/errors.md b/src-docs/errors.md
index c0a333b42..cf7cde565 100644
--- a/src-docs/errors.md
+++ b/src-docs/errors.md
@@ -374,7 +374,7 @@ Base class for all runner logs errors.
-## class `OpenstackError`
+## class `OpenStackError`
Base class for OpenStack errors.
@@ -407,7 +407,7 @@ Represents an unauthorized connection to OpenStack.
-## class `SshError`
+## class `SSHError`
Represents an error while interacting with SSH.
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 15348333b..beff5a141 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -109,7 +109,7 @@ Delete a openstack instance.
**Raises:**
- - `OpenstackError`: Unable to delete OpenStack server.
+ - `OpenStackError`: Unable to delete OpenStack server.
@@ -202,7 +202,7 @@ Get SSH connection to an OpenStack instance.
**Raises:**
- - `SshError`: Unable to get a working SSH connection to the instance.
+ - `SSHError`: Unable to get a working SSH connection to the instance.
- `KeyfileError`: Unable to find the keyfile to connect to the instance.
@@ -242,7 +242,7 @@ Create an OpenStack instance.
**Raises:**
- - `OpenstackError`: Unable to create OpenStack server.
+ - `OpenStackError`: Unable to create OpenStack server.
diff --git a/src/errors.py b/src/errors.py
index adb4b7e0e..59d28a239 100644
--- a/src/errors.py
+++ b/src/errors.py
@@ -156,21 +156,21 @@ class RunnerLogsError(Exception):
"""Base class for all runner logs errors."""
-class OpenstackError(Exception):
+class OpenStackError(Exception):
"""Base class for OpenStack errors."""
-class OpenStackInvalidConfigError(OpenstackError):
+class OpenStackInvalidConfigError(OpenStackError):
"""Represents an invalid OpenStack configuration."""
-class OpenStackUnauthorizedError(OpenstackError):
+class OpenStackUnauthorizedError(OpenStackError):
"""Represents an unauthorized connection to OpenStack."""
-class SshError(Exception):
+class SSHError(Exception):
"""Represents an error while interacting with SSH."""
-class KeyfileError(SshError):
+class KeyfileError(SSHError):
"""Represents missing keyfile for SSH."""
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 5191a934b..dd5d5424e 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -6,13 +6,17 @@
import abc
from dataclasses import dataclass
from enum import Enum
+import logging
from typing import Iterator, Sequence, Tuple
from metrics.runner import RunnerMetrics
+logger = logging.getLogger(__name__)
+
InstanceId = str
+
class CloudRunnerState(str, Enum):
"""Represent state of the instance hosting the runner.
@@ -76,12 +80,12 @@ class CloudRunnerInstance:
Attributes:
name: Name of the instance hosting the runner.
- id: ID of the instance.
+ instance_id: ID of the instance.
state: State of the instance hosting the runner.
"""
name: str
- id: str
+ instance_id: InstanceId
state: CloudRunnerState
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 7921be7e2..e72d6bd64 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -16,7 +16,7 @@ class GithubRunnerState(str, Enum):
Attributes:
BUSY: Runner is working on a job assigned by GitHub.
- IDLE: Runner is waiting to take a job.
+ IDLE: Runner is waiting to take a job or is running pre-job tasks (i.e. repo-policy-compliance check).
OFFLINE: Runner is not connected to GitHub.
"""
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 98ee9ed9c..6ee4c275e 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -65,7 +65,7 @@ def __init__(self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedR
github_info: Information on the GitHub of the runner.
"""
self.name = cloud_instance.name
- self.id = cloud_instance.id
+ self.id = cloud_instance.instance_id
self.github_state = (
GithubRunnerState.from_runner(github_info) if github_info is not None else None
)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 2f289d0e8..2c4d5e03c 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -22,7 +22,7 @@
from openstack.network.v2.security_group import SecurityGroup as OpenstackSecurityGroup
from paramiko.ssh_exception import NoValidConnectionsError
-from errors import KeyfileError, OpenstackError, SshError
+from errors import KeyfileError, OpenStackError, SSHError
logger = logging.getLogger(__name__)
@@ -97,7 +97,7 @@ def _get_openstack_connection(
cloud: The name of cloud to use in the clouds.yaml.
Raises:
- OpenstackError: if the credentials provided is not authorized.
+ OpenStackError: if the credentials provided is not authorized.
Yields:
An openstack.connection.Connection object.
@@ -115,7 +115,7 @@ def _get_openstack_connection(
# pylint thinks this isn't an exception, but does inherit from Exception class.
except openstack.exceptions.HttpException as exc: # pylint: disable=bad-exception-cause
logger.exception("OpenStack API call failure")
- raise OpenstackError("Failed OpenStack API call") from exc
+ raise OpenStackError("Failed OpenStack API call") from exc
class OpenstackCloud:
@@ -154,7 +154,7 @@ def launch_instance( # pylint: disable=R0913
userdata: The cloud init userdata to startup the instance.
Raises:
- OpenstackError: Unable to create OpenStack server.
+ OpenStackError: Unable to create OpenStack server.
Returns:
The OpenStack instance created.
@@ -198,11 +198,11 @@ def launch_instance( # pylint: disable=R0913
full_name,
)
self._delete_keypair(conn, instance_id)
- raise OpenstackError(f"Timeout creating openstack server {full_name}") from err
+ raise OpenStackError(f"Timeout creating openstack server {full_name}") from err
except openstack.exceptions.SDKException as err:
logger.exception("Failed to create openstack server %s", full_name)
self._delete_keypair(conn, instance_id)
- raise OpenstackError(f"Failed to create openstack server {full_name}") from err
+ raise OpenStackError(f"Failed to create openstack server {full_name}") from err
return OpenstackInstance(server, self.prefix)
@@ -230,7 +230,7 @@ def delete_instance(self, instance_id: str) -> None:
"""Delete a openstack instance.
Raises:
- OpenstackError: Unable to delete OpenStack server.
+ OpenStackError: Unable to delete OpenStack server.
Args:
instance_id: The instance ID of the instance to delete.
@@ -250,7 +250,7 @@ def delete_instance(self, instance_id: str) -> None:
openstack.exceptions.SDKException,
openstack.exceptions.ResourceTimeout,
) as err:
- raise OpenstackError(f"Failed to remove openstack runner {full_name}") from err
+ raise OpenStackError(f"Failed to remove openstack runner {full_name}") from err
def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
"""Get SSH connection to an OpenStack instance.
@@ -259,7 +259,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
instance: The OpenStack instance to connect to.
Raises:
- SshError: Unable to get a working SSH connection to the instance.
+ SSHError: Unable to get a working SSH connection to the instance.
KeyfileError: Unable to find the keyfile to connect to the instance.
Returns:
@@ -272,7 +272,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
f"Missing keyfile for server: {instance.server_name}, key path: {key_path}"
)
if not instance.addresses:
- raise SshError(f"No addresses found for OpenStack server {instance.server_name}")
+ raise SSHError(f"No addresses found for OpenStack server {instance.server_name}")
for ip in instance.addresses:
try:
@@ -300,7 +300,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
exc_info=True,
)
continue
- raise SshError(
+ raise SSHError(
f"No connectable SSH addresses found, server: {instance.server_name}, "
f"addresses: {instance.addresses}"
)
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index c5d2bce44..35799d8bb 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -48,7 +48,7 @@
GithubClientError,
GithubMetricsError,
IssueMetricEventError,
- OpenstackError,
+ OpenStackError,
RunnerCreateError,
RunnerStartError,
)
@@ -161,7 +161,7 @@ def _create_connection(cloud_config: dict[str, dict]) -> Iterator[openstack.conn
cloud_config: The configuration in clouds.yaml format to apply.
Raises:
- OpenstackError: if the credentials provided is not authorized.
+ OpenStackError: if the credentials provided is not authorized.
Yields:
An openstack.connection.Connection object.
@@ -180,7 +180,7 @@ def _create_connection(cloud_config: dict[str, dict]) -> Iterator[openstack.conn
# pylint thinks this isn't an exception, but does inherit from Exception class.
except openstack.exceptions.HttpException as exc: # pylint: disable=bad-exception-cause
logger.exception("OpenStack API call failure")
- raise OpenstackError("Failed OpenStack API call") from exc
+ raise OpenStackError("Failed OpenStack API call") from exc
# Disable too many arguments, as they are needed to create the dataclass.
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index ae6aab863..2d1e2bb12 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -22,10 +22,10 @@
GetMetricsStorageError,
IssueMetricEventError,
KeyfileError,
- OpenstackError,
+ OpenStackError,
RunnerCreateError,
RunnerStartError,
- SshError,
+ SSHError,
)
from manager.cloud_runner_manager import (
CloudRunnerInstance,
@@ -160,7 +160,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
network=self.config.network,
userdata=userdata,
)
- except OpenstackError as err:
+ except OpenStackError as err:
raise RunnerCreateError(f"Failed to create {instance_name} openstack runner") from err
self._wait_runner_startup(instance)
@@ -190,7 +190,7 @@ def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
if instance.server_name == name:
return CloudRunnerInstance(
name=name,
- id=instance_id,
+ instance_id=instance_id,
state=CloudRunnerState.from_openstack_server_status(instance.status),
)
return None
@@ -211,7 +211,7 @@ def get_runners(
instance_list = [
CloudRunnerInstance(
name=instance.server_name,
- id=instance.instance_id,
+ instance_id=instance.instance_id,
state=CloudRunnerState.from_openstack_server_status(instance.status),
)
for instance in instance_list
@@ -287,7 +287,7 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token: str) -> None
instance.server_name,
stack_info=True,
)
- except SshError:
+ except SSHError:
logger.exception(
"Failed to get SSH connection while removing %s", instance.server_name
)
@@ -297,7 +297,7 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token: str) -> None
try:
self._openstack_cloud.delete_instance(instance.instance_id)
- except OpenstackError:
+ except OpenStackError:
logger.exception(
"Unable to delete openstack instance for runner %s", instance.server_name
)
@@ -407,7 +407,7 @@ def _health_check(self, instance: OpenstackInstance) -> bool:
instance: The OpenStack instance to conduit the health check.
Raises:
- SshError: Unable to get a SSH connection to the instance.
+ SSHError: Unable to get a SSH connection to the instance.
Returns:
Whether the runner is healthy.
@@ -419,7 +419,7 @@ def _health_check(self, instance: OpenstackInstance) -> bool:
"Health check failed due to unable to find keyfile for %s", instance.server_name
)
return False
- except SshError:
+ except SSHError:
logger.exception(
"SSH connection failure with %s during health check", instance.server_name
)
@@ -461,7 +461,7 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
"""
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
- except SshError as err:
+ except SSHError as err:
raise RunnerStartError(
f"Failed to SSH connect to {instance.server_name} openstack runner"
) from err
@@ -487,7 +487,7 @@ def _wait_runner_running(self, instance: OpenstackInstance) -> None:
"""
try:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
- except SshError as err:
+ except SSHError as err:
raise RunnerStartError(
f"Failed to SSH connect to {instance.server_name} openstack runner"
) from err
@@ -607,7 +607,7 @@ def _ssh_pull_file(
Raises:
_PullFileError: Unable to pull the file from the runner instance.
- SshError: Issue with SSH connection.
+ SSHError: Issue with SSH connection.
"""
try:
result = ssh_conn.run(f"stat -c %s {remote_path}", warn=True)
@@ -616,7 +616,7 @@ def _ssh_pull_file(
paramiko.ssh_exception.NoValidConnectionsError,
paramiko.ssh_exception.SSHException,
) as exc:
- raise SshError(f"Unable to SSH into {ssh_conn.host}") from exc
+ raise SSHError(f"Unable to SSH into {ssh_conn.host}") from exc
if not result.ok:
logger.warning(
(
@@ -647,7 +647,7 @@ def _ssh_pull_file(
paramiko.ssh_exception.NoValidConnectionsError,
paramiko.ssh_exception.SSHException,
) as exc:
- raise SshError(f"Unable to SSH into {ssh_conn.host}") from exc
+ raise SSHError(f"Unable to SSH into {ssh_conn.host}") from exc
except OSError as exc:
raise _PullFileError(f"Unable to retrieve file {remote_path}") from exc
diff --git a/tests/unit/test_openstack_manager.py b/tests/unit/test_openstack_manager.py
index 373f656f0..6aabc7361 100644
--- a/tests/unit/test_openstack_manager.py
+++ b/tests/unit/test_openstack_manager.py
@@ -19,7 +19,7 @@
import metrics.storage
import reactive.runner_manager
from charm_state import CharmState, ProxyConfig, ReactiveConfig, RepoPolicyComplianceConfig
-from errors import OpenstackError, RunnerStartError
+from errors import OpenStackError, RunnerStartError
from github_type import GitHubRunnerStatus, RunnerApplication, SelfHostedRunner
from metrics import events as metric_events
from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
@@ -262,7 +262,7 @@ def test__create_connection_error(clouds_yaml: dict, openstack_connect_mock: Mag
connection_mock.__enter__.return_value = connection_context
openstack_connect_mock.return_value = connection_mock
- with pytest.raises(OpenstackError) as exc:
+ with pytest.raises(OpenStackError) as exc:
with openstack_manager._create_connection(cloud_config=clouds_yaml):
pass
From e112a9b3daf1cecb636375b087c765e0a285d189 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 16:45:52 +0800
Subject: [PATCH 177/278] Refactor remove openstack server
---
src-docs/openstack_cloud.openstack_cloud.md | 12 ++---
src/openstack_cloud/openstack_cloud.py | 44 ++++++++++---------
.../openstack_runner_manager.py | 1 +
3 files changed, 30 insertions(+), 27 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index beff5a141..c49f9779f 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -83,7 +83,7 @@ Create the object.
---
-
+
### method `cleanup`
@@ -95,7 +95,7 @@ Cleanup unused openstack resources.
---
-
+
### method `delete_instance`
@@ -119,7 +119,7 @@ Delete a openstack instance.
---
-
+
### method `get_instance`
@@ -142,7 +142,7 @@ Get OpenStack instance by instance ID.
---
-
+
### method `get_instances`
@@ -159,7 +159,7 @@ Get all OpenStack instances.
---
-
+
### method `get_server_name`
@@ -182,7 +182,7 @@ Get server name on OpenStack.
---
-
+
### method `get_ssh_connection`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 2c4d5e03c..b9a3aceb7 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -187,17 +187,7 @@ def launch_instance( # pylint: disable=R0913
"Attempting clean up of openstack server %s that timeout during creation",
full_name,
)
- try:
- conn.delete_server(name_or_id=full_name, wait=True)
- except (
- openstack.exceptions.SDKException,
- openstack.exceptions.ResourceTimeout,
- ):
- logger.exception(
- "Failed to cleanup openstack server %s that timeout during creation",
- full_name,
- )
- self._delete_keypair(conn, instance_id)
+ self._delete_instance(conn, full_name)
raise OpenStackError(f"Timeout creating openstack server {full_name}") from err
except openstack.exceptions.SDKException as err:
logger.exception("Failed to create openstack server %s", full_name)
@@ -241,16 +231,28 @@ def delete_instance(self, instance_id: str) -> None:
with _get_openstack_connection(
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
- try:
- server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
- if server is not None:
- conn.delete_server(name_or_id=server.id)
- OpenstackCloud._delete_keypair(conn, full_name)
- except (
- openstack.exceptions.SDKException,
- openstack.exceptions.ResourceTimeout,
- ) as err:
- raise OpenStackError(f"Failed to remove openstack runner {full_name}") from err
+ self._delete_instance(conn, full_name)
+
+ def _delete_instance(self, conn: OpenstackConnection, full_name: str) -> None:
+ """Delete a openstack instance.
+
+ Raises:
+ OpenStackError: Unable to delete OpenStack server.
+
+ Args:
+ conn: The openstack connection to use.
+ full_name: The full name of the server.
+ """
+ try:
+ server = OpenstackCloud._get_and_ensure_unique_server(conn, full_name)
+ if server is not None:
+ conn.delete_server(name_or_id=server.id)
+ OpenstackCloud._delete_keypair(conn, full_name)
+ except (
+ openstack.exceptions.SDKException,
+ openstack.exceptions.ResourceTimeout,
+ ) as err:
+ raise OpenStackError(f"Failed to remove openstack runner {full_name}") from err
def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
"""Get SSH connection to an OpenStack instance.
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 2d1e2bb12..455f5e396 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -317,6 +317,7 @@ def _get_runner_health(self) -> RunnerHealth:
CloudRunnerState.DELETED,
CloudRunnerState.ERROR,
CloudRunnerState.STOPPED,
+ CloudRunnerState.UNKNOWN,
) or not self._health_check(runner):
unhealthy.append(runner)
else:
From 2562939ac7d3915544f229e8f34ba10d4601ea1c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 16:57:28 +0800
Subject: [PATCH 178/278] Test spawning two runners.
---
.github/workflows/e2e_test.yaml | 4 ++-
.github/workflows/integration_test.yaml | 4 ++-
src-docs/openstack_cloud.openstack_cloud.md | 14 +++-----
src/manager/cloud_runner_manager.py | 3 +-
src/manager/github_runner_manager.py | 9 +++--
src/manager/runner_manager.py | 19 ++++++++---
src/openstack_cloud/openstack_cloud.py | 7 ++--
.../test_runner_manager_openstack.py | 34 +++++++++++++++++++
8 files changed, 68 insertions(+), 26 deletions(-)
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index 5933451ee..f950dc646 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,7 +1,9 @@
name: End-to-End tests
on:
- pull_request:
+ # TODO: Debug only
+ # pull_request:
+ workflow_dispatch:
jobs:
# test option values defined at test/conftest.py are passed on via repository secret
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 8e0bc700a..41d9e5c7f 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,7 +1,9 @@
name: integration-tests
on:
- pull_request:
+ # TODO: Debug only
+ # pull_request:
+ workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index c49f9779f..d0895577a 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -83,7 +83,7 @@ Create the object.
---
-
+
### method `cleanup`
@@ -107,12 +107,6 @@ Delete a openstack instance.
-**Raises:**
-
- - `OpenStackError`: Unable to delete OpenStack server.
-
-
-
**Args:**
- `instance_id`: The instance ID of the instance to delete.
@@ -142,7 +136,7 @@ Get OpenStack instance by instance ID.
---
-
+
### method `get_instances`
@@ -159,7 +153,7 @@ Get all OpenStack instances.
---
-
+
### method `get_server_name`
@@ -182,7 +176,7 @@ Get server name on OpenStack.
---
-
+
### method `get_ssh_connection`
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index dd5d5424e..78fac93be 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -4,9 +4,9 @@
"""Interface of manager of runner instance on clouds."""
import abc
+import logging
from dataclasses import dataclass
from enum import Enum
-import logging
from typing import Iterator, Sequence, Tuple
from metrics.runner import RunnerMetrics
@@ -16,7 +16,6 @@
InstanceId = str
-
class CloudRunnerState(str, Enum):
"""Represent state of the instance hosting the runner.
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index e72d6bd64..4d6ae5788 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -16,7 +16,8 @@ class GithubRunnerState(str, Enum):
Attributes:
BUSY: Runner is working on a job assigned by GitHub.
- IDLE: Runner is waiting to take a job or is running pre-job tasks (i.e. repo-policy-compliance check).
+ IDLE: Runner is waiting to take a job or is running pre-job tasks (i.e.
+ repo-policy-compliance check).
OFFLINE: Runner is not connected to GitHub.
"""
@@ -75,7 +76,7 @@ def get_runners(
runner
for runner in runner_list
if runner.name.startswith(self._prefix)
- and GithubRunnerManager._filter_runner_state(runner, states)
+ and GithubRunnerManager._is_runner_in_state(runner, states)
)
def delete_runners(self, states: Sequence[GithubRunnerState] | None = None) -> None:
@@ -110,12 +111,14 @@ def get_removal_token(self) -> str:
@staticmethod
def _is_runner_in_state(
- runner: SelfHostedRunner, states: Sequence[GithubRunnerState] | None
+ runner: SelfHostedRunner, states: Sequence[GithubRunnerState] | None
) -> bool:
"""Check that the runner is in one of the states provided.
+
Args:
runner: Runner to filter.
states: States in which to check the runner belongs to.
+
Returns:
True if the runner is in one of the state, else false.
"""
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 6ee4c275e..dde83c00b 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -10,7 +10,7 @@
from typing import Iterator, Sequence, Type, cast
from charm_state import GithubPath
-from errors import GithubMetricsError
+from errors import GithubMetricsError, RunnerCreateError
from github_type import SelfHostedRunner
from manager.cloud_runner_manager import (
CloudRunnerInstance,
@@ -116,10 +116,21 @@ def create_runners(self, num: int) -> tuple[InstanceId]:
create_runner_args = [
RunnerManager._CreateRunnerArgs(self._cloud, registration_token) for _ in range(num)
]
+ instance_id_list = []
with Pool(processes=min(num, 10)) as pool:
- instance_ids = pool.map(func=RunnerManager._create_runner, iterable=create_runner_args)
-
- return tuple(instance_ids)
+ jobs = pool.imap_unordered(
+ func=RunnerManager._create_runner, iterable=create_runner_args
+ )
+ for _ in range(num):
+ try:
+ instance_id = next(jobs)
+ except RunnerCreateError:
+ logger.exception("Failed to spawn a runner.")
+ except StopIteration:
+ break
+ else:
+ instance_id_list.append(instance_id)
+ return tuple(instance_id_list)
def get_runners(
self,
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index b9a3aceb7..6406a8c43 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -187,7 +187,7 @@ def launch_instance( # pylint: disable=R0913
"Attempting clean up of openstack server %s that timeout during creation",
full_name,
)
- self._delete_instance(conn, full_name)
+ self._delete_instance(conn, full_name)
raise OpenStackError(f"Timeout creating openstack server {full_name}") from err
except openstack.exceptions.SDKException as err:
logger.exception("Failed to create openstack server %s", full_name)
@@ -219,9 +219,6 @@ def get_instance(self, instance_id: str) -> OpenstackInstance | None:
def delete_instance(self, instance_id: str) -> None:
"""Delete a openstack instance.
- Raises:
- OpenStackError: Unable to delete OpenStack server.
-
Args:
instance_id: The instance ID of the instance to delete.
"""
@@ -232,7 +229,7 @@ def delete_instance(self, instance_id: str) -> None:
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
self._delete_instance(conn, full_name)
-
+
def _delete_instance(self, conn: OpenstackConnection, full_name: str) -> None:
"""Delete a openstack instance.
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 3818ed17f..82a1bbe0b 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -369,3 +369,37 @@ async def test_runner_normal_lifecycle(
assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests"
await assert_runner_amount(runner_manager_with_one_runner, 0)
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_runner_spawn_two(
+ runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+) -> None:
+ """
+ Arrange: RunnerManager instance with no runners.
+ Act:
+ 1. Create two runner.
+ 3. Delete all idle runner.
+ Assert:
+ 1. Two active idle runner.
+ 3. No runners.
+ """
+ # 1.
+ runner_id_list = runner_manager.create_runners(1)
+ assert isinstance(runner_id_list, tuple)
+ assert len(runner_id_list) == 2
+
+ try:
+ await assert_runner_amount(runner_manager, 2)
+ except TimeoutError as err:
+ raise AssertionError("Test arrange failed: Expect two runner") from err
+
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 2
+
+ # 3.
+ runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+ await assert_runner_amount(runner_manager, 0)
From fc11db3424e261e412cdc6ea642420b2168c5a3e Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 17:14:25 +0800
Subject: [PATCH 179/278] Fix test
---
tests/integration/test_runner_manager_openstack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 82a1bbe0b..6160b0b4a 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -387,7 +387,7 @@ async def test_runner_spawn_two(
3. No runners.
"""
# 1.
- runner_id_list = runner_manager.create_runners(1)
+ runner_id_list = runner_manager.create_runners(2)
assert isinstance(runner_id_list, tuple)
assert len(runner_id_list) == 2
From 6dad4ab22d645a389088ff91baaa0d4e085e1125 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 14 Aug 2024 17:40:09 +0800
Subject: [PATCH 180/278] Fix naming
---
.github/workflows/e2e_test.yaml | 4 +---
.github/workflows/integration_test.yaml | 4 +---
src/openstack_cloud/openstack_cloud.py | 8 +++++---
src/openstack_cloud/openstack_manager.py | 10 +++++-----
src/openstack_cloud/openstack_runner_manager.py | 10 +++++-----
tests/unit/test_openstack_manager.py | 16 ++++++++--------
6 files changed, 25 insertions(+), 27 deletions(-)
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index f950dc646..5933451ee 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,9 +1,7 @@
name: End-to-End tests
on:
- # TODO: Debug only
- # pull_request:
- workflow_dispatch:
+ pull_request:
jobs:
# test option values defined at test/conftest.py are passed on via repository secret
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 41d9e5c7f..8e0bc700a 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,9 +1,7 @@
name: integration-tests
on:
- # TODO: Debug only
- # pull_request:
- workflow_dispatch:
+ pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 6406a8c43..6de3bfeba 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -15,7 +15,7 @@
import openstack.exceptions
import paramiko
import yaml
-from fabric import Connection as SshConnection
+from fabric import Connection as SSHConnection
from openstack.compute.v2.keypair import Keypair as OpenstackKeypair
from openstack.compute.v2.server import Server as OpenstackServer
from openstack.connection import Connection as OpenstackConnection
@@ -251,7 +251,7 @@ def _delete_instance(self, conn: OpenstackConnection, full_name: str) -> None:
) as err:
raise OpenStackError(f"Failed to remove openstack runner {full_name}") from err
- def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
+ def get_ssh_connection(self, instance: OpenstackInstance) -> SSHConnection:
"""Get SSH connection to an OpenStack instance.
Args:
@@ -275,7 +275,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SshConnection:
for ip in instance.addresses:
try:
- connection = SshConnection(
+ connection = SSHConnection(
host=ip,
user="ubuntu",
connect_kwargs={"key_filename": str(key_path)},
@@ -448,6 +448,8 @@ def _get_and_ensure_unique_server(
if not servers:
return None
+ # 2024/08/14: The `format` arg for `strptime` is the default format.
+ # This is only provided to get around a bug of the function with type checking.
latest_server = reduce(
lambda a, b: (
a
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index 35799d8bb..2893d65dd 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -32,7 +32,7 @@
import openstack.exceptions
import openstack.image.v2.image
import paramiko
-from fabric import Connection as SshConnection
+from fabric import Connection as SSHConnection
from openstack.compute.v2.server import Server
from openstack.connection import Connection as OpenstackConnection
from openstack.exceptions import SDKException
@@ -556,7 +556,7 @@ def _ssh_health_check(conn: OpenstackConnection, server_name: str, startup: bool
@retry(tries=3, delay=5, max_delay=60, backoff=2, local_logger=logger)
def _get_ssh_connection(
conn: OpenstackConnection, server_name: str, timeout: int = 30
- ) -> SshConnection:
+ ) -> SSHConnection:
"""Get a valid ssh connection within a network for a given openstack instance.
The SSH connection will attempt to establish connection until the timeout configured.
@@ -593,7 +593,7 @@ def _get_ssh_connection(
]
for ip in server_addresses:
try:
- connection = SshConnection(
+ connection = SSHConnection(
host=ip,
user="ubuntu",
connect_kwargs={"key_filename": str(key_path)},
@@ -1093,7 +1093,7 @@ def _pull_metrics(self, conn: OpenstackConnection, instance_name: str) -> None:
return
def _pull_file(
- self, ssh_conn: SshConnection, remote_path: str, local_path: str, max_size: int
+ self, ssh_conn: SSHConnection, remote_path: str, local_path: str, max_size: int
) -> None:
"""Pull file from the runner instance.
@@ -1589,7 +1589,7 @@ def _kill_runner_processes(self, conn: OpenstackConnection, mode: FlushMode) ->
servers = self._get_openstack_instances(conn=conn)
for server in servers:
- ssh_conn: SshConnection = self._get_ssh_connection(conn=conn, server_name=server.name)
+ ssh_conn: SSHConnection = self._get_ssh_connection(conn=conn, server_name=server.name)
result: invoke.runners.Result = ssh_conn.run(
killer_command,
warn=True,
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 455f5e396..5e00847c8 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -14,7 +14,7 @@
import jinja2
import paramiko
import paramiko.ssh_exception
-from fabric import Connection as SshConnection
+from fabric import Connection as SSHConnection
from charm_state import GithubOrg, GithubPath, ProxyConfig, SSHDebugConnection
from errors import (
@@ -428,7 +428,7 @@ def _health_check(self, instance: OpenstackInstance) -> bool:
return OpenstackRunnerManager._run_health_check(ssh_conn, instance.server_name)
@staticmethod
- def _run_health_check(ssh_conn: SshConnection, name: str) -> bool:
+ def _run_health_check(ssh_conn: SSHConnection, name: str) -> bool:
"""Run a health check for runner process.
Args:
@@ -557,7 +557,7 @@ def _issue_runner_installed_metric(
)
@staticmethod
- def _pull_runner_metrics(name: str, ssh_conn: SshConnection) -> None:
+ def _pull_runner_metrics(name: str, ssh_conn: SSHConnection) -> None:
"""Pull metrics from runner.
Args:
@@ -596,7 +596,7 @@ def _pull_runner_metrics(name: str, ssh_conn: SshConnection) -> None:
@staticmethod
def _ssh_pull_file(
- ssh_conn: SshConnection, remote_path: str, local_path: str, max_size: int
+ ssh_conn: SSHConnection, remote_path: str, local_path: str, max_size: int
) -> None:
"""Pull file from the runner instance.
@@ -654,7 +654,7 @@ def _ssh_pull_file(
@staticmethod
def _run_github_runner_removal_script(
- instance_name: str, ssh_conn: SshConnection, remove_token: str
+ instance_name: str, ssh_conn: SSHConnection, remove_token: str
) -> None:
"""Run Github runner removal script.
diff --git a/tests/unit/test_openstack_manager.py b/tests/unit/test_openstack_manager.py
index 6aabc7361..9399cd46e 100644
--- a/tests/unit/test_openstack_manager.py
+++ b/tests/unit/test_openstack_manager.py
@@ -10,7 +10,7 @@
import openstack.connection
import openstack.exceptions
import pytest
-from fabric.connection import Connection as SshConnection
+from fabric.connection import Connection as SSHConnection
from invoke import Result
from openstack.compute.v2.keypair import Keypair
from openstack.compute.v2.server import Server
@@ -57,7 +57,7 @@ def patch_get_ssh_connection_health_check_fixture(monkeypatch: pytest.MonkeyPatc
mock_get_ssh_connection = MagicMock(
spec=openstack_manager.OpenstackRunnerManager._get_ssh_connection
)
- mock_ssh_connection = MagicMock(spec=SshConnection)
+ mock_ssh_connection = MagicMock(spec=SSHConnection)
mock_ssh_connection.host = "test host IP"
mock_result = MagicMock(spec=Result)
mock_result.ok = True
@@ -79,7 +79,7 @@ def ssh_connection_health_check_fixture(monkeypatch: pytest.MonkeyPatch):
mock_get_ssh_connection = MagicMock(
spec=openstack_manager.OpenstackRunnerManager._get_ssh_connection
)
- mock_ssh_connection = MagicMock(spec=SshConnection)
+ mock_ssh_connection = MagicMock(spec=SSHConnection)
mock_ssh_connection.host = "test host IP"
mock_result = MagicMock(spec=Result)
mock_result.ok = True
@@ -97,7 +97,7 @@ def patch_ssh_connection_error_fixture(monkeypatch: pytest.MonkeyPatch):
mock_get_ssh_connection = MagicMock(
spec=openstack_manager.OpenstackRunnerManager._get_ssh_connection
)
- mock_ssh_connection = MagicMock(spec=SshConnection)
+ mock_ssh_connection = MagicMock(spec=SSHConnection)
mock_result = MagicMock(spec=Result)
mock_result.ok = False
mock_result.stdout = "Mock stdout"
@@ -153,7 +153,7 @@ def patched_create_connection_context_fixture(monkeypatch: pytest.MonkeyPatch):
def ssh_connection_mock_fixture() -> MagicMock:
"""Return a mocked ssh connection."""
test_file_content = secrets.token_hex(16)
- ssh_conn_mock = MagicMock(spec=openstack_manager.SshConnection)
+ ssh_conn_mock = MagicMock(spec=openstack_manager.SSHConnection)
ssh_conn_mock.get.side_effect = lambda remote, local: Path(local).write_text(test_file_content)
ssh_conn_mock.run.side_effect = lambda cmd, **kwargs: (
Result(stdout="1") if cmd.startswith("stat") else Result()
@@ -862,7 +862,7 @@ def test__ssh_health_check_error(monkeypatch: pytest.MonkeyPatch, mock_server: M
mock_ssh_connection = MagicMock()
mock_ssh_connection.run = MagicMock(side_effect=TimeoutError)
monkeypatch.setattr(
- openstack_manager, "SshConnection", MagicMock(return_value=mock_ssh_connection)
+ openstack_manager, "SSHConnection", MagicMock(return_value=mock_ssh_connection)
)
with pytest.raises(openstack_manager._SSHError) as exc:
@@ -1132,7 +1132,7 @@ def test__get_ssh_connection_server_no_valid_connections(
mock_ssh_connection = MagicMock()
mock_ssh_connection.run = run
monkeypatch.setattr(
- openstack_manager, "SshConnection", MagicMock(return_value=mock_ssh_connection)
+ openstack_manager, "SSHConnection", MagicMock(return_value=mock_ssh_connection)
)
with pytest.raises(openstack_manager._SSHError) as exc:
@@ -1164,7 +1164,7 @@ def test__get_ssh_connection_server(monkeypatch: pytest.MonkeyPatch):
return_value=factories.MockSSHRunResult(exited=0, stdout="hello world")
)
monkeypatch.setattr(
- openstack_manager, "SshConnection", MagicMock(return_value=mock_ssh_connection)
+ openstack_manager, "SSHConnection", MagicMock(return_value=mock_ssh_connection)
)
assert (
From a4bca24d3ee7f95de6a1d9bb583ebe085b37f170 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 15 Aug 2024 14:36:25 +0800
Subject: [PATCH 181/278] Fix according comment
---
pyproject.toml | 4 ++--
src-docs/openstack_cloud.openstack_cloud.md | 22 +++++++++----------
src/openstack_cloud/openstack_cloud.py | 11 +++++-----
.../openstack_runner_manager.py | 2 +-
.../test_runner_manager_openstack.py | 5 +----
5 files changed, 20 insertions(+), 24 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 458b72d93..f4a49bd2a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,9 +57,9 @@ max-doc-length = 99
max-complexity = 10
exclude = [".git", "__pycache__", ".tox", "build", "dist", "*.egg_info", "venv"]
select = ["E", "W", "F", "C", "N", "R", "D", "H"]
-# Ignore W503, E501 because using black creates errors with this
+# Ignore W503, E501, E203 because using black creates errors with this
# Ignore D107 Missing docstring in __init__
-ignore = ["W503", "E501", "D107"]
+ignore = ["W503", "E501", "D107", "E203"]
# D100, D101, D102, D103, D104: Ignore docstring style issues in tests
# temporary disable E402 for the fix in charm.py for lp:2058335
per-file-ignores = ["src/charm.py:E402", "tests/*:D100,D101,D102,D103,D104,D205,D212"]
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index d0895577a..0fe0f5cdb 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -53,14 +53,14 @@ Construct the object.
---
-
+
## class `OpenstackCloud`
Client to interact with OpenStack cloud.
The OpenStack server name is managed by this cloud. Caller refers to the instances via instance_id. If the caller needs the server name, e.g., for logging, it can be queried with get_server_name.
-
+
### method `__init__`
@@ -83,7 +83,7 @@ Create the object.
---
-
+
### method `cleanup`
@@ -95,7 +95,7 @@ Cleanup unused openstack resources.
---
-
+
### method `delete_instance`
@@ -113,7 +113,7 @@ Delete a openstack instance.
---
-
+
### method `get_instance`
@@ -136,7 +136,7 @@ Get OpenStack instance by instance ID.
---
-
+
### method `get_instances`
@@ -153,7 +153,7 @@ Get all OpenStack instances.
---
-
+
### method `get_server_name`
@@ -176,7 +176,7 @@ Get server name on OpenStack.
---
-
+
### method `get_ssh_connection`
@@ -206,7 +206,7 @@ Get SSH connection to an OpenStack instance.
---
-
+
### method `launch_instance`
@@ -216,7 +216,7 @@ launch_instance(
image: str,
flavor: str,
network: str,
- userdata: str
+ cloud_init: str
) → OpenstackInstance
```
@@ -230,7 +230,7 @@ Create an OpenStack instance.
- `image`: The image used to create the instance.
- `flavor`: The flavor used to create the instance.
- `network`: The network used to create the instance.
- - `userdata`: The cloud init userdata to startup the instance.
+ - `cloud_init`: The cloud init userdata to startup the instance.
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 6de3bfeba..2990d2a9d 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -26,7 +26,7 @@
logger = logging.getLogger(__name__)
-_CLOUDS_YAML_PATH = Path(Path.home() / ".config/openstack/clouds.yaml")
+_CLOUDS_YAML_PATH = Path.home() / ".config/openstack/clouds.yaml"
# Update the version when the security group rules are not backward compatible.
_SECURITY_GROUP_NAME = "github-runner-v1"
@@ -80,8 +80,7 @@ def __init__(self, server: OpenstackServer, prefix: str):
raise ValueError(
f"Found openstack server {server.name} managed under prefix {prefix}, contact devs"
)
- # Disable E203 (space before :) as it conflicts with the formatter (black).
- self.instance_id = self.server_name[len(prefix) + 1 :] # noqa: E203
+ self.instance_id = self.server_name[len(prefix) + 1 :]
@contextmanager
@@ -142,7 +141,7 @@ def __init__(self, clouds_config: dict[str, dict], cloud: str, prefix: str):
# Ignore "Too many arguments" as 6 args should be fine. Move to a dataclass if new args are
# added.
def launch_instance( # pylint: disable=R0913
- self, instance_id: str, image: str, flavor: str, network: str, userdata: str
+ self, instance_id: str, image: str, flavor: str, network: str, cloud_init: str
) -> OpenstackInstance:
"""Create an OpenStack instance.
@@ -151,7 +150,7 @@ def launch_instance( # pylint: disable=R0913
image: The image used to create the instance.
flavor: The flavor used to create the instance.
network: The network used to create the instance.
- userdata: The cloud init userdata to startup the instance.
+ cloud_init: The cloud init userdata to startup the instance.
Raises:
OpenStackError: Unable to create OpenStack server.
@@ -176,7 +175,7 @@ def launch_instance( # pylint: disable=R0913
flavor=flavor,
network=network,
security_groups=[security_group.id],
- userdata=userdata,
+ userdata=cloud_init,
auto_ip=False,
timeout=_CREATE_SERVER_TIMEOUT,
wait=True,
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 5e00847c8..61fc212a8 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -158,7 +158,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
image=self.config.image,
flavor=self.config.flavor,
network=self.config.network,
- userdata=userdata,
+ cloud_init=userdata,
)
except OpenStackError as err:
raise RunnerCreateError(f"Failed to create {instance_name} openstack runner") from err
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 6160b0b4a..1685d6548 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -355,10 +355,7 @@ async def test_runner_normal_lifecycle(
assert metric_log_full_content.startswith(
metric_log_existing_content
), "The metric log was modified in ways other than appending"
- # Disable E203 (space before :) as it conflicts with the formatter (black).
- metric_log_new_content = metric_log_full_content[
- len(metric_log_existing_content) : # noqa: E203
- ]
+ metric_log_new_content = metric_log_full_content[len(metric_log_existing_content) :]
metric_logs = [json.loads(metric) for metric in metric_log_new_content.splitlines()]
assert (
len(metric_logs) == 2
From c7e0ab324e0f13084db184151ac3c83940634d1c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 15 Aug 2024 15:07:17 +0800
Subject: [PATCH 182/278] Fix clouds yaml write issue.
---
src-docs/openstack_cloud.openstack_cloud.md | 22 ++++++++++-----------
src/openstack_cloud/openstack_cloud.py | 9 +++++++--
2 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 0fe0f5cdb..316dadba1 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -9,7 +9,7 @@ Class for accessing OpenStack API for managing servers.
---
-
+
## class `OpenstackInstance`
Represents an OpenStack instance.
@@ -24,7 +24,7 @@ Represents an OpenStack instance.
- `addresses`: IP addresses assigned to the server.
- `status`: Status of the server.
-
+
### method `__init__`
@@ -53,14 +53,14 @@ Construct the object.
---
-
+
## class `OpenstackCloud`
Client to interact with OpenStack cloud.
The OpenStack server name is managed by this cloud. Caller refers to the instances via instance_id. If the caller needs the server name, e.g., for logging, it can be queried with get_server_name.
-
+
### method `__init__`
@@ -83,7 +83,7 @@ Create the object.
---
-
+
### method `cleanup`
@@ -95,7 +95,7 @@ Cleanup unused openstack resources.
---
-
+
### method `delete_instance`
@@ -113,7 +113,7 @@ Delete a openstack instance.
---
-
+
### method `get_instance`
@@ -136,7 +136,7 @@ Get OpenStack instance by instance ID.
---
-
+
### method `get_instances`
@@ -153,7 +153,7 @@ Get all OpenStack instances.
---
-
+
### method `get_server_name`
@@ -176,7 +176,7 @@ Get server name on OpenStack.
---
-
+
### method `get_ssh_connection`
@@ -206,7 +206,7 @@ Get SSH connection to an OpenStack instance.
---
-
+
### method `launch_instance`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 2990d2a9d..9a27f34b5 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -23,6 +23,7 @@
from paramiko.ssh_exception import NoValidConnectionsError
from errors import KeyfileError, OpenStackError, SSHError
+from utilities import retry
logger = logging.getLogger(__name__)
@@ -82,8 +83,8 @@ def __init__(self, server: OpenstackServer, prefix: str):
)
self.instance_id = self.server_name[len(prefix) + 1 :]
-
@contextmanager
+@retry(tries=2, delay=5, local_logger=logger)
def _get_openstack_connection(
clouds_config: dict[str, dict], cloud: str
) -> Iterator[OpenstackConnection]:
@@ -103,7 +104,11 @@ def _get_openstack_connection(
"""
if not _CLOUDS_YAML_PATH.exists():
_CLOUDS_YAML_PATH.parent.mkdir(parents=True, exist_ok=True)
- _CLOUDS_YAML_PATH.write_text(data=yaml.dump(clouds_config), encoding="utf-8")
+
+ # Concurrency: Very small chance for the file to be corrupted due to multiple process calling
+ # this function and writing the file at the same time. This should cause the `conn.authorize`
+ # to fail, and retry of this function would resolve this.
+ _CLOUDS_YAML_PATH.write_text(data=yaml.dump(clouds_config), encoding="utf-8")
# api documents that keystoneauth1.exceptions.MissingRequiredOptions can be raised but
# I could not reproduce it. Therefore, no catch here for such exception.
From f586aee4952b56c1e127c5e4d2ae075c81a522a9 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 15 Aug 2024 15:16:28 +0800
Subject: [PATCH 183/278] Fix format
---
src-docs/openstack_cloud.openstack_cloud.md | 18 +++++++++---------
src/openstack_cloud/openstack_cloud.py | 3 ++-
2 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 316dadba1..8f639b1e6 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -53,14 +53,14 @@ Construct the object.
---
-
+
## class `OpenstackCloud`
Client to interact with OpenStack cloud.
The OpenStack server name is managed by this cloud. Caller refers to the instances via instance_id. If the caller needs the server name, e.g., for logging, it can be queried with get_server_name.
-
+
### method `__init__`
@@ -83,7 +83,7 @@ Create the object.
---
-
+
### method `cleanup`
@@ -95,7 +95,7 @@ Cleanup unused openstack resources.
---
-
+
### method `delete_instance`
@@ -113,7 +113,7 @@ Delete a openstack instance.
---
-
+
### method `get_instance`
@@ -136,7 +136,7 @@ Get OpenStack instance by instance ID.
---
-
+
### method `get_instances`
@@ -153,7 +153,7 @@ Get all OpenStack instances.
---
-
+
### method `get_server_name`
@@ -176,7 +176,7 @@ Get server name on OpenStack.
---
-
+
### method `get_ssh_connection`
@@ -206,7 +206,7 @@ Get SSH connection to an OpenStack instance.
---
-
+
### method `launch_instance`
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 9a27f34b5..567d53031 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -83,6 +83,7 @@ def __init__(self, server: OpenstackServer, prefix: str):
)
self.instance_id = self.server_name[len(prefix) + 1 :]
+
@contextmanager
@retry(tries=2, delay=5, local_logger=logger)
def _get_openstack_connection(
@@ -105,7 +106,7 @@ def _get_openstack_connection(
if not _CLOUDS_YAML_PATH.exists():
_CLOUDS_YAML_PATH.parent.mkdir(parents=True, exist_ok=True)
- # Concurrency: Very small chance for the file to be corrupted due to multiple process calling
+ # Concurrency: Very small chance for the file to be corrupted due to multiple process calling
# this function and writing the file at the same time. This should cause the `conn.authorize`
# to fail, and retry of this function would resolve this.
_CLOUDS_YAML_PATH.write_text(data=yaml.dump(clouds_config), encoding="utf-8")
From 95349f8eac1eade42e4c29215727b93d968bb49d Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 15 Aug 2024 15:36:00 +0800
Subject: [PATCH 184/278] Add delete runner by amount
---
src/manager/runner_manager.py | 57 ++++++++++++++-----
.../test_runner_manager_openstack.py | 8 +--
2 files changed, 47 insertions(+), 18 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index dde83c00b..3421f2b45 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -190,10 +190,26 @@ def get_runners(
]
return cast(tuple[RunnerInstance], tuple(runner_instances))
- def delete_runners(
+ def delete_runners(self, num: int) -> IssuedMetricEventsStats:
+ """Delete runners.
+
+ Args:
+ num: The number of runner to delete.
+
+ Returns:
+ Stats on metrics events issued during the deletion of runners.
+ """
+ logger.info("Deleting %s number of runners", num)
+ runners_list = self.get_runners()[:num]
+ runner_names = [runner.name for runner in runners_list]
+ logger.info("Deleting runners: %s", runner_names)
+ remove_token = self._github.get_removal_token()
+ return self._delete_runners(runners=runners_list, remove_token=remove_token)
+
+ def flush_runners(
self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE
) -> IssuedMetricEventsStats:
- """Delete the runners.
+ """Delete runners according to state.
Args:
flush_mode: The type of runners affect by the deletion.
@@ -203,9 +219,9 @@ def delete_runners(
"""
match flush_mode:
case FlushMode.FLUSH_IDLE:
- logger.info("Deleting idle runners...")
+ logger.info("Flushing idle runners...")
case FlushMode.FLUSH_BUSY:
- logger.info("Deleting idle and busy runners...")
+ logger.info("Flushing idle and busy runners...")
case _:
logger.critical(
"Unknown flush mode %s encountered, contact developers", flush_mode
@@ -217,17 +233,9 @@ def delete_runners(
runners_list = self.get_runners(github_runner_state=states)
runner_names = [runner.name for runner in runners_list]
- logger.info("Deleting runners: %s", runner_names)
+ logger.info("Flushing runners: %s", runner_names)
remove_token = self._github.get_removal_token()
-
- runner_metrics_list = []
- for runner in runners_list:
- deleted_runner_metrics = self._cloud.delete_runner(
- instance_id=runner.id, remove_token=remove_token
- )
- if deleted_runner_metrics is not None:
- runner_metrics_list.append(deleted_runner_metrics)
- return self._issue_runner_metrics(metrics=iter(runner_metrics_list))
+ return self._delete_runners(runners=runners_list, remove_token=remove_token)
def cleanup(self) -> IssuedMetricEventsStats:
"""Run cleanup of the runners and other resources.
@@ -240,6 +248,27 @@ def cleanup(self) -> IssuedMetricEventsStats:
deleted_runner_metrics = self._cloud.cleanup(remove_token)
return self._issue_runner_metrics(metrics=deleted_runner_metrics)
+ def _delete_runners(
+ self, runners: Sequence[RunnerInstance], remove_token: str
+ ) -> IssuedMetricEventsStats:
+ """Delete list of runners.
+
+ Args:
+ runners: The runners to delete.
+ remove_token: The token for removing self-hosted runners.
+
+ Returns:
+ Stats on metrics events issued during the deletion of runners.
+ """
+ runner_metrics_list = []
+ for runner in runners:
+ deleted_runner_metrics = self._cloud.delete_runner(
+ instance_id=runner.id, remove_token=remove_token
+ )
+ if deleted_runner_metrics is not None:
+ runner_metrics_list.append(deleted_runner_metrics)
+ return self._issue_runner_metrics(metrics=iter(runner_metrics_list))
+
def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetricEventsStats:
"""Issue runner metrics.
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 1685d6548..b2a75e9ed 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -255,7 +255,7 @@ async def test_runner_normal_idle_lifecycle(
assert openstack_runner_manager._health_check(runner)
# 3.
- runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+ runner_manager.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
await assert_runner_amount(runner_manager, 0)
@@ -298,7 +298,7 @@ async def test_runner_flush_busy_lifecycle(
assert busy_runner.github_state == GithubRunnerState.BUSY
# 2.
- runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+ runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
runner_list = runner_manager_with_one_runner.get_runners()
assert len(runner_list) == 1
busy_runner = runner_list[0]
@@ -306,7 +306,7 @@ async def test_runner_flush_busy_lifecycle(
assert busy_runner.github_state == GithubRunnerState.BUSY
# 3.
- runner_manager_with_one_runner.delete_runners(flush_mode=FlushMode.FLUSH_BUSY)
+ runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_BUSY)
issue_metrics_events = runner_manager_with_one_runner.cleanup()
assert issue_metrics_events[events.RunnerStart] == 1
@@ -398,5 +398,5 @@ async def test_runner_spawn_two(
assert len(runner_list) == 2
# 3.
- runner_manager.delete_runners(flush_mode=FlushMode.FLUSH_IDLE)
+ runner_manager.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
await assert_runner_amount(runner_manager, 0)
From a9e6323f60d440c8506128ce9e0e4485af6812ee Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 15 Aug 2024 17:54:47 +0800
Subject: [PATCH 185/278] Add getting runner health state for metrics
---
...penstack_cloud.openstack_runner_manager.md | 37 ++++++++++++++-----
src/manager/cloud_runner_manager.py | 5 +++
src/manager/runner_manager.py | 9 +++++
.../openstack_runner_manager.py | 13 +++++++
4 files changed, 54 insertions(+), 10 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 7c7acd442..d292b0e87 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -17,7 +17,7 @@ Manager for self-hosted runner on OpenStack.
---
-
+
## class `OpenstackRunnerManagerConfig`
Configuration for OpenstackRunnerManager.
@@ -70,7 +70,7 @@ __init__(
---
-
+
## class `RunnerHealth`
Runners with health state.
@@ -103,12 +103,12 @@ __init__(
---
-
+
## class `OpenstackRunnerManager`
Manage self-hosted runner on OpenStack cloud.
-
+
### method `__init__`
@@ -130,7 +130,7 @@ Construct the object.
---
-
+
### method `cleanup`
@@ -153,7 +153,7 @@ Cleanup runner and resource on the cloud.
---
-
+
### method `create_runner`
@@ -182,7 +182,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -206,7 +206,7 @@ Delete self-hosted runners.
---
-
+
### method `get_name_prefix`
@@ -223,7 +223,7 @@ Get the name prefix of the self-hosted runners.
---
-
+
### method `get_runner`
@@ -246,7 +246,24 @@ Get a self-hosted runner by instance id.
---
-
+
+
+### method `get_runner_health`
+
+```python
+get_runner_health() → RunnerByHealth
+```
+
+Get the runner health state.
+
+
+
+**Returns:**
+ The runners by the health state.
+
+---
+
+
### method `get_runners`
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 78fac93be..5a0aa2602 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -10,6 +10,7 @@
from typing import Iterator, Sequence, Tuple
from metrics.runner import RunnerMetrics
+from runner_type import RunnerByHealth
logger = logging.getLogger(__name__)
@@ -138,3 +139,7 @@ def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
Args:
remove_token: The GitHub remove token.
"""
+
+ @abc.abstractmethod
+ def get_runner_health(self) -> RunnerByHealth:
+ """Get the runners health state."""
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 3421f2b45..4ae3b7e6e 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -23,6 +23,7 @@
from metrics import github as github_metrics
from metrics import runner as runner_metrics
from metrics.runner import RunnerMetrics
+from runner_type import RunnerByHealth
logger = logging.getLogger(__name__)
@@ -248,6 +249,14 @@ def cleanup(self) -> IssuedMetricEventsStats:
deleted_runner_metrics = self._cloud.cleanup(remove_token)
return self._issue_runner_metrics(metrics=deleted_runner_metrics)
+ def get_runner_health(self) -> RunnerByHealth:
+ """Get the runner health state.
+
+ Returns:
+ The runners by the health state.
+ """
+ return self._cloud.get_runner_health()
+
def _delete_runners(
self, runners: Sequence[RunnerInstance], remove_token: str
) -> IssuedMetricEventsStats:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 61fc212a8..cdb118933 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -39,6 +39,7 @@
from openstack_cloud.openstack_cloud import OpenstackCloud, OpenstackInstance
from openstack_cloud.openstack_manager import GithubRunnerRemoveError
from repo_policy_compliance_client import RepoPolicyComplianceClient
+from runner_type import RunnerByHealth
from utilities import retry
logger = logging.getLogger(__name__)
@@ -266,6 +267,18 @@ def cleanup(self, remove_token: str) -> Iterator[runner_metrics.RunnerMetrics]:
self._openstack_cloud.cleanup()
return metrics
+ def get_runner_health(self) -> RunnerByHealth:
+ """Get the runner health state.
+
+ Returns:
+ The runners by the health state.
+ """
+ runners = self._get_runner_health()
+ return RunnerByHealth(
+ tuple(runner.server_name for runner in runners.healthy),
+ tuple(runner.server_name for runner in runners.unhealthy),
+ )
+
def _delete_runner(self, instance: OpenstackInstance, remove_token: str) -> None:
"""Delete self-hosted runners by openstack instance.
From 6d09cb89c26bd58b9e4a75f25bf23dedc4ed8206 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 16 Aug 2024 10:41:10 +0800
Subject: [PATCH 186/278] Fix security group ID issues
---
src/openstack_cloud/openstack_cloud.py | 6 +++---
src/openstack_cloud/openstack_runner_manager.py | 3 ++-
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 567d53031..a01aecbbd 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -587,14 +587,14 @@ def _ensure_security_group(conn: OpenstackConnection) -> OpenstackSecurityGroup:
if not rule_exists_icmp:
conn.create_security_group_rule(
- secgroup_name_or_id=_SECURITY_GROUP_NAME,
+ secgroup_name_or_id=security_group.id,
protocol="icmp",
direction="ingress",
ethertype="IPv4",
)
if not rule_exists_ssh:
conn.create_security_group_rule(
- secgroup_name_or_id=_SECURITY_GROUP_NAME,
+ secgroup_name_or_id=security_group.id,
port_range_min="22",
port_range_max="22",
protocol="tcp",
@@ -603,7 +603,7 @@ def _ensure_security_group(conn: OpenstackConnection) -> OpenstackSecurityGroup:
)
if not rule_exists_tmate_ssh:
conn.create_security_group_rule(
- secgroup_name_or_id=_SECURITY_GROUP_NAME,
+ secgroup_name_or_id=security_group.id,
port_range_min="10022",
port_range_max="10022",
protocol="tcp",
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index cdb118933..9a78ac37d 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -477,7 +477,8 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
except SSHError as err:
raise RunnerStartError(
- f"Failed to SSH connect to {instance.server_name} openstack runner"
+ f"Failed to SSH to {instance.server_name} during creation possible due to setup "
+ "not completed"
) from err
result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
From 134aac786ee211c678a9f4c8f6ddb1600bb3787a Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 16 Aug 2024 12:57:04 +0800
Subject: [PATCH 187/278] Fix according to review
---
src-docs/openstack_cloud.openstack_cloud.md | 4 +-
...penstack_cloud.openstack_runner_manager.md | 8 +--
src-docs/runner_type.md | 2 +-
src/manager/cloud_runner_manager.py | 4 +-
src/manager/runner_manager.py | 26 ++++----
src/openstack_cloud/openstack_cloud.py | 62 +++++++------------
src/openstack_cloud/openstack_manager.py | 12 ++--
.../openstack_runner_manager.py | 57 ++++++++---------
src/runner_manager.py | 8 +--
src/runner_type.py | 2 +-
tests/unit/test_openstack_manager.py | 10 +--
tests/unit/test_runner_manager.py | 6 +-
12 files changed, 93 insertions(+), 108 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md
index 8f639b1e6..d49b62008 100644
--- a/src-docs/openstack_cloud.openstack_cloud.md
+++ b/src-docs/openstack_cloud.openstack_cloud.md
@@ -91,7 +91,7 @@ Create the object.
cleanup() → None
```
-Cleanup unused openstack resources.
+Cleanup unused key files and openstack keypairs.
---
@@ -141,7 +141,7 @@ Get OpenStack instance by instance ID.
### method `get_instances`
```python
-get_instances() → tuple[OpenstackInstance]
+get_instances() → tuple[OpenstackInstance, ]
```
Get all OpenStack instances.
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index d292b0e87..a96d9d2eb 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -88,8 +88,8 @@ Runners with health state.
```python
__init__(
- healthy: tuple[OpenstackInstance],
- unhealthy: tuple[OpenstackInstance]
+ healthy: tuple[OpenstackInstance, ],
+ unhealthy: tuple[OpenstackInstance, ]
) → None
```
@@ -251,7 +251,7 @@ Get a self-hosted runner by instance id.
### method `get_runner_health`
```python
-get_runner_health() → RunnerByHealth
+get_runner_health() → RunnerNameByHealth
```
Get the runner health state.
@@ -259,7 +259,7 @@ Get the runner health state.
**Returns:**
- The runners by the health state.
+ The names of the runner by health state.
---
diff --git a/src-docs/runner_type.md b/src-docs/runner_type.md
index 481a17a62..8c9db658a 100644
--- a/src-docs/runner_type.md
+++ b/src-docs/runner_type.md
@@ -11,7 +11,7 @@ Types used by Runner class.
-## class `RunnerByHealth`
+## class `RunnerNameByHealth`
Set of runners instance by health state.
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 5a0aa2602..c26b9e7e8 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -10,7 +10,7 @@
from typing import Iterator, Sequence, Tuple
from metrics.runner import RunnerMetrics
-from runner_type import RunnerByHealth
+from runner_type import RunnerNameByHealth
logger = logging.getLogger(__name__)
@@ -141,5 +141,5 @@ def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
"""
@abc.abstractmethod
- def get_runner_health(self) -> RunnerByHealth:
+ def get_runner_health(self) -> RunnerNameByHealth:
"""Get the runners health state."""
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 4ae3b7e6e..2b011db60 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -23,7 +23,7 @@
from metrics import github as github_metrics
from metrics import runner as runner_metrics
from metrics.runner import RunnerMetrics
-from runner_type import RunnerByHealth
+from runner_type import RunnerNameByHealth
logger = logging.getLogger(__name__)
@@ -135,25 +135,25 @@ def create_runners(self, num: int) -> tuple[InstanceId]:
def get_runners(
self,
- github_runner_state: Sequence[GithubRunnerState] | None = None,
- cloud_runner_state: Sequence[CloudRunnerState] | None = None,
+ github_states: Sequence[GithubRunnerState] | None = None,
+ cloud_states: Sequence[CloudRunnerState] | None = None,
) -> tuple[RunnerInstance]:
"""Get information on runner filter by state.
Only runners that has cloud instance are returned.
Args:
- github_runner_state: Filter for the runners with these github states. If None all
+ github_states: Filter for the runners with these github states. If None all
states will be included.
- cloud_runner_state: Filter for the runners with these cloud states. If None all states
+ cloud_states: Filter for the runners with these cloud states. If None all states
will be included.
Returns:
Information on the runners.
"""
logger.info("Getting runners...")
- github_infos = self._github.get_runners(github_runner_state)
- cloud_infos = self._cloud.get_runners(cloud_runner_state)
+ github_infos = self._github.get_runners(github_states)
+ cloud_infos = self._cloud.get_runners(cloud_states)
github_infos_map = {info.name: info for info in github_infos}
cloud_infos_map = {info.name: info for info in cloud_infos}
logger.info(
@@ -179,15 +179,15 @@ def get_runners(
)
for name in cloud_infos_map.keys()
]
- if cloud_runner_state is not None:
+ if cloud_states is not None:
runner_instances = [
- runner for runner in runner_instances if runner.cloud_state in cloud_runner_state
+ runner for runner in runner_instances if runner.cloud_state in cloud_states
]
- if github_runner_state is not None:
+ if github_states is not None:
runner_instances = [
runner
for runner in runner_instances
- if runner.github_state is not None and runner.github_state in github_runner_state
+ if runner.github_state is not None and runner.github_state in github_states
]
return cast(tuple[RunnerInstance], tuple(runner_instances))
@@ -232,7 +232,7 @@ def flush_runners(
if flush_mode == FlushMode.FLUSH_BUSY:
states.append(GithubRunnerState.BUSY)
- runners_list = self.get_runners(github_runner_state=states)
+ runners_list = self.get_runners(github_states=states)
runner_names = [runner.name for runner in runners_list]
logger.info("Flushing runners: %s", runner_names)
remove_token = self._github.get_removal_token()
@@ -249,7 +249,7 @@ def cleanup(self) -> IssuedMetricEventsStats:
deleted_runner_metrics = self._cloud.cleanup(remove_token)
return self._issue_runner_metrics(metrics=deleted_runner_metrics)
- def get_runner_health(self) -> RunnerByHealth:
+ def get_runner_health(self) -> RunnerNameByHealth:
"""Get the runner health state.
Returns:
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index a01aecbbd..ceac7fd6d 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -309,7 +309,7 @@ def get_ssh_connection(self, instance: OpenstackInstance) -> SSHConnection:
f"addresses: {instance.addresses}"
)
- def get_instances(self) -> tuple[OpenstackInstance]:
+ def get_instances(self) -> tuple[OpenstackInstance, ...]:
"""Get all OpenStack instances.
Returns:
@@ -320,26 +320,26 @@ def get_instances(self) -> tuple[OpenstackInstance]:
with _get_openstack_connection(
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
- servers = self._get_openstack_instances(conn)
- server_names = set(server.name for server in servers)
-
- instances = []
- for name in server_names:
- # The server can be deleted between the `_get_openstack_instances` call and this
- # line. This is an issues during tests. Hence the need for None check.
- server = OpenstackCloud._get_and_ensure_unique_server(conn, name)
- if server is not None:
- instances.append(OpenstackInstance(server, self.prefix))
- return cast(tuple[OpenstackInstance], tuple(instances))
+ instance_list = self._get_openstack_instances(conn)
+ server_names = set(server.name for server in instance_list)
+
+ server_list = [
+ OpenstackCloud._get_and_ensure_unique_server(conn, name) for name in server_names
+ ]
+ return tuple(
+ OpenstackInstance(server, self.prefix)
+ for server in server_list
+ if server is not None
+ )
def cleanup(self) -> None:
- """Cleanup unused openstack resources."""
+ """Cleanup unused key files and openstack keypairs."""
with _get_openstack_connection(
clouds_config=self._clouds_config, cloud=self._cloud
) as conn:
- server_list = self._get_openstack_instances(conn)
- exclude_list = [server.name for server in server_list]
- self._cleanup_key_files(conn, exclude_list)
+ instances = self._get_openstack_instances(conn)
+ exclude_list = [server.name for server in instances]
+ self._cleanup_key_files(exclude_list)
self._cleanup_openstack_keypairs(conn, exclude_list)
def get_server_name(self, instance_id: str) -> str:
@@ -353,13 +353,10 @@ def get_server_name(self, instance_id: str) -> str:
"""
return f"{self.prefix}-{instance_id}"
- def _cleanup_key_files(
- self, conn: OpenstackConnection, exclude_instances: Iterable[str]
- ) -> None:
+ def _cleanup_key_files(self, exclude_instances: Iterable[str]) -> None:
"""Delete all SSH key files except the specified instances.
Args:
- conn: The Openstack connection instance.
exclude_instances: The keys of these instance will not be deleted.
"""
logger.info("Cleaning up SSH key files")
@@ -375,16 +372,6 @@ def _cleanup_key_files(
total += 1
if path.name in exclude_filename:
continue
-
- keypair_name = path.name.split(".")[0]
- try:
- conn.delete_keypair(keypair_name)
- except openstack.exceptions.SDKException:
- logger.warning(
- "Unable to delete OpenStack keypair associated with deleted key file %s ",
- path.name,
- )
-
path.unlink()
deleted += 1
logger.info("Found %s key files, clean up %s key files", total, deleted)
@@ -403,7 +390,7 @@ def _cleanup_openstack_keypairs(
for key in keypairs:
# The `name` attribute is of resource.Body type.
if key.name and str(key.name).startswith(self.prefix):
- if str(key.name) in exclude_instances:
+ if str(key.name) in set(exclude_instances):
continue
try:
@@ -414,7 +401,7 @@ def _cleanup_openstack_keypairs(
key.name,
)
- def _get_openstack_instances(self, conn: OpenstackConnection) -> tuple[OpenstackServer]:
+ def _get_openstack_instances(self, conn: OpenstackConnection) -> tuple[OpenstackServer, ...]:
"""Get the OpenStack servers managed by this unit.
Args:
@@ -423,13 +410,10 @@ def _get_openstack_instances(self, conn: OpenstackConnection) -> tuple[Openstack
Returns:
List of OpenStack instances.
"""
- return cast(
- tuple[OpenstackServer],
- tuple(
- server
- for server in cast(list[OpenstackServer], conn.list_servers())
- if server.name.startswith(f"{self.prefix}-")
- ),
+ return tuple(
+ server
+ for server in cast(list[OpenstackServer], conn.list_servers())
+ if server.name.startswith(f"{self.prefix}-")
)
@staticmethod
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index 2893d65dd..6b6b3d082 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -62,7 +62,7 @@
from repo_policy_compliance_client import RepoPolicyComplianceClient
from runner_manager import IssuedMetricEventsStats
from runner_manager_type import FlushMode, OpenstackRunnerManagerConfig
-from runner_type import GithubPath, RunnerByHealth, RunnerGithubInfo
+from runner_type import GithubPath, RunnerGithubInfo, RunnerNameByHealth
from utilities import retry, set_env_var
logger = logging.getLogger(__name__)
@@ -419,7 +419,7 @@ def get_github_runner_info(self) -> tuple[RunnerGithubInfo, ...]:
if runner["name"].startswith(f"{self.instance_name}-")
)
- def _get_openstack_runner_status(self, conn: OpenstackConnection) -> RunnerByHealth:
+ def _get_openstack_runner_status(self, conn: OpenstackConnection) -> RunnerNameByHealth:
"""Get status on OpenStack of each runner.
Args:
@@ -440,7 +440,7 @@ def _get_openstack_runner_status(self, conn: OpenstackConnection) -> RunnerByHea
else:
healthy_runner.append(instance.name)
- return RunnerByHealth(healthy=tuple(healthy_runner), unhealthy=tuple(unhealthy_runner))
+ return RunnerNameByHealth(healthy=tuple(healthy_runner), unhealthy=tuple(unhealthy_runner))
def _get_openstack_instances(self, conn: OpenstackConnection) -> list[Server]:
"""Get the OpenStack servers managed by this unit.
@@ -1302,7 +1302,7 @@ def _clean_up_openstack_keypairs(
)
def _clean_up_runners(
- self, conn: OpenstackConnection, runner_by_health: RunnerByHealth, remove_token: str
+ self, conn: OpenstackConnection, runner_by_health: RunnerNameByHealth, remove_token: str
) -> None:
"""Clean up offline or unhealthy runners.
@@ -1355,7 +1355,7 @@ def _scale(
self,
quantity: int,
conn: OpenstackConnection,
- runner_by_health: RunnerByHealth,
+ runner_by_health: RunnerNameByHealth,
remove_token: str,
) -> int:
"""Scale the number of runners.
@@ -1488,7 +1488,7 @@ def _issue_reconciliation_metric(
metric_stats: IssuedMetricEventsStats,
reconciliation_start_ts: float,
reconciliation_end_ts: float,
- runner_states: RunnerByHealth,
+ runner_states: RunnerNameByHealth,
) -> None:
"""Issue reconciliation metric.
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 9a78ac37d..5019fe853 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -39,7 +39,7 @@
from openstack_cloud.openstack_cloud import OpenstackCloud, OpenstackInstance
from openstack_cloud.openstack_manager import GithubRunnerRemoveError
from repo_policy_compliance_client import RepoPolicyComplianceClient
-from runner_type import RunnerByHealth
+from runner_type import RunnerNameByHealth
from utilities import retry
logger = logging.getLogger(__name__)
@@ -105,8 +105,8 @@ class RunnerHealth:
unhealthy: The list of unhealthy runners.
"""
- healthy: tuple[OpenstackInstance]
- unhealthy: tuple[OpenstackInstance]
+ healthy: tuple[OpenstackInstance, ...]
+ unhealthy: tuple[OpenstackInstance, ...]
class OpenstackRunnerManager(CloudRunnerManager):
@@ -150,7 +150,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
start_timestamp = time.time()
instance_id = OpenstackRunnerManager._generate_instance_id()
instance_name = self._openstack_cloud.get_server_name(instance_id=instance_id)
- userdata = self._generate_userdata(
+ cloud_init = self._generate_cloud_init(
instance_name=instance_name, registration_token=registration_token
)
try:
@@ -159,7 +159,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
image=self.config.image,
flavor=self.config.flavor,
network=self.config.network,
- cloud_init=userdata,
+ cloud_init=cloud_init,
)
except OpenStackError as err:
raise RunnerCreateError(f"Failed to create {instance_name} openstack runner") from err
@@ -185,16 +185,16 @@ def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
Returns:
Information on the runner instance.
"""
- name = self._openstack_cloud.get_server_name(instance_id)
- instances_list = self._openstack_cloud.get_instances()
- for instance in instances_list:
- if instance.server_name == name:
- return CloudRunnerInstance(
- name=name,
- instance_id=instance_id,
- state=CloudRunnerState.from_openstack_server_status(instance.status),
- )
- return None
+ instance = self._openstack_cloud.get_instance(instance_id)
+ return (
+ CloudRunnerInstance(
+ name=instance.server_name,
+ instance_id=instance_id,
+ state=CloudRunnerState.from_openstack_server_status(instance.status),
+ )
+ if instance is not None
+ else None
+ )
def get_runners(
self, states: Sequence[CloudRunnerState] | None = None
@@ -242,7 +242,7 @@ def delete_runner(
return None
metric = runner_metrics.extract(
- metrics_storage_manager=metrics_storage, runners=instance.server_name
+ metrics_storage_manager=metrics_storage, runners=set(instance.server_name)
)
self._delete_runner(instance, remove_token)
return next(metric, None)
@@ -267,14 +267,14 @@ def cleanup(self, remove_token: str) -> Iterator[runner_metrics.RunnerMetrics]:
self._openstack_cloud.cleanup()
return metrics
- def get_runner_health(self) -> RunnerByHealth:
+ def get_runner_health(self) -> RunnerNameByHealth:
"""Get the runner health state.
Returns:
- The runners by the health state.
+ The names of the runner by health state.
"""
runners = self._get_runner_health()
- return RunnerByHealth(
+ return RunnerNameByHealth(
tuple(runner.server_name for runner in runners.healthy),
tuple(runner.server_name for runner in runners.unhealthy),
)
@@ -291,7 +291,7 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token: str) -> None
self._pull_runner_metrics(instance.server_name, ssh_conn)
try:
- OpenstackRunnerManager._run_github_runner_removal_script(
+ OpenstackRunnerManager._run_runner_removal_script(
instance.server_name, ssh_conn, remove_token
)
except GithubRunnerRemoveError:
@@ -326,18 +326,19 @@ def _get_runner_health(self) -> RunnerHealth:
healthy, unhealthy = [], []
for runner in runner_list:
cloud_state = CloudRunnerState.from_openstack_server_status(runner.status)
- if cloud_state in (
- CloudRunnerState.DELETED,
- CloudRunnerState.ERROR,
- CloudRunnerState.STOPPED,
- CloudRunnerState.UNKNOWN,
+ if cloud_state in set(
+ (
+ CloudRunnerState.DELETED,
+ CloudRunnerState.ERROR,
+ CloudRunnerState.STOPPED,
+ )
) or not self._health_check(runner):
unhealthy.append(runner)
else:
healthy.append(runner)
return RunnerHealth(healthy=tuple(healthy), unhealthy=tuple(unhealthy))
- def _generate_userdata(self, instance_name: str, registration_token: str) -> str:
+ def _generate_cloud_init(self, instance_name: str, registration_token: str) -> str:
"""Generate cloud init userdata.
This is the script the openstack server runs on startup.
@@ -347,7 +348,7 @@ def _generate_userdata(self, instance_name: str, registration_token: str) -> str
registration_token: The GitHub runner registration token.
Returns:
- The userdata for openstack instance.
+ The cloud init userdata for openstack instance.
"""
jinja = jinja2.Environment(loader=jinja2.FileSystemLoader("templates"), autoescape=True)
@@ -667,7 +668,7 @@ def _ssh_pull_file(
raise _PullFileError(f"Unable to retrieve file {remote_path}") from exc
@staticmethod
- def _run_github_runner_removal_script(
+ def _run_runner_removal_script(
instance_name: str, ssh_conn: SSHConnection, remove_token: str
) -> None:
"""Run Github runner removal script.
diff --git a/src/runner_manager.py b/src/runner_manager.py
index 25aca060e..8d68a68c9 100644
--- a/src/runner_manager.py
+++ b/src/runner_manager.py
@@ -43,7 +43,7 @@
from runner import LXD_PROFILE_YAML, CreateRunnerConfig, Runner, RunnerConfig, RunnerStatus
from runner_manager_type import FlushMode, RunnerInfo, RunnerManagerClients, RunnerManagerConfig
from runner_type import ProxySetting as RunnerProxySetting
-from runner_type import RunnerByHealth
+from runner_type import RunnerNameByHealth
from utilities import execute_command, retry, set_env_var
REMOVED_RUNNER_LOG_STR = "Removed runner: %s"
@@ -222,7 +222,7 @@ def get_github_info(self) -> Iterator[RunnerInfo]:
for runner in remote_runners.values()
)
- def _get_runner_health_states(self) -> RunnerByHealth:
+ def _get_runner_health_states(self) -> RunnerNameByHealth:
"""Get all runners sorted into health groups.
Returns:
@@ -247,7 +247,7 @@ def _get_runner_health_states(self) -> RunnerByHealth:
else:
unhealthy.append(runner.name)
- return RunnerByHealth(healthy, unhealthy)
+ return RunnerNameByHealth(healthy, unhealthy)
def _create_runner(
self, registration_token: str, resources: VirtualMachineResources, runner: Runner
@@ -491,7 +491,7 @@ def _remove_runners(self, count: int, runners: list[Runner]) -> None:
logger.info("There are no idle runners to remove.")
def _cleanup_offline_runners(
- self, runner_states: RunnerByHealth, all_runners: list[Runner]
+ self, runner_states: RunnerNameByHealth, all_runners: list[Runner]
) -> None:
"""Cleanup runners that are not running the github run.sh script.
diff --git a/src/runner_type.py b/src/runner_type.py
index ef4ce5f07..86769eafd 100644
--- a/src/runner_type.py
+++ b/src/runner_type.py
@@ -12,7 +12,7 @@
@dataclass
-class RunnerByHealth:
+class RunnerNameByHealth:
"""Set of runners instance by health state.
Attributes:
diff --git a/tests/unit/test_openstack_manager.py b/tests/unit/test_openstack_manager.py
index 9399cd46e..5e43fb518 100644
--- a/tests/unit/test_openstack_manager.py
+++ b/tests/unit/test_openstack_manager.py
@@ -27,7 +27,7 @@
from openstack_cloud import openstack_manager
from openstack_cloud.openstack_manager import MAX_METRICS_FILE_SIZE, METRICS_EXCHANGE_PATH
from runner_manager_type import FlushMode
-from runner_type import RunnerByHealth, RunnerGithubInfo
+from runner_type import RunnerGithubInfo, RunnerNameByHealth
from tests.unit import factories
FAKE_MONGODB_URI = "mongodb://example.com/db"
@@ -510,7 +510,7 @@ def test_reconcile_pulls_metric_files(
monkeypatch.setattr(openstack_manager.metrics_storage, "create", MagicMock(return_value=ms))
monkeypatch.setattr(openstack_manager.metrics_storage, "get", MagicMock(return_value=ms))
openstack_manager_for_reconcile._get_openstack_runner_status = MagicMock(
- return_value=RunnerByHealth(healthy=(), unhealthy=("test_runner",))
+ return_value=RunnerNameByHealth(healthy=(), unhealthy=("test_runner",))
)
ssh_connection_mock.get.side_effect = MagicMock()
openstack_manager_for_reconcile.reconcile(quantity=0)
@@ -545,7 +545,7 @@ def test_reconcile_does_not_pull_too_large_files(
Result(stdout=f"{MAX_METRICS_FILE_SIZE + 1}") if cmd.startswith("stat") else Result()
)
openstack_manager_for_reconcile._get_openstack_runner_status = MagicMock(
- return_value=RunnerByHealth(healthy=("test_runner",), unhealthy=())
+ return_value=RunnerNameByHealth(healthy=("test_runner",), unhealthy=())
)
openstack_manager_for_reconcile.reconcile(quantity=0)
@@ -570,7 +570,7 @@ def test_reconcile_issue_reconciliation_metrics(
monkeypatch.setattr(openstack_manager.metrics_storage, "create", MagicMock(return_value=ms))
monkeypatch.setattr(openstack_manager.metrics_storage, "get", MagicMock(return_value=ms))
openstack_manager_for_reconcile._get_openstack_runner_status = MagicMock(
- return_value=RunnerByHealth(healthy=("test_runner",), unhealthy=())
+ return_value=RunnerNameByHealth(healthy=("test_runner",), unhealthy=())
)
openstack_manager.runner_metrics.extract.return_value = (MagicMock() for _ in range(2))
@@ -635,7 +635,7 @@ def test_reconcile_ignores_metrics_for_openstack_online_runners(
]
}
openstack_manager_for_reconcile._get_openstack_runner_status = MagicMock(
- return_value=RunnerByHealth(
+ return_value=RunnerNameByHealth(
healthy=(runner_names["healthy_online"], runner_names["healthy_offline"]),
unhealthy=(
runner_names["unhealthy_online"],
diff --git a/tests/unit/test_runner_manager.py b/tests/unit/test_runner_manager.py
index 94d3373d4..66b09cd60 100644
--- a/tests/unit/test_runner_manager.py
+++ b/tests/unit/test_runner_manager.py
@@ -29,7 +29,7 @@
from metrics.storage import MetricsStorage
from runner import Runner, RunnerStatus
from runner_manager import BUILD_IMAGE_SCRIPT_FILENAME, RunnerManager, RunnerManagerConfig
-from runner_type import RunnerByHealth
+from runner_type import RunnerNameByHealth
from tests.unit.mock import TEST_BINARY, MockLxdImageManager
FAKE_MONGODB_URI = "mongodb://example.com/db"
@@ -268,7 +268,7 @@ def mock_get_runners():
# Create online runners.
runner_manager._get_runners = mock_get_runners
- runner_manager._get_runner_health_states = lambda: RunnerByHealth(
+ runner_manager._get_runner_health_states = lambda: RunnerNameByHealth(
(
f"{runner_manager.instance_name}-0",
f"{runner_manager.instance_name}-1",
@@ -433,7 +433,7 @@ def mock_get_runners():
# Create online runners.
runner_manager._get_runners = mock_get_runners
- runner_manager._get_runner_health_states = lambda: RunnerByHealth(
+ runner_manager._get_runner_health_states = lambda: RunnerNameByHealth(
healthy=(
online_idle_runner_name,
offline_idle_runner_name,
From 4727f71f706edde6e65401f5d186056ace30f27a Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 16 Aug 2024 13:49:02 +0800
Subject: [PATCH 188/278] Refactor health state for runner
---
src/manager/cloud_runner_manager.py | 23 ++++----
src/manager/runner_manager.py | 34 +++++++-----
.../openstack_runner_manager.py | 53 ++++++++++---------
.../test_runner_manager_openstack.py | 2 +-
4 files changed, 60 insertions(+), 52 deletions(-)
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index c26b9e7e8..6a5abd5d8 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -6,9 +6,10 @@
import abc
import logging
from dataclasses import dataclass
-from enum import Enum
+from enum import Enum, auto
from typing import Iterator, Sequence, Tuple
+from manager.runner_manager import HealthState
from metrics.runner import RunnerMetrics
from runner_type import RunnerNameByHealth
@@ -30,13 +31,13 @@ class CloudRunnerState(str, Enum):
UNEXPECTED: An unknown state not accounted by the developer is encountered.
"""
- CREATED = "created"
- ACTIVE = "active"
- DELETED = "deleted"
- ERROR = "error"
- STOPPED = "stopped"
- UNKNOWN = "unknown"
- UNEXPECTED = "unexpected"
+ CREATED = auto()
+ ACTIVE = auto()
+ DELETED = auto()
+ ERROR = auto()
+ STOPPED = auto()
+ UNKNOWN = auto()
+ UNEXPECTED = auto()
# Disable "Too many return statements" as this method is using case statement for converting
# the states, which does not cause a complexity issue.
@@ -81,11 +82,13 @@ class CloudRunnerInstance:
Attributes:
name: Name of the instance hosting the runner.
instance_id: ID of the instance.
+ health: Health state of the runner.
state: State of the instance hosting the runner.
"""
name: str
instance_id: InstanceId
+ health: HealthState
state: CloudRunnerState
@@ -139,7 +142,3 @@ def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
Args:
remove_token: The GitHub remove token.
"""
-
- @abc.abstractmethod
- def get_runner_health(self) -> RunnerNameByHealth:
- """Get the runners health state."""
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 2b011db60..9a7e7385d 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -40,21 +40,34 @@ class FlushMode(Enum):
FLUSH_IDLE = auto()
FLUSH_BUSY = auto()
-
-
+
+class HealthState(Enum):
+ """Health state of the runners.
+
+ Attributes:
+ HEALTHY: The runner is healthy.
+ UNHEALTHY: The runner is not healthy.
+ UNKNOWN: Unable to get the health state.
+ """
+ HEALTHY = auto()
+ UNHEALTHY= auto()
+ UNKNOWN=auto()
+
@dataclass
class RunnerInstance:
"""Represents an instance of runner.
Attributes:
name: Full name of the runner. Managed by the cloud runner manager.
- id: ID of the runner. Managed by the runner manager.
+ instance_id: ID of the runner. Managed by the runner manager.
+ health: The health state of the runner.
github_state: State on github.
cloud_state: State on cloud.
"""
name: str
- id: InstanceId
+ instance_id: InstanceId
+ health: HealthState
github_state: GithubRunnerState | None
cloud_state: CloudRunnerState
@@ -66,7 +79,8 @@ def __init__(self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedR
github_info: Information on the GitHub of the runner.
"""
self.name = cloud_instance.name
- self.id = cloud_instance.instance_id
+ self.instance_id = cloud_instance.instance_id
+ self.health = cloud_instance.health
self.github_state = (
GithubRunnerState.from_runner(github_info) if github_info is not None else None
)
@@ -249,14 +263,6 @@ def cleanup(self) -> IssuedMetricEventsStats:
deleted_runner_metrics = self._cloud.cleanup(remove_token)
return self._issue_runner_metrics(metrics=deleted_runner_metrics)
- def get_runner_health(self) -> RunnerNameByHealth:
- """Get the runner health state.
-
- Returns:
- The runners by the health state.
- """
- return self._cloud.get_runner_health()
-
def _delete_runners(
self, runners: Sequence[RunnerInstance], remove_token: str
) -> IssuedMetricEventsStats:
@@ -272,7 +278,7 @@ def _delete_runners(
runner_metrics_list = []
for runner in runners:
deleted_runner_metrics = self._cloud.delete_runner(
- instance_id=runner.id, remove_token=remove_token
+ instance_id=runner.instance_id, remove_token=remove_token
)
if deleted_runner_metrics is not None:
runner_metrics_list.append(deleted_runner_metrics)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 5019fe853..4ea2ade47 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -33,6 +33,7 @@
CloudRunnerState,
InstanceId,
)
+from manager.runner_manager import HealthState
from metrics import events as metric_events
from metrics import runner as runner_metrics
from metrics import storage as metrics_storage
@@ -186,10 +187,12 @@ def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
Information on the runner instance.
"""
instance = self._openstack_cloud.get_instance(instance_id)
+ healthy = self._runner_health_check(instance=instance)
return (
CloudRunnerInstance(
name=instance.server_name,
instance_id=instance_id,
+ health=HealthState.HEALTHY if healthy else HealthState.UNHEALTHY,
state=CloudRunnerState.from_openstack_server_status(instance.status),
)
if instance is not None
@@ -213,6 +216,7 @@ def get_runners(
CloudRunnerInstance(
name=instance.server_name,
instance_id=instance.instance_id,
+ health=HealthState.HEALTHY if self._runner_health_check(instance) else HealthState.UNHEALTHY,
state=CloudRunnerState.from_openstack_server_status(instance.status),
)
for instance in instance_list
@@ -256,7 +260,7 @@ def cleanup(self, remove_token: str) -> Iterator[runner_metrics.RunnerMetrics]:
Returns:
Any metrics retrieved from cleanup runners.
"""
- runners = self._get_runner_health()
+ runners = self._get_runners_health()
healthy_runner_names = [runner.server_name for runner in runners.healthy]
metrics = runner_metrics.extract(
metrics_storage_manager=metrics_storage, runners=set(healthy_runner_names)
@@ -267,18 +271,6 @@ def cleanup(self, remove_token: str) -> Iterator[runner_metrics.RunnerMetrics]:
self._openstack_cloud.cleanup()
return metrics
- def get_runner_health(self) -> RunnerNameByHealth:
- """Get the runner health state.
-
- Returns:
- The names of the runner by health state.
- """
- runners = self._get_runner_health()
- return RunnerNameByHealth(
- tuple(runner.server_name for runner in runners.healthy),
- tuple(runner.server_name for runner in runners.unhealthy),
- )
-
def _delete_runner(self, instance: OpenstackInstance, remove_token: str) -> None:
"""Delete self-hosted runners by openstack instance.
@@ -314,8 +306,8 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token: str) -> None
logger.exception(
"Unable to delete openstack instance for runner %s", instance.server_name
)
-
- def _get_runner_health(self) -> RunnerHealth:
+
+ def _get_runners_health(self) -> RunnerHealth:
"""Get runners by health state.
Returns:
@@ -325,19 +317,30 @@ def _get_runner_health(self) -> RunnerHealth:
healthy, unhealthy = [], []
for runner in runner_list:
- cloud_state = CloudRunnerState.from_openstack_server_status(runner.status)
- if cloud_state in set(
- (
- CloudRunnerState.DELETED,
- CloudRunnerState.ERROR,
- CloudRunnerState.STOPPED,
- )
- ) or not self._health_check(runner):
- unhealthy.append(runner)
- else:
+ if self._runner_health_check(runner):
healthy.append(runner)
+ else:
+ unhealthy.append(runner)
return RunnerHealth(healthy=tuple(healthy), unhealthy=tuple(unhealthy))
+ def _runner_health_check(self, instance: OpenstackInstance) -> bool:
+ """Run health check on a runner.
+
+ Args:
+ instance: The instance hosting the runner to run health check on.
+
+ Returns:
+ True if runner is healthy.
+ """
+ cloud_state = CloudRunnerState.from_openstack_server_status(instance.status)
+ return cloud_state not in set(
+ (
+ CloudRunnerState.DELETED,
+ CloudRunnerState.ERROR,
+ CloudRunnerState.STOPPED,
+ )
+ ) and self._health_check(instance)
+
def _generate_cloud_init(self, instance_name: str, registration_token: str) -> str:
"""Generate cloud init userdata.
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index b2a75e9ed..d92fec639 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -238,7 +238,7 @@ async def test_runner_normal_idle_lifecycle(
assert isinstance(runner_list, tuple)
assert len(runner_list) == 1
runner = runner_list[0]
- assert runner.id == runner_id
+ assert runner.instance_id == runner_id
assert runner.cloud_state == CloudRunnerState.ACTIVE
# Update on GitHub-side can take a bit of time.
await wait_for(
From 1d055ebf13f6413273a169e42ab905bd9d5adbe2 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 16 Aug 2024 14:38:29 +0800
Subject: [PATCH 189/278] Fix lint issues
---
...penstack_cloud.openstack_runner_manager.md | 120 ++++++-----------
src/manager/cloud_runner_manager.py | 58 +++++++-
src/manager/runner_manager.py | 22 +--
.../openstack_runner_manager.py | 127 ++++++++++--------
.../test_runner_manager_openstack.py | 17 ++-
5 files changed, 182 insertions(+), 162 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index a96d9d2eb..7a0969bf5 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -17,10 +17,10 @@ Manager for self-hosted runner on OpenStack.
---
-
+
-## class `OpenstackRunnerManagerConfig`
-Configuration for OpenstackRunnerManager.
+## class `OpenStackCloudConfig`
+Configuration for OpenStack cloud authorisation information.
@@ -28,36 +28,13 @@ Configuration for OpenstackRunnerManager.
- `clouds_config`: The clouds.yaml.
- `cloud`: The cloud name to connect to.
- - `image`: The image name for runners to use.
- - `flavor`: The flavor name for runners to use.
- - `network`: The network name for runners to use.
- - `github_path`: The GitHub organization or repository for runners to connect to.
- - `labels`: The labels to add to runners.
- - `proxy_config`: The proxy configuration.
- - `dockerhub_mirror`: The dockerhub mirror to use for runners.
- - `ssh_debug_connections`: The information on the ssh debug services.
- - `repo_policy_url`: The URL of the repo policy service.
- - `repo_policy_token`: The token to access the repo policy service.
### method `__init__`
```python
-__init__(
- clouds_config: dict[str, dict],
- cloud: str,
- image: str,
- flavor: str,
- network: str,
- github_path: GithubOrg | GithubRepo,
- labels: list[str],
- proxy_config: ProxyConfig | None,
- dockerhub_mirror: str | None,
- ssh_debug_connections: list[SSHDebugConnection] | None,
- repo_policy_url: str | None,
- repo_policy_token: str | None
-) → None
+__init__(clouds_config: dict[str, dict], cloud: str) → None
```
@@ -70,27 +47,25 @@ __init__(
---
-
+
-## class `RunnerHealth`
-Runners with health state.
+## class `OpenStackServerConfig`
+Configuration for OpenStack server.
**Attributes:**
- - `healthy`: The list of healthy runners.
- - `unhealthy`: The list of unhealthy runners.
+ - `image`: The image name for runners to use.
+ - `flavor`: The flavor name for runners to use.
+ - `network`: The network name for runners to use.
### method `__init__`
```python
-__init__(
- healthy: tuple[OpenstackInstance, ],
- unhealthy: tuple[OpenstackInstance, ]
-) → None
+__init__(image: str, flavor: str, network: str) → None
```
@@ -103,17 +78,29 @@ __init__(
---
-
+
## class `OpenstackRunnerManager`
Manage self-hosted runner on OpenStack cloud.
-
+
+
+**Attributes:**
+
+ - `name_prefix`: The name prefix of the runners created.
+
+
### method `__init__`
```python
-__init__(prefix: str, config: OpenstackRunnerManagerConfig) → None
+__init__(
+ prefix: str,
+ cloud_config: OpenStackCloudConfig,
+ server_config: OpenStackServerConfig,
+ runner_config: GitHubRunnerConfig,
+ service_config: SupportServiceConfig
+) → None
```
Construct the object.
@@ -123,14 +110,23 @@ Construct the object.
**Args:**
- `prefix`: The prefix to runner name.
- - `config`: Configuration of the object.
+ - `cloud_config`: The configuration for OpenStack authorisation.
+ - `server_config`: The configuration for creating OpenStack server.
+ - `runner_config`: The configuration for the runner.
+ - `service_config`: The configuration of supporting services of the runners.
+
+---
+
+#### property name_prefix
+
+Get the name prefix of the self-hosted runners.
---
-
+
### method `cleanup`
@@ -153,7 +149,7 @@ Cleanup runner and resource on the cloud.
---
-
+
### method `create_runner`
@@ -182,7 +178,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -206,24 +202,7 @@ Delete self-hosted runners.
---
-
-
-### method `get_name_prefix`
-
-```python
-get_name_prefix() → str
-```
-
-Get the name prefix of the self-hosted runners.
-
-
-
-**Returns:**
- The name prefix.
-
----
-
-
+
### method `get_runner`
@@ -246,24 +225,7 @@ Get a self-hosted runner by instance id.
---
-
-
-### method `get_runner_health`
-
-```python
-get_runner_health() → RunnerNameByHealth
-```
-
-Get the runner health state.
-
-
-
-**Returns:**
- The names of the runner by health state.
-
----
-
-
+
### method `get_runners`
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 6a5abd5d8..acc04b542 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -9,15 +9,28 @@
from enum import Enum, auto
from typing import Iterator, Sequence, Tuple
-from manager.runner_manager import HealthState
+from charm_state import GithubPath, ProxyConfig, SSHDebugConnection
from metrics.runner import RunnerMetrics
-from runner_type import RunnerNameByHealth
logger = logging.getLogger(__name__)
InstanceId = str
+class HealthState(Enum):
+ """Health state of the runners.
+
+ Attributes:
+ HEALTHY: The runner is healthy.
+ UNHEALTHY: The runner is not healthy.
+ UNKNOWN: Unable to get the health state.
+ """
+
+ HEALTHY = auto()
+ UNHEALTHY = auto()
+ UNKNOWN = auto()
+
+
class CloudRunnerState(str, Enum):
"""Represent state of the instance hosting the runner.
@@ -75,6 +88,38 @@ def from_openstack_server_status( # pylint: disable=R0911
return CloudRunnerState.UNEXPECTED
+@dataclass
+class GitHubRunnerConfig:
+ """Configuration for GitHub runner spawned.
+
+ Attributes:
+ github_path: The GitHub organization or repository for runners to connect to.
+ labels: The labels to add to runners.
+ """
+
+ github_path: GithubPath
+ labels: list[str]
+
+
+@dataclass
+class SupportServiceConfig:
+ """Configuration for supporting services for runners.
+
+ Attributes:
+ proxy_config: The proxy configuration.
+ dockerhub_mirror: The dockerhub mirror to use for runners.
+ ssh_debug_connections: The information on the ssh debug services.
+ repo_policy_url: The URL of the repo policy service.
+ repo_policy_token: The token to access the repo policy service.
+ """
+
+ proxy_config: ProxyConfig | None
+ dockerhub_mirror: str | None
+ ssh_debug_connections: list[SSHDebugConnection] | None
+ repo_policy_url: str | None
+ repo_policy_token: str | None
+
+
@dataclass
class CloudRunnerInstance:
"""Information on the runner on the cloud.
@@ -93,10 +138,15 @@ class CloudRunnerInstance:
class CloudRunnerManager(abc.ABC):
- """Manage runner instance on cloud."""
+ """Manage runner instance on cloud.
+
+ Attributes:
+ name_prefix: The name prefix of the self-hosted runners.
+ """
+ @property
@abc.abstractmethod
- def get_name_prefix(self) -> str:
+ def name_prefix(self) -> str:
"""Get the name prefix of the self-hosted runners."""
@abc.abstractmethod
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 9a7e7385d..e2472e643 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -16,6 +16,7 @@
CloudRunnerInstance,
CloudRunnerManager,
CloudRunnerState,
+ HealthState,
InstanceId,
)
from manager.github_runner_manager import GithubRunnerManager, GithubRunnerState
@@ -23,7 +24,6 @@
from metrics import github as github_metrics
from metrics import runner as runner_metrics
from metrics.runner import RunnerMetrics
-from runner_type import RunnerNameByHealth
logger = logging.getLogger(__name__)
@@ -40,19 +40,8 @@ class FlushMode(Enum):
FLUSH_IDLE = auto()
FLUSH_BUSY = auto()
-
-class HealthState(Enum):
- """Health state of the runners.
-
- Attributes:
- HEALTHY: The runner is healthy.
- UNHEALTHY: The runner is not healthy.
- UNKNOWN: Unable to get the health state.
- """
- HEALTHY = auto()
- UNHEALTHY= auto()
- UNKNOWN=auto()
-
+
+
@dataclass
class RunnerInstance:
"""Represents an instance of runner.
@@ -112,8 +101,9 @@ def __init__(self, cloud_runner_manager: CloudRunnerManager, config: RunnerManag
"""
self._config = config
self._cloud = cloud_runner_manager
+ self.name_prefix = self._cloud.name_prefix
self._github = GithubRunnerManager(
- prefix=self._cloud.get_name_prefix(), token=self._config.token, path=self._config.path
+ prefix=self.name_prefix, token=self._config.token, path=self._config.path
)
def create_runners(self, num: int) -> tuple[InstanceId]:
@@ -311,7 +301,7 @@ def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetri
issued_events = runner_metrics.issue_events(
runner_metrics=extracted_metrics,
job_metrics=job_metrics,
- flavor=self._cloud.get_name_prefix(),
+ flavor=self.name_prefix,
)
for event_type in issued_events:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 4ea2ade47..733eb1419 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -16,7 +16,7 @@
import paramiko.ssh_exception
from fabric import Connection as SSHConnection
-from charm_state import GithubOrg, GithubPath, ProxyConfig, SSHDebugConnection
+from charm_state import GithubOrg
from errors import (
CreateMetricsStorageError,
GetMetricsStorageError,
@@ -31,7 +31,9 @@
CloudRunnerInstance,
CloudRunnerManager,
CloudRunnerState,
+ GitHubRunnerConfig,
InstanceId,
+ SupportServiceConfig,
)
from manager.runner_manager import HealthState
from metrics import events as metric_events
@@ -40,7 +42,6 @@
from openstack_cloud.openstack_cloud import OpenstackCloud, OpenstackInstance
from openstack_cloud.openstack_manager import GithubRunnerRemoveError
from repo_policy_compliance_client import RepoPolicyComplianceClient
-from runner_type import RunnerNameByHealth
from utilities import retry
logger = logging.getLogger(__name__)
@@ -63,42 +64,36 @@ class _PullFileError(Exception):
"""Represents an error while pulling a file from the runner instance."""
-# Ignore "Too many instance attributes" as this dataclass is for passing arguments.
@dataclass
-class OpenstackRunnerManagerConfig: # pylint: disable=R0902
- """Configuration for OpenstackRunnerManager.
+class OpenStackCloudConfig:
+ """Configuration for OpenStack cloud authorisation information.
Attributes:
clouds_config: The clouds.yaml.
cloud: The cloud name to connect to.
+ """
+
+ clouds_config: dict[str, dict]
+ cloud: str
+
+
+@dataclass
+class OpenStackServerConfig:
+ """Configuration for OpenStack server.
+
+ Attributes:
image: The image name for runners to use.
flavor: The flavor name for runners to use.
network: The network name for runners to use.
- github_path: The GitHub organization or repository for runners to connect to.
- labels: The labels to add to runners.
- proxy_config: The proxy configuration.
- dockerhub_mirror: The dockerhub mirror to use for runners.
- ssh_debug_connections: The information on the ssh debug services.
- repo_policy_url: The URL of the repo policy service.
- repo_policy_token: The token to access the repo policy service.
"""
- clouds_config: dict[str, dict]
- cloud: str
image: str
flavor: str
network: str
- github_path: GithubPath
- labels: list[str]
- proxy_config: ProxyConfig | None
- dockerhub_mirror: str | None
- ssh_debug_connections: list[SSHDebugConnection] | None
- repo_policy_url: str | None
- repo_policy_token: str | None
@dataclass
-class RunnerHealth:
+class _RunnerHealth:
"""Runners with health state.
Attributes:
@@ -111,31 +106,41 @@ class RunnerHealth:
class OpenstackRunnerManager(CloudRunnerManager):
- """Manage self-hosted runner on OpenStack cloud."""
+ """Manage self-hosted runner on OpenStack cloud.
- def __init__(self, prefix: str, config: OpenstackRunnerManagerConfig) -> None:
+ Attributes:
+ name_prefix: The name prefix of the runners created.
+ """
+
+ # Ignore "Too many arguments", as the class requires a lot of configurations.
+ def __init__( # pylint: disable=R0913
+ self,
+ prefix: str,
+ cloud_config: OpenStackCloudConfig,
+ server_config: OpenStackServerConfig,
+ runner_config: GitHubRunnerConfig,
+ service_config: SupportServiceConfig,
+ ) -> None:
"""Construct the object.
Args:
prefix: The prefix to runner name.
- config: Configuration of the object.
+ cloud_config: The configuration for OpenStack authorisation.
+ server_config: The configuration for creating OpenStack server.
+ runner_config: The configuration for the runner.
+ service_config: The configuration of supporting services of the runners.
"""
- self.prefix = prefix
- self.config = config
+ self.name_prefix = prefix
+ self._cloud_config = cloud_config
+ self._server_config = server_config
+ self._runner_config = runner_config
+ self._service_config = service_config
self._openstack_cloud = OpenstackCloud(
- clouds_config=self.config.clouds_config,
- cloud=self.config.cloud,
- prefix=self.prefix,
+ clouds_config=self._cloud_config.clouds_config,
+ cloud=self._cloud_config.cloud,
+ prefix=self.name_prefix,
)
- def get_name_prefix(self) -> str:
- """Get the name prefix of the self-hosted runners.
-
- Returns:
- The name prefix.
- """
- return self.prefix
-
def create_runner(self, registration_token: str) -> InstanceId:
"""Create a self-hosted runner.
@@ -157,9 +162,9 @@ def create_runner(self, registration_token: str) -> InstanceId:
try:
instance = self._openstack_cloud.launch_instance(
instance_id=instance_id,
- image=self.config.image,
- flavor=self.config.flavor,
- network=self.config.network,
+ image=self._server_config.image,
+ flavor=self._server_config.flavor,
+ network=self._server_config.network,
cloud_init=cloud_init,
)
except OpenStackError as err:
@@ -171,7 +176,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
end_timestamp = time.time()
OpenstackRunnerManager._issue_runner_installed_metric(
name=instance_name,
- flavor=self.prefix,
+ flavor=self.name_prefix,
install_start_timestamp=start_timestamp,
install_end_timestamp=end_timestamp,
)
@@ -216,7 +221,11 @@ def get_runners(
CloudRunnerInstance(
name=instance.server_name,
instance_id=instance.instance_id,
- health=HealthState.HEALTHY if self._runner_health_check(instance) else HealthState.UNHEALTHY,
+ health=(
+ HealthState.HEALTHY
+ if self._runner_health_check(instance)
+ else HealthState.UNHEALTHY
+ ),
state=CloudRunnerState.from_openstack_server_status(instance.status),
)
for instance in instance_list
@@ -306,8 +315,8 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token: str) -> None
logger.exception(
"Unable to delete openstack instance for runner %s", instance.server_name
)
-
- def _get_runners_health(self) -> RunnerHealth:
+
+ def _get_runners_health(self) -> _RunnerHealth:
"""Get runners by health state.
Returns:
@@ -321,11 +330,11 @@ def _get_runners_health(self) -> RunnerHealth:
healthy.append(runner)
else:
unhealthy.append(runner)
- return RunnerHealth(healthy=tuple(healthy), unhealthy=tuple(unhealthy))
+ return _RunnerHealth(healthy=tuple(healthy), unhealthy=tuple(unhealthy))
def _runner_health_check(self, instance: OpenstackInstance) -> bool:
"""Run health check on a runner.
-
+
Args:
instance: The instance hosting the runner to run health check on.
@@ -357,10 +366,10 @@ def _generate_cloud_init(self, instance_name: str, registration_token: str) -> s
env_contents = jinja.get_template("env.j2").render(
pre_job_script=str(PRE_JOB_SCRIPT),
- dockerhub_mirror=self.config.dockerhub_mirror or "",
+ dockerhub_mirror=self._service_config.dockerhub_mirror or "",
ssh_debug_info=(
- secrets.choice(self.config.ssh_debug_connections)
- if self.config.ssh_debug_connections
+ secrets.choice(self._service_config.ssh_debug_connections)
+ if self._service_config.ssh_debug_connections
else None
),
# Proxies are handled by aproxy.
@@ -385,24 +394,24 @@ def _generate_cloud_init(self, instance_name: str, registration_token: str) -> s
pre_job_contents = jinja.get_template("pre-job.j2").render(pre_job_contents_dict)
runner_group = None
- if isinstance(self.config.github_path, GithubOrg):
- runner_group = self.config.github_path.group
+ if isinstance(self._runner_config.github_path, GithubOrg):
+ runner_group = self._runner_config.github_path.group
aproxy_address = (
- self.config.proxy_config.aproxy_address
- if self.config.proxy_config is not None
+ self._service_config.proxy_config.aproxy_address
+ if self._service_config.proxy_config is not None
else None
)
return jinja.get_template("openstack-userdata.sh.j2").render(
- github_url=f"https://github.com/{self.config.github_path.path()}",
+ github_url=f"https://github.com/{self._runner_config.github_path.path()}",
runner_group=runner_group,
token=registration_token,
- instance_labels=",".join(self.config.labels),
+ instance_labels=",".join(self._runner_config.labels),
instance_name=instance_name,
env_contents=env_contents,
pre_job_contents=pre_job_contents,
metrics_exchange_path=str(METRICS_EXCHANGE_PATH),
aproxy_address=aproxy_address,
- dockerhub_mirror=self.config.dockerhub_mirror,
+ dockerhub_mirror=self._service_config.dockerhub_mirror,
)
def _get_repo_policy_compliance_client(self) -> RepoPolicyComplianceClient | None:
@@ -411,9 +420,9 @@ def _get_repo_policy_compliance_client(self) -> RepoPolicyComplianceClient | Non
Returns:
The repo policy compliance client.
"""
- if self.config.repo_policy_url and self.config.repo_policy_token:
+ if self._service_config.repo_policy_url and self._service_config.repo_policy_token:
return RepoPolicyComplianceClient(
- self.config.repo_policy_url, self.config.repo_policy_token
+ self._service_config.repo_policy_url, self._service_config.repo_policy_token
)
return None
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index d92fec639..c09800ae7 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -18,14 +18,15 @@
from openstack.connection import Connection as OpenstackConnection
from charm_state import GithubPath, ProxyConfig, parse_github_path
-from manager.cloud_runner_manager import CloudRunnerState
+from manager.cloud_runner_manager import CloudRunnerState, GitHubRunnerConfig, SupportServiceConfig
from manager.github_runner_manager import GithubRunnerState
from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
from metrics import events, storage
from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
from openstack_cloud.openstack_runner_manager import (
+ OpenStackCloudConfig,
OpenstackRunnerManager,
- OpenstackRunnerManagerConfig,
+ OpenStackServerConfig,
)
from tests.integration.helpers.common import (
DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
@@ -104,21 +105,29 @@ async def openstack_runner_manager_fixture(
_CLOUDS_YAML_PATH.unlink(missing_ok=True)
clouds_config = yaml.safe_load(private_endpoint_clouds_yaml)
- config = OpenstackRunnerManagerConfig(
+ cloud_config = OpenStackCloudConfig(
clouds_config=clouds_config,
cloud="testcloud",
+ )
+ server_config = OpenStackServerConfig(
image=openstack_test_image,
flavor=flavor_name,
network=network_name,
+ )
+ runner_config = GitHubRunnerConfig(
github_path=github_path,
labels=["openstack_test", runner_label],
+ )
+ service_config = SupportServiceConfig(
proxy_config=proxy_config,
dockerhub_mirror=None,
ssh_debug_connections=None,
repo_policy_url=None,
repo_policy_token=None,
)
- return OpenstackRunnerManager(app_name, config)
+ return OpenstackRunnerManager(
+ app_name, cloud_config, server_config, runner_config, service_config
+ )
@pytest_asyncio.fixture(scope="module", name="runner_manager")
From 63835848aa3727cf88fa8635f9b48111d0e3e29a Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 16 Aug 2024 14:39:48 +0800
Subject: [PATCH 190/278] Add missing docs
---
src/manager/runner_manager.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index e2472e643..76a1f985d 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -90,7 +90,11 @@ class RunnerManagerConfig:
class RunnerManager:
- """Manage the runners."""
+ """Manage the runners.
+
+ Attributes:
+ name_prefix: The name prefix of the runners.
+ """
def __init__(self, cloud_runner_manager: CloudRunnerManager, config: RunnerManagerConfig):
"""Construct the object.
From 7c0c2256fb7420fb1c54de1d9c8516a9b14ff337 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 16 Aug 2024 14:50:08 +0800
Subject: [PATCH 191/278] Update the github state enum to use auto
---
src/manager/github_runner_manager.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 4d6ae5788..218155b0f 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -3,7 +3,7 @@
"""Client for managing self-hosted runner on GitHub side."""
-from enum import Enum
+from enum import Enum, auto
from typing import Sequence
from charm_state import GithubPath
@@ -21,9 +21,9 @@ class GithubRunnerState(str, Enum):
OFFLINE: Runner is not connected to GitHub.
"""
- BUSY = "busy"
- IDLE = "idle"
- OFFLINE = "offline"
+ BUSY = auto()
+ IDLE = auto()
+ OFFLINE = auto()
@staticmethod
def from_runner(runner: SelfHostedRunner) -> "GithubRunnerState":
From 3d62ef97691bed3e63ba25c5f0d93080f1155e8c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 16 Aug 2024 15:03:17 +0800
Subject: [PATCH 192/278] Rename class to fit convention
---
src/manager/github_runner_manager.py | 18 +++++++++---------
src/manager/runner_manager.py | 16 ++++++++--------
.../test_runner_manager_openstack.py | 10 +++++-----
3 files changed, 22 insertions(+), 22 deletions(-)
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 218155b0f..0aed972bd 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -11,7 +11,7 @@
from github_type import GitHubRunnerStatus, SelfHostedRunner
-class GithubRunnerState(str, Enum):
+class GitHubRunnerState(str, Enum):
"""State of the self-hosted runner on GitHub.
Attributes:
@@ -26,7 +26,7 @@ class GithubRunnerState(str, Enum):
OFFLINE = auto()
@staticmethod
- def from_runner(runner: SelfHostedRunner) -> "GithubRunnerState":
+ def from_runner(runner: SelfHostedRunner) -> "GitHubRunnerState":
"""Construct the object from GtiHub runner information.
Args:
@@ -35,13 +35,13 @@ def from_runner(runner: SelfHostedRunner) -> "GithubRunnerState":
Returns:
The state of runner.
"""
- state = GithubRunnerState.OFFLINE
+ state = GitHubRunnerState.OFFLINE
# A runner that is busy and offline is possible.
if runner.busy:
- state = GithubRunnerState.BUSY
+ state = GitHubRunnerState.BUSY
if runner.status == GitHubRunnerStatus.ONLINE:
if not runner.busy:
- state = GithubRunnerState.IDLE
+ state = GitHubRunnerState.IDLE
return state
@@ -61,7 +61,7 @@ def __init__(self, prefix: str, token: str, path: GithubPath):
self.github = GithubClient(token)
def get_runners(
- self, states: Sequence[GithubRunnerState] | None = None
+ self, states: Sequence[GitHubRunnerState] | None = None
) -> tuple[SelfHostedRunner]:
"""Get info on self-hosted runners of certain states.
@@ -79,7 +79,7 @@ def get_runners(
and GithubRunnerManager._is_runner_in_state(runner, states)
)
- def delete_runners(self, states: Sequence[GithubRunnerState] | None = None) -> None:
+ def delete_runners(self, states: Sequence[GitHubRunnerState] | None = None) -> None:
"""Delete the self-hosted runners of certain states.
Args:
@@ -111,7 +111,7 @@ def get_removal_token(self) -> str:
@staticmethod
def _is_runner_in_state(
- runner: SelfHostedRunner, states: Sequence[GithubRunnerState] | None
+ runner: SelfHostedRunner, states: Sequence[GitHubRunnerState] | None
) -> bool:
"""Check that the runner is in one of the states provided.
@@ -124,4 +124,4 @@ def _is_runner_in_state(
"""
if states is None:
return True
- return GithubRunnerState.from_runner(runner) in states
+ return GitHubRunnerState.from_runner(runner) in states
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 76a1f985d..98cf5f35f 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -19,7 +19,7 @@
HealthState,
InstanceId,
)
-from manager.github_runner_manager import GithubRunnerManager, GithubRunnerState
+from manager.github_runner_manager import GithubRunnerManager, GitHubRunnerState
from metrics import events as metric_events
from metrics import github as github_metrics
from metrics import runner as runner_metrics
@@ -57,7 +57,7 @@ class RunnerInstance:
name: str
instance_id: InstanceId
health: HealthState
- github_state: GithubRunnerState | None
+ github_state: GitHubRunnerState | None
cloud_state: CloudRunnerState
def __init__(self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedRunner | None):
@@ -71,7 +71,7 @@ def __init__(self, cloud_instance: CloudRunnerInstance, github_info: SelfHostedR
self.instance_id = cloud_instance.instance_id
self.health = cloud_instance.health
self.github_state = (
- GithubRunnerState.from_runner(github_info) if github_info is not None else None
+ GitHubRunnerState.from_runner(github_info) if github_info is not None else None
)
self.cloud_state = cloud_instance.state
@@ -91,7 +91,7 @@ class RunnerManagerConfig:
class RunnerManager:
"""Manage the runners.
-
+
Attributes:
name_prefix: The name prefix of the runners.
"""
@@ -143,7 +143,7 @@ def create_runners(self, num: int) -> tuple[InstanceId]:
def get_runners(
self,
- github_states: Sequence[GithubRunnerState] | None = None,
+ github_states: Sequence[GitHubRunnerState] | None = None,
cloud_states: Sequence[CloudRunnerState] | None = None,
) -> tuple[RunnerInstance]:
"""Get information on runner filter by state.
@@ -236,9 +236,9 @@ def flush_runners(
"Unknown flush mode %s encountered, contact developers", flush_mode
)
- states = [GithubRunnerState.IDLE]
+ states = [GitHubRunnerState.IDLE]
if flush_mode == FlushMode.FLUSH_BUSY:
- states.append(GithubRunnerState.BUSY)
+ states.append(GitHubRunnerState.BUSY)
runners_list = self.get_runners(github_states=states)
runner_names = [runner.name for runner in runners_list]
@@ -252,7 +252,7 @@ def cleanup(self) -> IssuedMetricEventsStats:
Returns:
Stats on metrics events issued during the cleanup of runners.
"""
- self._github.delete_runners([GithubRunnerState.OFFLINE])
+ self._github.delete_runners([GitHubRunnerState.OFFLINE])
remove_token = self._github.get_removal_token()
deleted_runner_metrics = self._cloud.cleanup(remove_token)
return self._issue_runner_metrics(metrics=deleted_runner_metrics)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index c09800ae7..523c2b5ae 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -19,7 +19,7 @@
from charm_state import GithubPath, ProxyConfig, parse_github_path
from manager.cloud_runner_manager import CloudRunnerState, GitHubRunnerConfig, SupportServiceConfig
-from manager.github_runner_manager import GithubRunnerState
+from manager.github_runner_manager import GitHubRunnerState
from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
from metrics import events, storage
from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
@@ -160,7 +160,7 @@ async def runner_manager_with_one_runner_fixture(runner_manager: RunnerManager)
), "Test arrange failed: Expect runner in active state"
try:
await wait_for(
- lambda: runner_manager.get_runners()[0].github_state == GithubRunnerState.IDLE,
+ lambda: runner_manager.get_runners()[0].github_state == GitHubRunnerState.IDLE,
timeout=120,
check_interval=10,
)
@@ -251,7 +251,7 @@ async def test_runner_normal_idle_lifecycle(
assert runner.cloud_state == CloudRunnerState.ACTIVE
# Update on GitHub-side can take a bit of time.
await wait_for(
- lambda: runner_manager.get_runners()[0].github_state == GithubRunnerState.IDLE,
+ lambda: runner_manager.get_runners()[0].github_state == GitHubRunnerState.IDLE,
timeout=120,
check_interval=10,
)
@@ -304,7 +304,7 @@ async def test_runner_flush_busy_lifecycle(
assert len(runner_list) == 1
busy_runner = runner_list[0]
assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
- assert busy_runner.github_state == GithubRunnerState.BUSY
+ assert busy_runner.github_state == GitHubRunnerState.BUSY
# 2.
runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
@@ -312,7 +312,7 @@ async def test_runner_flush_busy_lifecycle(
assert len(runner_list) == 1
busy_runner = runner_list[0]
assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
- assert busy_runner.github_state == GithubRunnerState.BUSY
+ assert busy_runner.github_state == GitHubRunnerState.BUSY
# 3.
runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_BUSY)
From d14f92ae2e9db48f4cf8aaa461c8b4192388f5f8 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 16 Aug 2024 15:29:45 +0800
Subject: [PATCH 193/278] Fix according to review
---
...penstack_cloud.openstack_runner_manager.md | 2 +-
src/manager/cloud_runner_manager.py | 22 +++++++++----------
src/openstack_cloud/openstack_cloud.py | 4 ++--
.../openstack_runner_manager.py | 12 +++++-----
4 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 7a0969bf5..a40e7009c 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -232,7 +232,7 @@ Get a self-hosted runner by instance id.
```python
get_runners(
states: Optional[Sequence[CloudRunnerState]] = None
-) → Tuple[CloudRunnerInstance]
+) → tuple[CloudRunnerInstance, ...]
```
Get self-hosted runners by state.
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index acc04b542..7362990d0 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -52,10 +52,8 @@ class CloudRunnerState(str, Enum):
UNKNOWN = auto()
UNEXPECTED = auto()
- # Disable "Too many return statements" as this method is using case statement for converting
- # the states, which does not cause a complexity issue.
@staticmethod
- def from_openstack_server_status( # pylint: disable=R0911
+ def from_openstack_server_status(
openstack_server_status: str,
) -> "CloudRunnerState":
"""Create from openstack server status.
@@ -69,23 +67,25 @@ def from_openstack_server_status( # pylint: disable=R0911
Returns:
The state of the runner.
"""
+ state = CloudRunnerState.UNEXPECTED
match openstack_server_status:
case "BUILD":
- return CloudRunnerState.CREATED
+ state = CloudRunnerState.CREATED
case "REBUILD":
- return CloudRunnerState.CREATED
+ state = CloudRunnerState.CREATED
case "ACTIVE":
- return CloudRunnerState.ACTIVE
+ state = CloudRunnerState.ACTIVE
case "ERROR":
- return CloudRunnerState.ERROR
+ state = CloudRunnerState.ERROR
case "STOPPED":
- return CloudRunnerState.STOPPED
+ state = CloudRunnerState.STOPPED
case "DELETED":
- return CloudRunnerState.DELETED
+ state = CloudRunnerState.DELETED
case "UNKNOWN":
- return CloudRunnerState.UNKNOWN
+ state = CloudRunnerState.UNKNOWN
case _:
- return CloudRunnerState.UNEXPECTED
+ state = CloudRunnerState.UNEXPECTED
+ return state
@dataclass
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index ceac7fd6d..462b6b46b 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -394,7 +394,7 @@ def _cleanup_openstack_keypairs(
continue
try:
- conn.delete_keypair(key.name)
+ self._delete_keypair(conn, key.name)
except openstack.exceptions.SDKException:
logger.warning(
"Unable to delete OpenStack keypair associated with deleted key file %s ",
@@ -422,7 +422,7 @@ def _get_and_ensure_unique_server(
) -> OpenstackServer | None:
"""Get the latest server of the name and ensure it is unique.
- If multiple servers with the same name is found, the latest server in creation time is
+ If multiple servers with the same name are found, the latest server in creation time is
returned. Other servers is deleted.
Args:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 733eb1419..cac1187c0 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -8,7 +8,7 @@
import time
from dataclasses import dataclass
from pathlib import Path
-from typing import Iterator, Sequence, Tuple
+from typing import Iterator, Sequence
import invoke
import jinja2
@@ -206,7 +206,7 @@ def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
def get_runners(
self, states: Sequence[CloudRunnerState] | None = None
- ) -> Tuple[CloudRunnerInstance]:
+ ) -> tuple[CloudRunnerInstance, ...]:
"""Get self-hosted runners by state.
Args:
@@ -231,7 +231,7 @@ def get_runners(
for instance in instance_list
]
if states is None:
- return instance_list
+ return tuple(instance_list)
return tuple(instance for instance in instance_list if instance.state in states)
def delete_runner(
@@ -254,11 +254,11 @@ def delete_runner(
)
return None
- metric = runner_metrics.extract(
- metrics_storage_manager=metrics_storage, runners=set(instance.server_name)
+ extracted_metrics = runner_metrics.extract(
+ metrics_storage_manager=metrics_storage, runners=set([instance.server_name])
)
self._delete_runner(instance, remove_token)
- return next(metric, None)
+ return next(extracted_metrics, None)
def cleanup(self, remove_token: str) -> Iterator[runner_metrics.RunnerMetrics]:
"""Cleanup runner and resource on the cloud.
From 4bce4a810b517790c72fb3905d7a626149ad7c91 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 19 Aug 2024 09:38:35 +0800
Subject: [PATCH 194/278] Fix name_prefix property cloud runner manager
---
.../openstack_cloud.openstack_runner_manager.md | 17 +++++++++++------
src/openstack_cloud/openstack_runner_manager.py | 11 ++++++++++-
2 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index a40e7009c..82991f1e2 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -120,13 +120,18 @@ Construct the object.
#### property name_prefix
-Get the name prefix of the self-hosted runners.
+The prefix of runner names.
+
+
+
+**Returns:**
+ The prefix of the runner names managed by this class.
---
-
+
### method `cleanup`
@@ -149,7 +154,7 @@ Cleanup runner and resource on the cloud.
---
-
+
### method `create_runner`
@@ -178,7 +183,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -202,7 +207,7 @@ Delete self-hosted runners.
---
-
+
### method `get_runner`
@@ -225,7 +230,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index cac1187c0..c58d131c0 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -130,7 +130,7 @@ def __init__( # pylint: disable=R0913
runner_config: The configuration for the runner.
service_config: The configuration of supporting services of the runners.
"""
- self.name_prefix = prefix
+ self._prefix = prefix
self._cloud_config = cloud_config
self._server_config = server_config
self._runner_config = runner_config
@@ -141,6 +141,15 @@ def __init__( # pylint: disable=R0913
prefix=self.name_prefix,
)
+ @property
+ def name_prefix(self) -> str:
+ """The prefix of runner names.
+
+ Returns:
+ The prefix of the runner names managed by this class.
+ """
+ return self._prefix
+
def create_runner(self, registration_token: str) -> InstanceId:
"""Create a self-hosted runner.
From 0d23dbecf23114a89955c24d8499d4fbd21ae2c0 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 19 Aug 2024 15:48:49 +0800
Subject: [PATCH 195/278] Add class for scaling runners
---
src-docs/errors.md | 65 ++++----
src-docs/metrics.md | 3 -
src-docs/openstack_cloud.md | 5 -
...penstack_cloud.openstack_runner_manager.md | 24 +--
src-docs/runner_manager.md | 4 +-
src-docs/runner_manager_type.md | 2 +-
src/charm.py | 147 ++++++++++--------
src/errors.py | 4 +
src/manager/runner_scaler.py | 138 ++++++++++++++++
.../openstack_runner_manager.py | 23 +--
src/runner_manager.py | 16 +-
src/runner_manager_type.py | 2 +-
tests/integration/helpers/common.py | 8 +-
.../test_charm_scheduled_events.py | 4 +-
.../test_runner_manager_openstack.py | 12 +-
tests/unit/test_charm.py | 12 +-
tests/unit/test_runner_manager.py | 44 +++---
17 files changed, 337 insertions(+), 176 deletions(-)
create mode 100644 src/manager/runner_scaler.py
diff --git a/src-docs/errors.md b/src-docs/errors.md
index cf7cde565..ee5db5a11 100644
--- a/src-docs/errors.md
+++ b/src-docs/errors.md
@@ -99,6 +99,17 @@ Error for setting up aproxy.
+## class `MissingServerConfigError`
+Error for unable to create runner due to missing server configurations.
+
+
+
+
+
+---
+
+
+
## class `MissingRunnerBinaryError`
Error for missing runner binary.
@@ -108,7 +119,7 @@ Error for missing runner binary.
---
-
+
## class `ConfigurationError`
Error for juju configuration.
@@ -119,7 +130,7 @@ Error for juju configuration.
---
-
+
## class `MissingMongoDBError`
Error for missing integration data.
@@ -130,7 +141,7 @@ Error for missing integration data.
---
-
+
## class `LxdError`
Error for executing LXD actions.
@@ -141,7 +152,7 @@ Error for executing LXD actions.
---
-
+
## class `SubprocessError`
Error for Subprocess calls.
@@ -155,7 +166,7 @@ Error for Subprocess calls.
- `stdout`: Content of stdout of the subprocess.
- `stderr`: Content of stderr of the subprocess.
-
+
### method `__init__`
@@ -185,7 +196,7 @@ Construct the subprocess error.
---
-
+
## class `IssueMetricEventError`
Represents an error when issuing a metric event.
@@ -196,7 +207,7 @@ Represents an error when issuing a metric event.
---
-
+
## class `LogrotateSetupError`
Represents an error raised when logrotate cannot be setup.
@@ -207,7 +218,7 @@ Represents an error raised when logrotate cannot be setup.
---
-
+
## class `MetricsStorageError`
Base class for all metrics storage errors.
@@ -218,7 +229,7 @@ Base class for all metrics storage errors.
---
-
+
## class `SharedFilesystemError`
Base class for all shared filesystem errors.
@@ -229,7 +240,7 @@ Base class for all shared filesystem errors.
---
-
+
## class `CreateMetricsStorageError`
Represents an error when the metrics storage could not be created.
@@ -240,7 +251,7 @@ Represents an error when the metrics storage could not be created.
---
-
+
## class `DeleteMetricsStorageError`
Represents an error when the metrics storage could not be deleted.
@@ -251,7 +262,7 @@ Represents an error when the metrics storage could not be deleted.
---
-
+
## class `GetMetricsStorageError`
Represents an error when the metrics storage could not be retrieved.
@@ -262,7 +273,7 @@ Represents an error when the metrics storage could not be retrieved.
---
-
+
## class `QuarantineMetricsStorageError`
Represents an error when the metrics storage could not be quarantined.
@@ -273,7 +284,7 @@ Represents an error when the metrics storage could not be quarantined.
---
-
+
## class `SharedFilesystemMountError`
Represents an error related to the mounting of the shared filesystem.
@@ -284,7 +295,7 @@ Represents an error related to the mounting of the shared filesystem.
---
-
+
## class `RunnerMetricsError`
Base class for all runner metrics errors.
@@ -295,7 +306,7 @@ Base class for all runner metrics errors.
---
-
+
## class `CorruptMetricDataError`
Represents an error with the data being corrupt.
@@ -306,7 +317,7 @@ Represents an error with the data being corrupt.
---
-
+
## class `GithubMetricsError`
Base class for all github metrics errors.
@@ -317,7 +328,7 @@ Base class for all github metrics errors.
---
-
+
## class `GithubClientError`
Base class for all github client errors.
@@ -328,7 +339,7 @@ Base class for all github client errors.
---
-
+
## class `GithubApiError`
Represents an error when the GitHub API returns an error.
@@ -339,7 +350,7 @@ Represents an error when the GitHub API returns an error.
---
-
+
## class `TokenError`
Represents an error when the token is invalid or has not enough permissions.
@@ -350,7 +361,7 @@ Represents an error when the token is invalid or has not enough permissions.
---
-
+
## class `JobNotFoundError`
Represents an error when the job could not be found on GitHub.
@@ -361,7 +372,7 @@ Represents an error when the job could not be found on GitHub.
---
-
+
## class `RunnerLogsError`
Base class for all runner logs errors.
@@ -372,7 +383,7 @@ Base class for all runner logs errors.
---
-
+
## class `OpenStackError`
Base class for OpenStack errors.
@@ -383,7 +394,7 @@ Base class for OpenStack errors.
---
-
+
## class `OpenStackInvalidConfigError`
Represents an invalid OpenStack configuration.
@@ -394,7 +405,7 @@ Represents an invalid OpenStack configuration.
---
-
+
## class `OpenStackUnauthorizedError`
Represents an unauthorized connection to OpenStack.
@@ -405,7 +416,7 @@ Represents an unauthorized connection to OpenStack.
---
-
+
## class `SSHError`
Represents an error while interacting with SSH.
@@ -416,7 +427,7 @@ Represents an error while interacting with SSH.
---
-
+
## class `KeyfileError`
Represents missing keyfile for SSH.
diff --git a/src-docs/metrics.md b/src-docs/metrics.md
index e4275c340..25c650c41 100644
--- a/src-docs/metrics.md
+++ b/src-docs/metrics.md
@@ -22,8 +22,5 @@ Package for common metrics-related code.
- **github**: # Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
-- **runner_logs**: # Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
diff --git a/src-docs/openstack_cloud.md b/src-docs/openstack_cloud.md
index 4d82f5359..80cdb146b 100644
--- a/src-docs/openstack_cloud.md
+++ b/src-docs/openstack_cloud.md
@@ -5,11 +5,6 @@
# module `openstack_cloud`
Module for managing Openstack cloud.
-**Global Variables**
----------------
-- **openstack_manager**: # Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
---
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 82991f1e2..7499e4ce5 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -17,7 +17,7 @@ Manager for self-hosted runner on OpenStack.
---
-
+
## class `OpenStackCloudConfig`
Configuration for OpenStack cloud authorisation information.
@@ -47,7 +47,7 @@ __init__(clouds_config: dict[str, dict], cloud: str) → None
---
-
+
## class `OpenStackServerConfig`
Configuration for OpenStack server.
@@ -78,9 +78,9 @@ __init__(image: str, flavor: str, network: str) → None
---
-
+
-## class `OpenstackRunnerManager`
+## class `OpenStackRunnerManager`
Manage self-hosted runner on OpenStack cloud.
@@ -89,7 +89,7 @@ Manage self-hosted runner on OpenStack cloud.
- `name_prefix`: The name prefix of the runners created.
-
+
### method `__init__`
@@ -97,7 +97,7 @@ Manage self-hosted runner on OpenStack cloud.
__init__(
prefix: str,
cloud_config: OpenStackCloudConfig,
- server_config: OpenStackServerConfig,
+ server_config: OpenStackServerConfig | None,
runner_config: GitHubRunnerConfig,
service_config: SupportServiceConfig
) → None
@@ -111,7 +111,7 @@ Construct the object.
- `prefix`: The prefix to runner name.
- `cloud_config`: The configuration for OpenStack authorisation.
- - `server_config`: The configuration for creating OpenStack server.
+ - `server_config`: The configuration for creating OpenStack server. Unable to create runner if None.
- `runner_config`: The configuration for the runner.
- `service_config`: The configuration of supporting services of the runners.
@@ -131,7 +131,7 @@ The prefix of runner names.
---
-
+
### method `cleanup`
@@ -154,7 +154,7 @@ Cleanup runner and resource on the cloud.
---
-
+
### method `create_runner`
@@ -183,7 +183,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -207,7 +207,7 @@ Delete self-hosted runners.
---
-
+
### method `get_runner`
@@ -230,7 +230,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src-docs/runner_manager.md b/src-docs/runner_manager.md
index 8d1773bf0..2cf622469 100644
--- a/src-docs/runner_manager.md
+++ b/src-docs/runner_manager.md
@@ -15,7 +15,7 @@ Runner Manager manages the runners on LXD and GitHub.
-## class `RunnerManager`
+## class `LXDRunnerManager`
Manage a group of runners according to configuration.
@@ -33,7 +33,7 @@ Manage a group of runners according to configuration.
__init__(
app_name: str,
unit: int,
- runner_manager_config: RunnerManagerConfig
+ runner_manager_config: LXDRunnerManagerConfig
) → None
```
diff --git a/src-docs/runner_manager_type.md b/src-docs/runner_manager_type.md
index f6dd4faae..7e0675add 100644
--- a/src-docs/runner_manager_type.md
+++ b/src-docs/runner_manager_type.md
@@ -71,7 +71,7 @@ __init__(
-## class `RunnerManagerConfig`
+## class `LXDRunnerManagerConfig`
Configuration of runner manager.
diff --git a/src/charm.py b/src/charm.py
index 057982ef2..b869e3627 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -8,6 +8,10 @@
"""Charm for creating and managing GitHub self-hosted runner instances."""
+from manager.cloud_runner_manager import GitHubRunnerConfig, SupportServiceConfig
+from manager.runner_manager import RunnerManager, RunnerManagerConfig
+from manager.runner_scaler import RunnerScaler
+from openstack_cloud.openstack_runner_manager import OpenStackCloudConfig, OpenStackServerConfig
from utilities import bytes_with_unit_to_kib, execute_command, remove_residual_venv_dirs, retry
# This is a workaround for https://bugs.launchpad.net/juju/+bug/2058335
@@ -78,9 +82,9 @@
from event_timer import EventTimer, TimerStatusError
from firewall import Firewall, FirewallEntry
from github_type import GitHubRunnerStatus
-from openstack_cloud.openstack_manager import OpenstackRunnerManager
+from openstack_cloud.openstack_runner_manager import OpenStackRunnerManager
from runner import LXD_PROFILE_YAML
-from runner_manager import RunnerManager, RunnerManagerConfig
+from runner_manager import LXDRunnerManager, LXDRunnerManagerConfig
from runner_manager_type import FlushMode, OpenstackRunnerManagerConfig
RECONCILE_RUNNERS_EVENT = "reconcile-runners"
@@ -366,7 +370,7 @@ def _ensure_service_health(self) -> None:
def _get_runner_manager(
self, state: CharmState, token: str | None = None, path: GithubPath | None = None
- ) -> RunnerManager:
+ ) -> LXDRunnerManager:
"""Get a RunnerManager instance.
Args:
@@ -399,10 +403,10 @@ def _get_runner_manager(
app_name, unit = self.unit.name.rsplit("/", 1)
- return RunnerManager(
+ return LXDRunnerManager(
app_name,
unit,
- RunnerManagerConfig(
+ LXDRunnerManagerConfig(
charm_state=state,
dockerhub_mirror=state.charm_config.dockerhub_mirror,
image=state.runner_config.base_image.value,
@@ -495,8 +499,8 @@ def _on_start(self, _: StartEvent) -> None:
if state.instance_type == InstanceType.OPENSTACK:
if not self._get_set_image_ready_status():
return
- openstack_runner_manager = self._get_openstack_runner_manager(state)
- openstack_runner_manager.reconcile(state.runner_config.virtual_machines)
+ runner_scaler = self._get_runner_scaler(state)
+ runner_scaler.reconcile(state.runner_config.virtual_machines)
self.unit.status = ActiveStatus()
return
@@ -618,9 +622,9 @@ def _on_config_changed(self, _: ConfigChangedEvent) -> None: # noqa: C901
if not self._get_set_image_ready_status():
return
if state.charm_config.token != self._stored.token:
- openstack_runner_manager = self._get_openstack_runner_manager(state)
- openstack_runner_manager.flush()
- openstack_runner_manager.reconcile(state.runner_config.virtual_machines)
+ runner_scaler = self._get_runner_scaler(state)
+ runner_scaler.flush()
+ runner_scaler.reconcile(state.runner_config.virtual_machines)
# TODO: 2024-04-12: Flush on token changes.
self.unit.status = ActiveStatus()
return
@@ -639,7 +643,7 @@ def _on_config_changed(self, _: ConfigChangedEvent) -> None: # noqa: C901
self.unit.status = ActiveStatus()
def _check_and_update_local_lxd_dependencies(
- self, runner_manager: RunnerManager, token: str, proxy_config: ProxyConfig
+ self, runner_manager: LXDRunnerManager, token: str, proxy_config: ProxyConfig
) -> bool:
"""Check and update runner binary and services for local LXD runners.
@@ -719,8 +723,8 @@ def _trigger_reconciliation(self) -> None:
if state.instance_type == InstanceType.OPENSTACK:
if not self._get_set_image_ready_status():
return
- runner_manager = self._get_openstack_runner_manager(state)
- runner_manager.reconcile(state.runner_config.virtual_machines)
+ runner_scaler = self._get_runner_scaler(state)
+ runner_scaler.reconcile(state.runner_config.virtual_machines)
self.unit.status = ActiveStatus()
return
@@ -757,23 +761,8 @@ def _on_check_runners_action(self, event: ActionEvent) -> None:
state = self._setup_state()
if state.instance_type == InstanceType.OPENSTACK:
- openstack_runner_manager = self._get_openstack_runner_manager(state)
- runner_info = openstack_runner_manager.get_github_runner_info()
-
- for info in runner_info:
- if info.online:
- online += 1
- runner_names.append(info.runner_name)
- else:
- offline += 1
- event.set_results(
- {
- "online": online,
- "offline": offline,
- "unknown": unknown,
- "runners": ", ".join(runner_names),
- }
- )
+ runner_scaler = self._get_runner_scaler(state)
+ event.set_results(runner_scaler.get_runner_info())
return
runner_manager = self._get_runner_manager(state)
@@ -814,9 +803,9 @@ def _on_reconcile_runners_action(self, event: ActionEvent) -> None:
if not self._get_set_image_ready_status():
event.fail("Openstack image not yet provided/ready.")
return
- runner_manager = self._get_openstack_runner_manager(state)
+ runner_scaler = self._get_runner_scaler(state)
- delta = runner_manager.reconcile(state.runner_config.virtual_machines)
+ delta = runner_scaler.reconcile(state.runner_config.virtual_machines)
self.unit.status = ActiveStatus()
event.set_results({"delta": {"virtual-machines": delta}})
return
@@ -847,8 +836,8 @@ def _on_flush_runners_action(self, event: ActionEvent) -> None:
if state.instance_type == InstanceType.OPENSTACK:
# Flushing mode not implemented for OpenStack yet.
- runner_manager = self._get_openstack_runner_manager(state)
- flushed = runner_manager.flush()
+ runner_scaler = self._get_runner_scaler(state)
+ flushed = runner_scaler.flush()
event.set_results({"delta": {"virtual-machines": flushed}})
return
@@ -895,15 +884,15 @@ def _on_stop(self, _: StopEvent) -> None:
state = self._setup_state()
if state.instance_type == InstanceType.OPENSTACK:
- runner_manager = self._get_openstack_runner_manager(state)
- runner_manager.flush()
+ runner_scaler = self._get_runner_scaler(state)
+ runner_scaler.flush()
return
runner_manager = self._get_runner_manager(state)
runner_manager.flush(FlushMode.FLUSH_BUSY)
def _reconcile_runners(
- self, runner_manager: RunnerManager, num: int, resources: VirtualMachineResources
+ self, runner_manager: LXDRunnerManager, num: int, resources: VirtualMachineResources
) -> Dict[str, Any]:
"""Reconcile the current runners state and intended runner state.
@@ -918,7 +907,7 @@ def _reconcile_runners(
Returns:
Changes in runner number due to reconciling runners.
"""
- if not RunnerManager.runner_bin_path.is_file():
+ if not LXDRunnerManager.runner_bin_path.is_file():
logger.warning("Unable to reconcile due to missing runner binary")
raise MissingRunnerBinaryError("Runner binary not found.")
@@ -1148,10 +1137,10 @@ def _on_debug_ssh_relation_changed(self, _: ops.RelationChangedEvent) -> None:
if state.instance_type == InstanceType.OPENSTACK:
if not self._get_set_image_ready_status():
return
- runner_manager = self._get_openstack_runner_manager(state)
+ runner_scaler = self._get_runner_scaler(state)
# TODO: 2024-04-12: Should be flush idle.
- runner_manager.flush()
- runner_manager.reconcile(state.runner_config.virtual_machines)
+ runner_scaler.flush()
+ runner_scaler.reconcile(state.runner_config.virtual_machines)
return
self._refresh_firewall(state)
@@ -1176,10 +1165,10 @@ def _on_image_relation_changed(self, _: ops.RelationChangedEvent) -> None:
if not self._get_set_image_ready_status():
return
- runner_manager = self._get_openstack_runner_manager(state)
+ runner_scaler = self._get_runner_scaler(state)
# TODO: 2024-04-12: Should be flush idle.
- runner_manager.flush()
- runner_manager.reconcile(state.runner_config.virtual_machines)
+ runner_scaler.flush()
+ runner_scaler.reconcile(state.runner_config.virtual_machines)
self.unit.status = ActiveStatus()
return
@@ -1198,10 +1187,10 @@ def _get_set_image_ready_status(self) -> bool:
return False
return True
- def _get_openstack_runner_manager(
+ def _get_runner_scaler(
self, state: CharmState, token: str | None = None, path: GithubPath | None = None
- ) -> OpenstackRunnerManager:
- """Get OpenstackRunnerManager instance.
+ ) -> RunnerScaler:
+ """Get runner scaler instance for scaling runners.
TODO: 2024-07-09 Combine this with `_get_runner_manager` during the runner manager \
interface refactor.
@@ -1214,39 +1203,61 @@ def _get_openstack_runner_manager(
name. If None the path in charm state is used.
Returns:
- An instance of OpenstackRunnerManager.
+ An instance of RunnerScaler.
"""
if token is None:
token = state.charm_config.token
if path is None:
path = state.charm_config.path
- # Empty image can be passed down due to a delete only case where deletion of runners do not
- # depend on the image ID being available. Make sure that the charm goes to blocked status
- # in hook where a runner may be created. TODO: 2024-07-09 This logic is subject to
- # refactoring.
+ app_name, unit = self.unit.name.rsplit("/", 1)
+
+ clouds = list(state.charm_config.openstack_clouds_yaml["clouds"].keys())
+ if len(clouds) > 1:
+ logger.warning(
+ "Multiple clouds defined in clouds.yaml. Using the first one to connect."
+ )
+ cloud_config = OpenStackCloudConfig(
+ clouds_config=state.charm_config.openstack_clouds_yaml,
+ cloud=clouds[0],
+ )
+ server_config = None
+ image_labels = []
image = state.runner_config.openstack_image
- image_id = image.id if image and image.id else ""
- image_labels = image.tags if image and image.tags else []
+ if image and image.id:
+ server_config = OpenStackServerConfig(
+ image=image.id,
+ flavor=state.runner_config.openstack_flavor,
+ network=state.runner_config.openstack_network,
+ )
+ if image.tags:
+ image_labels += image.tags
- app_name, unit = self.unit.name.rsplit("/", 1)
- openstack_runner_manager_config = OpenstackRunnerManagerConfig(
- charm_state=state,
- path=path,
- token=token,
- labels=(*state.charm_config.labels, *image_labels),
- flavor=state.runner_config.openstack_flavor,
- image=image_id,
- network=state.runner_config.openstack_network,
+ runner_config = GitHubRunnerConfig(
+ github_path=path, labels=(*state.charm_config.labels, *image_labels)
+ )
+ service_config = SupportServiceConfig(
+ proxy_config=state.proxy_config,
dockerhub_mirror=state.charm_config.dockerhub_mirror,
- reactive_config=state.reactive_config,
+ ssh_debug_connections=state.ssh_debug_connections,
+ repo_policy_url=state.charm_config.repo_policy_compliance.url,
+ repo_policy_token=state.charm_config.repo_policy_compliance.token,
)
- return OpenstackRunnerManager(
+ openstack_runner_manager = OpenStackRunnerManager(
app_name,
- unit,
- openstack_runner_manager_config,
- state.charm_config.openstack_clouds_yaml,
+ cloud_config=cloud_config,
+ server_config=server_config,
+ runner_config=runner_config,
+ service_config=service_config,
+ )
+ runner_manager_config = RunnerManagerConfig(
+ token=token,
+ path=path,
+ )
+ runner_manager = RunnerManager(
+ cloud_runner_manager=openstack_runner_manager, config=runner_manager_config
)
+ return RunnerScaler(runner_manager=runner_manager)
if __name__ == "__main__":
diff --git a/src/errors.py b/src/errors.py
index 59d28a239..4285dc6e4 100644
--- a/src/errors.py
+++ b/src/errors.py
@@ -39,6 +39,10 @@ class RunnerAproxyError(RunnerError):
"""Error for setting up aproxy."""
+class MissingServerConfigError(RunnerError):
+ """Error for unable to create runner due to missing server configurations."""
+
+
class MissingRunnerBinaryError(Exception):
"""Error for missing runner binary."""
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
new file mode 100644
index 000000000..7bf9b81b9
--- /dev/null
+++ b/src/manager/runner_scaler.py
@@ -0,0 +1,138 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+
+import logging
+import time
+from typing import TypedDict
+
+from errors import IssueMetricEventError, MissingServerConfigError
+from manager.cloud_runner_manager import HealthState
+from manager.github_runner_manager import GitHubRunnerState
+from manager.runner_manager import FlushMode, RunnerManager
+from metrics import events as metric_events
+
+logger = logging.getLogger(__name__)
+
+
+class RunnerInfo(TypedDict):
+    """Information on the runners.
+
+    Attributes:
+        online: The number of runners in online state.
+        offline: The number of runners in offline state.
+        unknown: The number of runners in unknown state.
+        runners: The names of the online runners, i.e., the runners in busy or idle
+            state.
+    """
+
+    online: int
+    offline: int
+    unknown: int
+    runners: tuple[str, ...]
+
+
+class RunnerScaler:
+    """Manage the reconcile of runners."""
+
+    def __init__(self, runner_manager: RunnerManager):
+        """Construct the object.
+
+        Args:
+            runner_manager: The RunnerManager to perform runner reconcile.
+        """
+        self._manager = runner_manager
+
+    def get_runner_info(self) -> RunnerInfo:
+        """Get the number of runners per GitHub state and the names of the online ones.
+
+        Returns:
+            The information on the runners.
+        """
+        online = 0
+        offline = 0
+        unknown = 0
+        names = []
+        for runner in self._manager.get_runners():
+            match runner.github_state:
+                case GitHubRunnerState.BUSY | GitHubRunnerState.IDLE:
+                    online += 1
+                    names.append(runner.name)
+                case GitHubRunnerState.OFFLINE:
+                    offline += 1
+                case _:
+                    unknown += 1
+        return RunnerInfo(online=online, offline=offline, unknown=unknown, runners=tuple(names))
+
+    def flush(self, flush_mode: FlushMode) -> None:
+        """Flush the runners.
+
+        Args:
+            flush_mode: Determines the types of runner to be flushed.
+        """
+        # NOTE(review): callers in charm.py invoke flush() without an argument and use its
+        # return value; confirm whether a default mode and a return value are intended.
+        self._manager.cleanup()
+        self._manager.delete_runners(flush_mode=flush_mode)
+
+    def reconcile(self, num_of_runner: int) -> int:
+        """Reconcile the quantity of runners.
+
+        Args:
+            num_of_runner: The number of intended runners.
+
+        Returns:
+            The Change in number of runners.
+        """
+        logger.info("Start reconcile to %s runner", num_of_runner)
+
+        start_timestamp = time.time()
+        delete_metric_stats = None
+        metric_stats = self._manager.cleanup()
+        current_num = len(self._manager.get_runners())
+        logger.info("Reconcile runners from %s to %s", current_num, num_of_runner)
+        runner_diff = num_of_runner - current_num
+        if runner_diff > 0:
+            try:
+                self._manager.create_runners(runner_diff)
+            except MissingServerConfigError:
+                logger.exception(
+                    "Unable to spawn runner due to missing server configuration, such as, image."
+                )
+        elif runner_diff < 0:
+            delete_metric_stats = self._manager.delete_runners(-runner_diff)
+        else:
+            logger.info("No changes to the number of runners.")
+        end_timestamp = time.time()
+
+        # Merge the two metric stats into a mapping of event type to summed count.
+        if delete_metric_stats is not None:
+            metric_stats = {
+                name: delete_metric_stats.get(name, 0) + metric_stats.get(name, 0)
+                for name in set(delete_metric_stats) | set(metric_stats)
+            }
+
+        runner_list = self._manager.get_runners()
+        idle_runners = [r for r in runner_list if r.github_state == GitHubRunnerState.IDLE]
+        offline_healthy_runners = [
+            runner
+            for runner in runner_list
+            if runner.github_state == GitHubRunnerState.OFFLINE
+            and runner.health == HealthState.HEALTHY
+        ]
+
+        try:
+            metric_events.issue_event(
+                metric_events.Reconciliation(
+                    timestamp=time.time(),
+                    flavor=self._manager.name_prefix,
+                    crashed_runners=metric_stats.get(metric_events.RunnerStart, 0)
+                    - metric_stats.get(metric_events.RunnerStop, 0),
+                    idle_runners=len(set(idle_runners) | set(offline_healthy_runners)),
+                    duration=end_timestamp - start_timestamp,
+                )
+            )
+        except IssueMetricEventError:
+            logger.exception("Failed to issue Reconciliation metric")
+
+        return runner_diff
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index c58d131c0..45231bdb0 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -22,6 +22,7 @@
GetMetricsStorageError,
IssueMetricEventError,
KeyfileError,
+ MissingServerConfigError,
OpenStackError,
RunnerCreateError,
RunnerStartError,
@@ -105,7 +106,7 @@ class _RunnerHealth:
unhealthy: tuple[OpenstackInstance, ...]
-class OpenstackRunnerManager(CloudRunnerManager):
+class OpenStackRunnerManager(CloudRunnerManager):
"""Manage self-hosted runner on OpenStack cloud.
Attributes:
@@ -117,7 +118,7 @@ def __init__( # pylint: disable=R0913
self,
prefix: str,
cloud_config: OpenStackCloudConfig,
- server_config: OpenStackServerConfig,
+ server_config: OpenStackServerConfig | None,
runner_config: GitHubRunnerConfig,
service_config: SupportServiceConfig,
) -> None:
@@ -126,7 +127,8 @@ def __init__( # pylint: disable=R0913
Args:
prefix: The prefix to runner name.
cloud_config: The configuration for OpenStack authorisation.
- server_config: The configuration for creating OpenStack server.
+ server_config: The configuration for creating OpenStack server. Unable to create
+ runner if None.
runner_config: The configuration for the runner.
service_config: The configuration of supporting services of the runners.
"""
@@ -162,8 +164,11 @@ def create_runner(self, registration_token: str) -> InstanceId:
Returns:
Instance ID of the runner.
"""
+ if self._server_config is None:
+ raise MissingServerConfigError("Missing server configuration to create runners")
+
start_timestamp = time.time()
- instance_id = OpenstackRunnerManager._generate_instance_id()
+ instance_id = OpenStackRunnerManager._generate_instance_id()
instance_name = self._openstack_cloud.get_server_name(instance_id=instance_id)
cloud_init = self._generate_cloud_init(
instance_name=instance_name, registration_token=registration_token
@@ -183,7 +188,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
self._wait_runner_running(instance)
end_timestamp = time.time()
- OpenstackRunnerManager._issue_runner_installed_metric(
+ OpenStackRunnerManager._issue_runner_installed_metric(
name=instance_name,
flavor=self.name_prefix,
install_start_timestamp=start_timestamp,
@@ -301,7 +306,7 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token: str) -> None
self._pull_runner_metrics(instance.server_name, ssh_conn)
try:
- OpenstackRunnerManager._run_runner_removal_script(
+ OpenStackRunnerManager._run_runner_removal_script(
instance.server_name, ssh_conn, remove_token
)
except GithubRunnerRemoveError:
@@ -460,7 +465,7 @@ def _health_check(self, instance: OpenstackInstance) -> bool:
"SSH connection failure with %s during health check", instance.server_name
)
raise
- return OpenstackRunnerManager._run_health_check(ssh_conn, instance.server_name)
+ return OpenStackRunnerManager._run_health_check(ssh_conn, instance.server_name)
@staticmethod
def _run_health_check(ssh_conn: SSHConnection, name: str) -> bool:
@@ -611,13 +616,13 @@ def _pull_runner_metrics(name: str, ssh_conn: SSHConnection) -> None:
return
try:
- OpenstackRunnerManager._ssh_pull_file(
+ OpenStackRunnerManager._ssh_pull_file(
ssh_conn=ssh_conn,
remote_path=str(METRICS_EXCHANGE_PATH / "pre-job-metrics.json"),
local_path=str(storage.path / "pre-job-metrics.json"),
max_size=MAX_METRICS_FILE_SIZE,
)
- OpenstackRunnerManager._ssh_pull_file(
+ OpenStackRunnerManager._ssh_pull_file(
ssh_conn=ssh_conn,
remote_path=str(METRICS_EXCHANGE_PATH / "post-job-metrics.json"),
local_path=str(storage.path / "post-job-metrics.json"),
diff --git a/src/runner_manager.py b/src/runner_manager.py
index 8d68a68c9..e79d9f7a6 100644
--- a/src/runner_manager.py
+++ b/src/runner_manager.py
@@ -41,7 +41,7 @@
from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
from repo_policy_compliance_client import RepoPolicyComplianceClient
from runner import LXD_PROFILE_YAML, CreateRunnerConfig, Runner, RunnerConfig, RunnerStatus
-from runner_manager_type import FlushMode, RunnerInfo, RunnerManagerClients, RunnerManagerConfig
+from runner_manager_type import FlushMode, LXDRunnerManagerConfig, RunnerInfo, RunnerManagerClients
from runner_type import ProxySetting as RunnerProxySetting
from runner_type import RunnerNameByHealth
from utilities import execute_command, retry, set_env_var
@@ -56,7 +56,7 @@
IssuedMetricEventsStats = dict[Type[metric_events.Event], int]
-class RunnerManager:
+class LXDRunnerManager:
"""Manage a group of runners according to configuration.
Attributes:
@@ -71,7 +71,7 @@ def __init__(
self,
app_name: str,
unit: int,
- runner_manager_config: RunnerManagerConfig,
+ runner_manager_config: LXDRunnerManagerConfig,
) -> None:
"""Construct RunnerManager object for creating and managing runners.
@@ -159,7 +159,7 @@ def update_runner_bin(self, binary: RunnerApplication) -> None:
try:
# Delete old version of runner binary.
- RunnerManager.runner_bin_path.unlink(missing_ok=True)
+ LXDRunnerManager.runner_bin_path.unlink(missing_ok=True)
except OSError as err:
logger.exception("Unable to perform file operation on the runner binary path")
raise RunnerBinaryError("File operation failed on the runner binary path") from err
@@ -182,7 +182,7 @@ def update_runner_bin(self, binary: RunnerApplication) -> None:
sha256 = hashlib.sha256()
- with RunnerManager.runner_bin_path.open(mode="wb") as file:
+ with LXDRunnerManager.runner_bin_path.open(mode="wb") as file:
# Process with chunk_size of 128 KiB.
for chunk in response.iter_content(chunk_size=128 * 1024, decode_unicode=False):
file.write(chunk)
@@ -267,7 +267,7 @@ def _create_runner(
config=CreateRunnerConfig(
image=self.config.image,
resources=resources,
- binary_path=RunnerManager.runner_bin_path,
+ binary_path=LXDRunnerManager.runner_bin_path,
registration_token=registration_token,
arch=self.config.charm_state.arch,
)
@@ -309,7 +309,7 @@ def _create_runner(
config=CreateRunnerConfig(
image=self.config.image,
resources=resources,
- binary_path=RunnerManager.runner_bin_path,
+ binary_path=LXDRunnerManager.runner_bin_path,
registration_token=registration_token,
arch=self.config.charm_state.arch,
)
@@ -447,7 +447,7 @@ def _spawn_new_runners(self, count: int, resources: VirtualMachineResources) ->
Raises:
RunnerCreateError: If there was an error spawning new runner.
"""
- if not RunnerManager.runner_bin_path.exists():
+ if not LXDRunnerManager.runner_bin_path.exists():
raise RunnerCreateError("Unable to create runner due to missing runner binary.")
logger.info("Getting registration token for GitHub runners.")
registration_token = self._clients.github.get_runner_registration_token(self.config.path)
diff --git a/src/runner_manager_type.py b/src/runner_manager_type.py
index f3a2112f5..343b1eb04 100644
--- a/src/runner_manager_type.py
+++ b/src/runner_manager_type.py
@@ -61,7 +61,7 @@ class RunnerManagerClients:
@dataclass
# The instance attributes are all required.
-class RunnerManagerConfig: # pylint: disable=too-many-instance-attributes
+class LXDRunnerManagerConfig: # pylint: disable=too-many-instance-attributes
"""Configuration of runner manager.
Attributes:
diff --git a/tests/integration/helpers/common.py b/tests/integration/helpers/common.py
index 16622c038..495c952b3 100644
--- a/tests/integration/helpers/common.py
+++ b/tests/integration/helpers/common.py
@@ -36,7 +36,7 @@
TOKEN_CONFIG_NAME,
VIRTUAL_MACHINES_CONFIG_NAME,
)
-from runner_manager import RunnerManager
+from runner_manager import LXDRunnerManager
from tests.status_name import ACTIVE
DISPATCH_TEST_WORKFLOW_FILENAME = "workflow_dispatch_test.yaml"
@@ -93,7 +93,7 @@ async def check_runner_binary_exists(unit: Unit) -> bool:
Returns:
Whether the runner binary file exists in the charm.
"""
- return_code, _, _ = await run_in_unit(unit, f"test -f {RunnerManager.runner_bin_path}")
+ return_code, _, _ = await run_in_unit(unit, f"test -f {LXDRunnerManager.runner_bin_path}")
return return_code == 0
@@ -141,10 +141,10 @@ async def remove_runner_bin(unit: Unit) -> None:
Args:
unit: Unit instance to check for the LXD profile.
"""
- await run_in_unit(unit, f"rm {RunnerManager.runner_bin_path}")
+ await run_in_unit(unit, f"rm {LXDRunnerManager.runner_bin_path}")
# No file should exists under with the filename.
- return_code, _, _ = await run_in_unit(unit, f"test -f {RunnerManager.runner_bin_path}")
+ return_code, _, _ = await run_in_unit(unit, f"test -f {LXDRunnerManager.runner_bin_path}")
assert return_code != 0
diff --git a/tests/integration/test_charm_scheduled_events.py b/tests/integration/test_charm_scheduled_events.py
index aa4a9f1b3..5e9819f23 100644
--- a/tests/integration/test_charm_scheduled_events.py
+++ b/tests/integration/test_charm_scheduled_events.py
@@ -13,7 +13,7 @@
from juju.application import Application
from juju.model import Model
-from runner_manager import RunnerManager
+from runner_manager import LXDRunnerManager
from tests.integration.helpers.common import check_runner_binary_exists
from tests.integration.helpers.lxd import get_runner_names, run_in_unit, wait_till_num_of_runners
from tests.status_name import ACTIVE
@@ -40,7 +40,7 @@ async def test_update_interval(model: Model, app_scheduled_events: Application)
unit = app_scheduled_events.units[0]
assert await check_runner_binary_exists(unit)
- ret_code, stdout, stderr = await run_in_unit(unit, f"rm -f {RunnerManager.runner_bin_path}")
+ ret_code, stdout, stderr = await run_in_unit(unit, f"rm -f {LXDRunnerManager.runner_bin_path}")
assert ret_code == 0, f"Failed to remove runner binary {stdout} {stderr}"
assert not await check_runner_binary_exists(unit)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 523c2b5ae..58410b402 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -25,7 +25,7 @@
from openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH
from openstack_cloud.openstack_runner_manager import (
OpenStackCloudConfig,
- OpenstackRunnerManager,
+ OpenStackRunnerManager,
OpenStackServerConfig,
)
from tests.integration.helpers.common import (
@@ -96,7 +96,7 @@ async def openstack_runner_manager_fixture(
proxy_config: ProxyConfig,
runner_label: str,
openstack_connection: OpenstackConnection,
-) -> OpenstackRunnerManager:
+) -> OpenStackRunnerManager:
"""Create OpenstackRunnerManager instance.
The prefix args of OpenstackRunnerManager set to app_name to let openstack_connection_fixture
@@ -125,14 +125,14 @@ async def openstack_runner_manager_fixture(
repo_policy_url=None,
repo_policy_token=None,
)
- return OpenstackRunnerManager(
+ return OpenStackRunnerManager(
app_name, cloud_config, server_config, runner_config, service_config
)
@pytest_asyncio.fixture(scope="module", name="runner_manager")
async def runner_manager_fixture(
- openstack_runner_manager: OpenstackRunnerManager,
+ openstack_runner_manager: OpenStackRunnerManager,
token: str,
github_path: GithubPath,
log_dir_base_path: dict[str, Path],
@@ -219,7 +219,7 @@ async def test_get_no_runner(runner_manager: RunnerManager) -> None:
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
async def test_runner_normal_idle_lifecycle(
- runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+ runner_manager: RunnerManager, openstack_runner_manager: OpenStackRunnerManager
) -> None:
"""
Arrange: RunnerManager instance with no runners.
@@ -381,7 +381,7 @@ async def test_runner_normal_lifecycle(
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
async def test_runner_spawn_two(
- runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+ runner_manager: RunnerManager, openstack_runner_manager: OpenStackRunnerManager
) -> None:
"""
Arrange: RunnerManager instance with no runners.
diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py
index a7276f078..994e0a4cf 100644
--- a/tests/unit/test_charm.py
+++ b/tests/unit/test_charm.py
@@ -47,7 +47,7 @@
from event_timer import EventTimer, TimerEnableError
from firewall import FirewallEntry
from github_type import GitHubRunnerStatus
-from runner_manager import RunnerInfo, RunnerManagerConfig
+from runner_manager import LXDRunnerManagerConfig, RunnerInfo
TEST_PROXY_SERVER_URL = "http://proxy.server:1234"
@@ -458,7 +458,7 @@ def test_org_register(self, run, wt, mkdir, rm):
rm.assert_called_with(
"github-runner",
"0",
- RunnerManagerConfig(
+ LXDRunnerManagerConfig(
path=GithubOrg(org="mockorg", group="mockgroup"),
token="mocktoken",
image="jammy",
@@ -488,7 +488,7 @@ def test_repo_register(self, run, wt, mkdir, rm):
rm.assert_called_with(
"github-runner",
"0",
- RunnerManagerConfig(
+ LXDRunnerManagerConfig(
path=GithubRepo(owner="mockorg", repo="repo"),
token="mocktoken",
image="jammy",
@@ -546,7 +546,7 @@ def test_update_config(self, run, wt, mkdir, rm):
rm.assert_called_with(
"github-runner",
"0",
- RunnerManagerConfig(
+ LXDRunnerManagerConfig(
path=GithubRepo(owner="mockorg", repo="repo"),
token="mocktoken",
image="jammy",
@@ -568,7 +568,7 @@ def test_update_config(self, run, wt, mkdir, rm):
rm.assert_called_with(
"github-runner",
"0",
- RunnerManagerConfig(
+ LXDRunnerManagerConfig(
path=GithubRepo(owner="mockorg", repo="repo"),
token="mocktoken",
image="jammy",
@@ -926,7 +926,7 @@ def test__on_image_relation_image_ready():
harness.charm._setup_state = MagicMock(return_value=state_mock)
harness.charm._get_set_image_ready_status = MagicMock(return_value=True)
runner_manager_mock = MagicMock()
- harness.charm._get_openstack_runner_manager = MagicMock(return_value=runner_manager_mock)
+ harness.charm._get_runner_scaler = MagicMock(return_value=runner_manager_mock)
harness.charm._on_image_relation_changed(MagicMock())
diff --git a/tests/unit/test_runner_manager.py b/tests/unit/test_runner_manager.py
index 66b09cd60..7d7600825 100644
--- a/tests/unit/test_runner_manager.py
+++ b/tests/unit/test_runner_manager.py
@@ -28,7 +28,7 @@
from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
from metrics.storage import MetricsStorage
from runner import Runner, RunnerStatus
-from runner_manager import BUILD_IMAGE_SCRIPT_FILENAME, RunnerManager, RunnerManagerConfig
+from runner_manager import BUILD_IMAGE_SCRIPT_FILENAME, LXDRunnerManager, LXDRunnerManagerConfig
from runner_type import RunnerNameByHealth
from tests.unit.mock import TEST_BINARY, MockLxdImageManager
@@ -87,10 +87,10 @@ def runner_manager_fixture(request, tmp_path, monkeypatch, token, charm_state):
pool_path = tmp_path / "test_storage"
pool_path.mkdir(exist_ok=True)
- runner_manager = RunnerManager(
+ runner_manager = LXDRunnerManager(
"test app",
"0",
- RunnerManagerConfig(
+ LXDRunnerManagerConfig(
path=request.param[0],
token=token,
image=IMAGE_NAME,
@@ -144,7 +144,7 @@ def reactive_reconcile_fixture(monkeypatch: MonkeyPatch, tmp_path: Path) -> Magi
pytest.param(Arch.X64),
],
)
-def test_get_latest_runner_bin_url(runner_manager: RunnerManager, arch: Arch, charm_state):
+def test_get_latest_runner_bin_url(runner_manager: LXDRunnerManager, arch: Arch, charm_state):
"""
arrange: Nothing.
act: Get runner bin url of existing binary.
@@ -168,7 +168,7 @@ def test_get_latest_runner_bin_url(runner_manager: RunnerManager, arch: Arch, ch
assert runner_bin["filename"] == filename
-def test_get_latest_runner_bin_url_missing_binary(runner_manager: RunnerManager):
+def test_get_latest_runner_bin_url_missing_binary(runner_manager: LXDRunnerManager):
"""
arrange: Given a mocked GH API client that does not return any runner binaries.
act: Get runner bin url of non-existing binary.
@@ -181,7 +181,7 @@ def test_get_latest_runner_bin_url_missing_binary(runner_manager: RunnerManager)
runner_manager.get_latest_runner_bin_url(os_name="not_exist")
-def test_update_runner_bin(runner_manager: RunnerManager):
+def test_update_runner_bin(runner_manager: LXDRunnerManager):
"""
arrange: Remove the existing runner binary.
act: Update runner binary.
@@ -222,7 +222,7 @@ def iter_content(self, *args, **kwargs):
assert runner_manager.runner_bin_path.read_bytes() == TEST_BINARY
-def test_reconcile_zero_count(runner_manager: RunnerManager):
+def test_reconcile_zero_count(runner_manager: LXDRunnerManager):
"""
arrange: Nothing.
act: Reconcile with the current amount of runner.
@@ -234,7 +234,7 @@ def test_reconcile_zero_count(runner_manager: RunnerManager):
assert delta == 0
-def test_reconcile_create_runner(runner_manager: RunnerManager):
+def test_reconcile_create_runner(runner_manager: LXDRunnerManager):
"""
arrange: Nothing.
act: Reconcile to create a runner.
@@ -246,7 +246,7 @@ def test_reconcile_create_runner(runner_manager: RunnerManager):
assert delta == 1
-def test_reconcile_remove_runner(runner_manager: RunnerManager):
+def test_reconcile_remove_runner(runner_manager: LXDRunnerManager):
"""
arrange: Create online runners.
act: Reconcile to remove a runner.
@@ -282,7 +282,7 @@ def mock_get_runners():
assert delta == -1
-def test_reconcile(runner_manager: RunnerManager, tmp_path: Path):
+def test_reconcile(runner_manager: LXDRunnerManager, tmp_path: Path):
"""
arrange: Setup one runner.
act: Reconcile with the current amount of runner.
@@ -295,7 +295,7 @@ def test_reconcile(runner_manager: RunnerManager, tmp_path: Path):
assert len(runner_manager._get_runners()) == 1
-def test_empty_flush(runner_manager: RunnerManager):
+def test_empty_flush(runner_manager: LXDRunnerManager):
"""
arrange: No initial runners.
act: Perform flushing with no runners.
@@ -305,7 +305,7 @@ def test_empty_flush(runner_manager: RunnerManager):
runner_manager.flush()
-def test_flush(runner_manager: RunnerManager, tmp_path: Path):
+def test_flush(runner_manager: LXDRunnerManager, tmp_path: Path):
"""
arrange: Create some runners.
act: Perform flushing.
@@ -319,7 +319,7 @@ def test_flush(runner_manager: RunnerManager, tmp_path: Path):
def test_reconcile_issues_runner_installed_event(
- runner_manager: RunnerManager,
+ runner_manager: LXDRunnerManager,
monkeypatch: MonkeyPatch,
issue_event_mock: MagicMock,
charm_state: MagicMock,
@@ -341,7 +341,7 @@ def test_reconcile_issues_runner_installed_event(
def test_reconcile_issues_no_runner_installed_event_if_metrics_disabled(
- runner_manager: RunnerManager, issue_event_mock: MagicMock, charm_state: MagicMock
+ runner_manager: LXDRunnerManager, issue_event_mock: MagicMock, charm_state: MagicMock
):
"""
arrange: Disable issuing of metrics.
@@ -356,7 +356,7 @@ def test_reconcile_issues_no_runner_installed_event_if_metrics_disabled(
def test_reconcile_error_on_issue_event_is_ignored(
- runner_manager: RunnerManager,
+ runner_manager: LXDRunnerManager,
issue_event_mock: MagicMock,
charm_state: MagicMock,
):
@@ -375,7 +375,7 @@ def test_reconcile_error_on_issue_event_is_ignored(
def test_reconcile_issues_reconciliation_metric_event(
- runner_manager: RunnerManager,
+ runner_manager: LXDRunnerManager,
monkeypatch: MonkeyPatch,
issue_event_mock: MagicMock,
runner_metrics: MagicMock,
@@ -458,7 +458,7 @@ def mock_get_runners():
def test_reconcile_places_timestamp_in_newly_created_runner(
- runner_manager: RunnerManager,
+ runner_manager: LXDRunnerManager,
monkeypatch: MonkeyPatch,
shared_fs: MagicMock,
tmp_path: Path,
@@ -485,7 +485,7 @@ def test_reconcile_places_timestamp_in_newly_created_runner(
def test_reconcile_error_on_placing_timestamp_is_ignored(
- runner_manager: RunnerManager, shared_fs: MagicMock, tmp_path: Path, charm_state: MagicMock
+ runner_manager: LXDRunnerManager, shared_fs: MagicMock, tmp_path: Path, charm_state: MagicMock
):
"""
arrange: Enable issuing of metrics and do not create the directory for the shared filesystem\
@@ -504,7 +504,7 @@ def test_reconcile_error_on_placing_timestamp_is_ignored(
def test_reconcile_places_no_timestamp_in_newly_created_runner_if_metrics_disabled(
- runner_manager: RunnerManager, shared_fs: MagicMock, tmp_path: Path, charm_state: MagicMock
+ runner_manager: LXDRunnerManager, shared_fs: MagicMock, tmp_path: Path, charm_state: MagicMock
):
"""
arrange: Disable issuing of metrics, mock timestamps and the shared filesystem module.
@@ -522,7 +522,7 @@ def test_reconcile_places_no_timestamp_in_newly_created_runner_if_metrics_disabl
def test_reconcile_reactive_mode(
- runner_manager: RunnerManager,
+ runner_manager: LXDRunnerManager,
reactive_reconcile_mock: MagicMock,
caplog: LogCaptureFixture,
):
@@ -542,7 +542,7 @@ def test_reconcile_reactive_mode(
def test_schedule_build_runner_image(
- runner_manager: RunnerManager,
+ runner_manager: LXDRunnerManager,
tmp_path: Path,
charm_state: CharmState,
monkeypatch: MonkeyPatch,
@@ -569,7 +569,7 @@ def test_schedule_build_runner_image(
assert cronfile.read_text() == f"4 4,10,16,22 * * * ubuntu {cmd} jammy\n"
-def test_has_runner_image(runner_manager: RunnerManager):
+def test_has_runner_image(runner_manager: LXDRunnerManager):
"""
arrange: Multiple setups.
1. no runner image exists.
From ab1c4b814200dc92cbdc24ce8303bebb879d66cb Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 20 Aug 2024 10:44:59 +0800
Subject: [PATCH 196/278] Fix lints
---
src-docs/charm.md | 10 +++----
src-docs/metrics.md | 3 ++
src-docs/openstack_cloud.md | 11 +++++++
...penstack_cloud.openstack_runner_manager.md | 9 +++---
src/charm.py | 11 ++++---
src/manager/runner_scaler.py | 29 ++++++++++++++-----
.../openstack_runner_manager.py | 1 +
7 files changed, 54 insertions(+), 20 deletions(-)
diff --git a/src-docs/charm.md b/src-docs/charm.md
index c03d1411b..9fd2aac04 100644
--- a/src-docs/charm.md
+++ b/src-docs/charm.md
@@ -20,7 +20,7 @@ Charm for creating and managing GitHub self-hosted runner instances.
---
-
+
## function `catch_charm_errors`
@@ -46,7 +46,7 @@ Catch common errors in charm.
---
-
+
## function `catch_action_errors`
@@ -72,7 +72,7 @@ Catch common errors in actions.
---
-
+
## class `ReconcileRunnersEvent`
Event representing a periodic check to ensure runners are ok.
@@ -83,7 +83,7 @@ Event representing a periodic check to ensure runners are ok.
---
-
+
## class `GithubRunnerCharm`
Charm for managing GitHub self-hosted runners.
@@ -100,7 +100,7 @@ Charm for managing GitHub self-hosted runners.
- `ram_pool_path`: The path to memdisk storage.
- `kernel_module_path`: The path to kernel modules.
-
+
### method `__init__`
diff --git a/src-docs/metrics.md b/src-docs/metrics.md
index 25c650c41..e4275c340 100644
--- a/src-docs/metrics.md
+++ b/src-docs/metrics.md
@@ -22,5 +22,8 @@ Package for common metrics-related code.
- **github**: # Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
+- **runner_logs**: # Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
diff --git a/src-docs/openstack_cloud.md b/src-docs/openstack_cloud.md
index 80cdb146b..51140a4b2 100644
--- a/src-docs/openstack_cloud.md
+++ b/src-docs/openstack_cloud.md
@@ -5,6 +5,17 @@
# module `openstack_cloud`
Module for managing Openstack cloud.
+**Global Variables**
+---------------
+- **openstack_cloud**: # Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+- **openstack_manager**: # Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+- **openstack_runner_manager**: # Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
---
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 7499e4ce5..02841811f 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -131,7 +131,7 @@ The prefix of runner names.
---
-
+
### method `cleanup`
@@ -174,6 +174,7 @@ Create a self-hosted runner.
**Raises:**
+ - `MissingServerConfigError`: Unable to create runner due to missing configuration.
- `RunnerCreateError`: Unable to create runner due to OpenStack issues.
@@ -183,7 +184,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -207,7 +208,7 @@ Delete self-hosted runners.
---
-
+
### method `get_runner`
@@ -230,7 +231,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/charm.py b/src/charm.py
index b869e3627..d3b48d44d 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -11,7 +11,6 @@
from manager.cloud_runner_manager import GitHubRunnerConfig, SupportServiceConfig
from manager.runner_manager import RunnerManager, RunnerManagerConfig
from manager.runner_scaler import RunnerScaler
-from openstack_cloud.openstack_runner_manager import OpenStackCloudConfig, OpenStackServerConfig
from utilities import bytes_with_unit_to_kib, execute_command, remove_residual_venv_dirs, retry
# This is a workaround for https://bugs.launchpad.net/juju/+bug/2058335
@@ -82,10 +81,14 @@
from event_timer import EventTimer, TimerStatusError
from firewall import Firewall, FirewallEntry
from github_type import GitHubRunnerStatus
-from openstack_cloud.openstack_runner_manager import OpenStackRunnerManager
+from openstack_cloud.openstack_runner_manager import (
+ OpenStackCloudConfig,
+ OpenStackRunnerManager,
+ OpenStackServerConfig,
+)
from runner import LXD_PROFILE_YAML
from runner_manager import LXDRunnerManager, LXDRunnerManagerConfig
-from runner_manager_type import FlushMode, OpenstackRunnerManagerConfig
+from runner_manager_type import FlushMode
RECONCILE_RUNNERS_EVENT = "reconcile-runners"
@@ -1210,7 +1213,7 @@ def _get_runner_scaler(
if path is None:
path = state.charm_config.path
- app_name, unit = self.unit.name.rsplit("/", 1)
+ app_name, _ = self.unit.name.rsplit("/", 1)
clouds = list(state.charm_config.openstack_clouds_yaml["clouds"].keys())
if len(clouds) > 1:
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index 7bf9b81b9..980c542d0 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -1,6 +1,7 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
+"""Module for scaling the runners amount."""
import logging
import time
@@ -15,8 +16,7 @@
logger = logging.getLogger(__name__)
-@TypedDict
-class RunnerInfo:
+class RunnerInfo(TypedDict):
"""Information on the runners.
Attributes:
@@ -39,11 +39,16 @@ def __init__(self, runner_manager: RunnerManager):
"""Construct the object.
Args:
- runner_manager: The RunnerManager to preform runner reconcile.
+ runner_manager: The RunnerManager to perform runner reconcile.
"""
self._manager = runner_manager
def get_runner_info(self) -> RunnerInfo:
+ """Get information on the runners.
+
+ Returns:
+ The information on the runners.
+ """
runner_list = self._manager.get_runners()
online = 0
offline = 0
@@ -61,16 +66,26 @@ def get_runner_info(self) -> RunnerInfo:
offline += 1
case _:
unknown += 1
- return RunnerInfo(online=online, offline=offline, unknown=unknown, runners=online_runners)
+ return RunnerInfo(
+ online=online, offline=offline, unknown=unknown, runners=tuple(online_runners)
+ )
- def flush(self, flush_mode: FlushMode) -> None:
+ def flush(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> int:
"""Flush the runners.
Args:
flush_mode: Determines the types of runner to be flushed.
+
+ Returns:
+ Number of runners flushed.
"""
- self._manager.cleanup()
- self._manager.delete_runners(flush_mode=flush_mode)
+ metric_stats = self._manager.cleanup()
+ delete_metric_stats = self._manager.delete_runners(flush_mode=flush_mode)
+ metric_stats = {
+ event_name: delete_metric_stats.get(event_name, 0) + metric_stats.get(event_name, 0)
+ for event_name in set(delete_metric_stats) | set(metric_stats)
+ }
+ return metric_stats.get(metric_events.RunnerStop, 0)
def reconcile(self, num_of_runner: int) -> int:
"""Reconcile the quantity of runners.
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 45231bdb0..00612ce03 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -159,6 +159,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
registration_token: The GitHub registration token for registering runners.
Raises:
+ MissingServerConfigError: Unable to create runner due to missing configuration.
RunnerCreateError: Unable to create runner due to OpenStack issues.
Returns:
From ba2800d67d92e80725454e7e2528410122b4669d Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 20 Aug 2024 13:18:37 +0800
Subject: [PATCH 197/278] Fix unit test
---
tests/unit/conftest.py | 10 +++---
tests/unit/test_charm.py | 36 ++++++++++---------
..._manager.py => test_lxd_runner_manager.py} | 4 +--
3 files changed, 26 insertions(+), 24 deletions(-)
rename tests/unit/{test_runner_manager.py => test_lxd_runner_manager.py} (99%)
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index 3ee0259f7..cb50275f6 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -10,7 +10,7 @@
import pytest
import utilities
-from openstack_cloud import openstack_manager
+from manager.runner_scaler import RunnerScaler
from tests.unit.mock import MockGhapiClient, MockLxdClient, MockRepoPolicyComplianceClient
@@ -46,7 +46,7 @@ def disk_usage_mock(total_disk: int):
@pytest.fixture(autouse=True)
def mocks(monkeypatch, tmp_path, exec_command, lxd_exec_command, runner_binary_path):
- openstack_manager_mock = unittest.mock.MagicMock(spec=openstack_manager)
+ runner_scaler_mock = unittest.mock.MagicMock(spec=RunnerScaler)
cron_path = tmp_path / "cron.d"
cron_path.mkdir()
@@ -61,7 +61,7 @@ def mocks(monkeypatch, tmp_path, exec_command, lxd_exec_command, runner_binary_p
monkeypatch.setattr(
"charm.GithubRunnerCharm.repo_check_systemd_service", tmp_path / "systemd_service"
)
- monkeypatch.setattr("charm.OpenstackRunnerManager", openstack_manager_mock)
+ monkeypatch.setattr("charm.RunnerScaler", runner_scaler_mock)
monkeypatch.setattr("charm.GithubRunnerCharm.kernel_module_path", tmp_path / "modules")
monkeypatch.setattr("charm.GithubRunnerCharm._update_kernel", lambda self, now: None)
monkeypatch.setattr("charm.execute_command", exec_command)
@@ -86,8 +86,8 @@ def mocks(monkeypatch, tmp_path, exec_command, lxd_exec_command, runner_binary_p
monkeypatch.setattr("runner_manager.LxdClient", MockLxdClient)
monkeypatch.setattr("runner_manager.shared_fs", unittest.mock.MagicMock())
monkeypatch.setattr("runner_manager.execute_command", exec_command)
- monkeypatch.setattr("runner_manager.RunnerManager.runner_bin_path", runner_binary_path)
- monkeypatch.setattr("runner_manager.RunnerManager.cron_path", cron_path)
+ monkeypatch.setattr("runner_manager.LXDRunnerManager.runner_bin_path", runner_binary_path)
+ monkeypatch.setattr("runner_manager.LXDRunnerManager.cron_path", cron_path)
monkeypatch.setattr(
"runner_manager.RepoPolicyComplianceClient", MockRepoPolicyComplianceClient
)
diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py
index 994e0a4cf..d44eca542 100644
--- a/tests/unit/test_charm.py
+++ b/tests/unit/test_charm.py
@@ -156,8 +156,10 @@ def stub_update_runner_bin(*args, **kwargs) -> None:
harness = Harness(GithubRunnerCharm)
harness.update_config({PATH_CONFIG_NAME: "mock/repo", TOKEN_CONFIG_NAME: "mocktoken"})
harness.begin()
- monkeypatch.setattr("runner_manager.RunnerManager.update_runner_bin", stub_update_runner_bin)
- monkeypatch.setattr("runner_manager.RunnerManager._runners_in_pre_job", lambda self: False)
+ monkeypatch.setattr(
+ "runner_manager.LXDRunnerManager.update_runner_bin", stub_update_runner_bin
+ )
+ monkeypatch.setattr("runner_manager.LXDRunnerManager._runners_in_pre_job", lambda self: False)
monkeypatch.setattr("charm.EventTimer.ensure_event_timer", MagicMock())
monkeypatch.setattr("charm.logrotate.setup", MagicMock())
return harness
@@ -206,7 +208,7 @@ def test_common_install_code(
monkeypatch.setattr("charm.logrotate.setup", setup_logrotate := MagicMock())
monkeypatch.setattr(
- "runner_manager.RunnerManager.schedule_build_runner_image",
+ "runner_manager.LXDRunnerManager.schedule_build_runner_image",
schedule_build_runner_image := MagicMock(),
)
event_timer_mock = MagicMock(spec=EventTimer)
@@ -241,11 +243,11 @@ def test_common_install_code_does_not_rebuild_image(
assert: Image is not rebuilt.
"""
monkeypatch.setattr(
- "runner_manager.RunnerManager.build_runner_image",
+ "runner_manager.LXDRunnerManager.build_runner_image",
build_runner_image := MagicMock(),
)
monkeypatch.setattr(
- "runner_manager.RunnerManager.has_runner_image",
+ "runner_manager.LXDRunnerManager.has_runner_image",
MagicMock(return_value=True),
)
getattr(harness.charm.on, hook).emit()
@@ -437,7 +439,7 @@ def test_database_integration_events_trigger_reconciliation(
class TestCharm(unittest.TestCase):
"""Test the GithubRunner charm."""
- @patch("charm.RunnerManager")
+ @patch("charm.LXDRunnerManager")
@patch("pathlib.Path.mkdir")
@patch("pathlib.Path.write_text")
@patch("subprocess.run")
@@ -468,7 +470,7 @@ def test_org_register(self, run, wt, mkdir, rm):
),
)
- @patch("charm.RunnerManager")
+ @patch("charm.LXDRunnerManager")
@patch("pathlib.Path.mkdir")
@patch("pathlib.Path.write_text")
@patch("subprocess.run")
@@ -498,7 +500,7 @@ def test_repo_register(self, run, wt, mkdir, rm):
),
)
- @patch("charm.RunnerManager")
+ @patch("charm.LXDRunnerManager")
@patch("pathlib.Path.mkdir")
@patch("pathlib.Path.write_text")
@patch("subprocess.run")
@@ -525,7 +527,7 @@ def test_exceed_free_disk_size(self, run, wt, mkdir, rm):
)
)
- @patch("charm.RunnerManager")
+ @patch("charm.LXDRunnerManager")
@patch("pathlib.Path.mkdir")
@patch("pathlib.Path.write_text")
@patch("subprocess.run")
@@ -582,7 +584,7 @@ def test_update_config(self, run, wt, mkdir, rm):
)
mock_rm.reset_mock()
- @patch("charm.RunnerManager")
+ @patch("charm.LXDRunnerManager")
@patch("pathlib.Path.mkdir")
@patch("pathlib.Path.write_text")
@patch("subprocess.run")
@@ -627,7 +629,7 @@ def test_on_update_status(self, run, wt, mkdir, rm):
with pytest.raises(TimerEnableError):
harness.charm.on.update_status.emit()
- @patch("charm.RunnerManager")
+ @patch("charm.LXDRunnerManager")
@patch("pathlib.Path.mkdir")
@patch("pathlib.Path.write_text")
@patch("subprocess.run")
@@ -639,7 +641,7 @@ def test_on_stop(self, run, wt, mkdir, rm):
harness.charm.on.stop.emit()
mock_rm.flush.assert_called()
- @patch("charm.RunnerManager")
+ @patch("charm.LXDRunnerManager")
@patch("pathlib.Path.mkdir")
@patch("pathlib.Path.write_text")
@patch("subprocess.run")
@@ -658,8 +660,8 @@ def test_on_start_failure(self, run, wt, mkdir, rm):
"Failed to start runners: mock error"
)
- @patch("charm.RunnerManager")
- @patch("charm.OpenstackRunnerManager")
+ @patch("charm.LXDRunnerManager")
+ @patch("charm.RunnerScaler")
@patch("pathlib.Path.mkdir")
@patch("pathlib.Path.write_text")
@patch("subprocess.run")
@@ -698,7 +700,7 @@ def test_on_config_changed_openstack_clouds_yaml(self, run, wt, mkdir, orm, rm):
assert harness.charm.unit.status == BlockedStatus("Please provide image integration.")
- @patch("charm.RunnerManager")
+ @patch("charm.LXDRunnerManager")
@patch("pathlib.Path.mkdir")
@patch("pathlib.Path.write_text")
@patch("subprocess.run")
@@ -717,7 +719,7 @@ def test_check_runners_action(self, run, wt, mkdir, rm):
{"online": 2, "offline": 2, "unknown": 1, "runners": "test runner 0, test runner 1"}
)
- @patch("charm.RunnerManager")
+ @patch("charm.LXDRunnerManager")
@patch("pathlib.Path.mkdir")
@patch("pathlib.Path.write_text")
@patch("subprocess.run")
@@ -731,7 +733,7 @@ def test_check_runners_action_with_errors(self, run, wt, mkdir, rm):
harness.charm._on_check_runners_action(mock_event)
mock_event.fail.assert_called_with("Invalid Github config, Missing path configuration")
- @patch("charm.RunnerManager")
+ @patch("charm.LXDRunnerManager")
@patch("pathlib.Path.mkdir")
@patch("pathlib.Path.write_text")
@patch("subprocess.run")
diff --git a/tests/unit/test_runner_manager.py b/tests/unit/test_lxd_runner_manager.py
similarity index 99%
rename from tests/unit/test_runner_manager.py
rename to tests/unit/test_lxd_runner_manager.py
index 7d7600825..829d73c9c 100644
--- a/tests/unit/test_runner_manager.py
+++ b/tests/unit/test_lxd_runner_manager.py
@@ -1,7 +1,7 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
-"""Test cases of RunnerManager class."""
+"""Test cases of LXDRunnerManager class."""
import random
import secrets
from pathlib import Path
@@ -82,7 +82,7 @@ def charm_state_fixture(charm_config: MagicMock):
def runner_manager_fixture(request, tmp_path, monkeypatch, token, charm_state):
charm_state.proxy_config = request.param[1]
monkeypatch.setattr(
- "runner_manager.RunnerManager.runner_bin_path", tmp_path / "mock_runner_binary"
+ "runner_manager.LXDRunnerManager.runner_bin_path", tmp_path / "mock_runner_binary"
)
pool_path = tmp_path / "test_storage"
pool_path.mkdir(exist_ok=True)
From a7979d155dc73568b1cd135f334f156791858bbe Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 20 Aug 2024 14:34:36 +0800
Subject: [PATCH 198/278] Fix according to review comment
---
src-docs/openstack_cloud.openstack_manager.md | 12 +--
...penstack_cloud.openstack_runner_manager.md | 25 +++++-
src/manager/cloud_runner_manager.py | 14 +++-
src/manager/runner_manager.py | 13 ++--
src/openstack_cloud/openstack_cloud.py | 3 +-
src/openstack_cloud/openstack_manager.py | 2 -
.../openstack_runner_manager.py | 77 ++++++++++++++++++-
templates/env.j2 | 14 ----
8 files changed, 124 insertions(+), 36 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_manager.md b/src-docs/openstack_cloud.openstack_manager.md
index f87a1b8b4..a0f0a2531 100644
--- a/src-docs/openstack_cloud.openstack_manager.md
+++ b/src-docs/openstack_cloud.openstack_manager.md
@@ -93,7 +93,7 @@ __init__(
---
-
+
## class `GithubRunnerRemoveError`
Represents an error removing registered runner from Github.
@@ -104,7 +104,7 @@ Represents an error removing registered runner from Github.
---
-
+
## class `OpenstackRunnerManager`
Runner manager for OpenStack-based instances.
@@ -117,7 +117,7 @@ Runner manager for OpenStack-based instances.
- `unit_num`: The juju unit number.
- `instance_name`: Prefix of the name for the set of runners.
-
+
### method `__init__`
@@ -146,7 +146,7 @@ Construct OpenstackRunnerManager object.
---
-
+
### method `flush`
@@ -171,7 +171,7 @@ Flush Openstack servers.
---
-
+
### method `get_github_runner_info`
@@ -188,7 +188,7 @@ Get information on GitHub for the runners.
---
-
+
### method `reconcile`
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 82991f1e2..752e2f9d3 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -131,7 +131,7 @@ The prefix of runner names.
---
-
+
### method `cleanup`
@@ -207,6 +207,29 @@ Delete self-hosted runners.
---
+
+
+### method `flush_runners`
+
+```python
+flush_runners(remove_token: str, busy: bool = False) → Iterator[RunnerMetrics]
+```
+
+Remove idle and/or busy runners.
+
+
+
+**Args:**
+ - `remove_token`: The GitHub remove token for removing runners.
+ - `busy`: If false, only idle runners are removed. If true, both idle and busy runners are removed.
+
+
+
+**Returns:**
+ Any metrics retrieved from flushed runners.
+
+---
+
### method `get_runner`
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 7362990d0..28ed17b20 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -176,13 +176,23 @@ def get_runners(self, states: Sequence[CloudRunnerState]) -> Tuple[CloudRunnerIn
@abc.abstractmethod
def delete_runner(self, instance_id: InstanceId, remove_token: str) -> RunnerMetrics | None:
- """Delete self-hosted runners.
+ """Delete self-hosted runner.
Args:
instance_id: The instance id of the runner to delete.
remove_token: The GitHub remove token.
"""
+ @abc.abstractmethod
+ def flush_runners(self, remove_token: str, busy: bool = False) -> Iterator[RunnerMetrics]:
+ """Remove idle and/or busy runners.
+
+ Args:
+ remove_token: The GitHub remove token for removing runners.
+ busy: If false, only idle runners are removed. If true, both idle and busy runners are
+ removed.
+ """
+
@abc.abstractmethod
def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
"""Cleanup runner and resource on the cloud.
@@ -190,5 +200,5 @@ def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
Perform health check on runner and delete the runner if it fails.
Args:
- remove_token: The GitHub remove token.
+ remove_token: The GitHub remove token for removing runners.
"""
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 98cf5f35f..a9cff2f35 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -236,15 +236,12 @@ def flush_runners(
"Unknown flush mode %s encountered, contact developers", flush_mode
)
- states = [GitHubRunnerState.IDLE]
- if flush_mode == FlushMode.FLUSH_BUSY:
- states.append(GitHubRunnerState.BUSY)
-
- runners_list = self.get_runners(github_states=states)
- runner_names = [runner.name for runner in runners_list]
- logger.info("Flushing runners: %s", runner_names)
+ busy = False
+ if flush_mode == FlushMode.FLUSH_BUSY:
+ busy = True
remove_token = self._github.get_removal_token()
- return self._delete_runners(runners=runners_list, remove_token=remove_token)
+ stats = self._cloud.flush_runners(remove_token, busy)
+ return self._issue_runner_metrics(metrics=stats)
def cleanup(self) -> IssuedMetricEventsStats:
"""Run cleanup of the runners and other resources.
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 462b6b46b..fb3d07cb0 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -386,11 +386,12 @@ def _cleanup_openstack_keypairs(
exclude_instances: The keys of these instance will not be deleted.
"""
logger.info("Cleaning up openstack keypairs")
+ exclude_instance_set = set(exclude_instances)
keypairs = conn.list_keypairs()
for key in keypairs:
# The `name` attribute is of resource.Body type.
if key.name and str(key.name).startswith(self.prefix):
- if str(key.name) in set(exclude_instances):
+ if str(key.name) in exclude_instance_set:
continue
try:
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index 6b6b3d082..379d2ae4c 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -237,8 +237,6 @@ def _generate_runner_env(
pre_job_script=str(PRE_JOB_SCRIPT),
dockerhub_mirror=dockerhub_mirror or "",
ssh_debug_info=(secrets.choice(ssh_debug_connections) if ssh_debug_connections else None),
- # Proxies are handled by aproxy.
- proxies={},
)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index c58d131c0..d95017710 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -269,6 +269,31 @@ def delete_runner(
self._delete_runner(instance, remove_token)
return next(extracted_metrics, None)
+ def flush_runners(
+ self, remove_token: str, busy: bool = False
+ ) -> Iterator[runner_metrics.RunnerMetrics]:
+ """Remove idle and/or busy runners.
+
+ Args:
+ remove_token: The GitHub remove token for removing runners.
+ busy: If false, only idle runners are removed. If true, both idle and busy runners are
+ removed.
+
+ Returns:
+ Any metrics retrieved from flushed runners.
+ """
+ instance_list = self._openstack_cloud.get_instances()
+ for instance in instance_list:
+ try:
+ self._check_state_and_flush(instance, busy)
+ except SSHError:
+ logger.warning(
+ "Unable to determine state of %s and kill runner process due to SSH issues",
+ instance.server_name,
+ )
+ continue
+ return self.cleanup(remove_token)
+
def cleanup(self, remove_token: str) -> Iterator[runner_metrics.RunnerMetrics]:
"""Cleanup runner and resource on the cloud.
@@ -381,8 +406,6 @@ def _generate_cloud_init(self, instance_name: str, registration_token: str) -> s
if self._service_config.ssh_debug_connections
else None
),
- # Proxies are handled by aproxy.
- proxies={},
)
pre_job_contents_dict = {
@@ -435,6 +458,56 @@ def _get_repo_policy_compliance_client(self) -> RepoPolicyComplianceClient | Non
)
return None
+ @retry(tries=3, delay=5, backoff=2, local_logger=logger)
+ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> None:
+ """Kill runner process depending on idle or busy.
+
+ The runner state reported by the GitHub API can be delayed, so the state of the runner is
+ determined by which runner processes are running. If the Runner.Worker process is running,
+ the runner is deemed to be busy.
+
+ Raises:
+ SSHError: Unable to check the state of the runner and kill the runner process due to
+ SSH failure.
+
+ Args:
+ instance: The openstack instance to kill the runner process.
+ busy: Kill the process if runner is busy, else only kill runner
+ process if runner is idle.
+ """
+ try:
+ ssh_conn = self._openstack_cloud.get_ssh_connection(instance)
+ except KeyfileError:
+ logger.exception(
+ "Health check failed due to unable to find keyfile for %s", instance.server_name
+ )
+ return
+ except SSHError:
+ logger.exception(
+ "SSH connection failure with %s during health check", instance.server_name
+ )
+ raise
+
+ if not busy:
+ # only kill Runner.Listener if Runner.Worker does not exist.
+ kill_command = (
+ "! pgrep -x Runner.Worker && pgrep -x Runner.Listener && "
+ "kill $(pgrep -x Runner.Listener)"
+ )
+ else:
+ # kill both Runner.Listener and Runner.Worker processes.
+ # This kills pre-job.sh, a child process of Runner.Worker.
+ kill_command = (
+ "pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener);"
+ "pgrep -x Runner.Worker && kill $(pgrep -x Runner.Worker);"
+ )
+
+ result: invoke.runners.Result = ssh_conn.run(kill_command, warn=True)
+ if not result.ok:
+ logger.warning("Unable to SSH to kill runner process on %s", instance.name)
+ return
+ logger.info("Killed runner process on %s", instance.name)
+
@retry(tries=3, delay=5, backoff=2, local_logger=logger)
def _health_check(self, instance: OpenstackInstance) -> bool:
"""Check whether runner is healthy.
diff --git a/templates/env.j2 b/templates/env.j2
index c0de54aad..f7da33219 100644
--- a/templates/env.j2
+++ b/templates/env.j2
@@ -1,18 +1,4 @@
PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
-{% if proxies.http %}
-HTTP_PROXY={{proxies.http}}
-http_proxy={{proxies.http}}
-{% endif %}
-{% if proxies.https %}
-HTTPS_PROXY={{proxies.https}}
-https_proxy={{proxies.https}}
-{% endif %}
-{% if proxies.ftp_proxy %}
-{% endif %}
-{% if proxies.no_proxy %}
-NO_PROXY={{proxies.no_proxy}}
-no_proxy={{proxies.no_proxy}}
-{% endif %}
{% if dockerhub_mirror %}
DOCKERHUB_MIRROR={{dockerhub_mirror}}
CONTAINER_REGISTRY_URL={{dockerhub_mirror}}
From 16025254400e0aecb02b46114b993c80fd262ad1 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 20 Aug 2024 14:37:46 +0800
Subject: [PATCH 199/278] Fix test according to comments
---
.../test_runner_manager_openstack.py | 24 +++++++++----------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 523c2b5ae..cec1158e5 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -150,7 +150,7 @@ async def runner_manager_with_one_runner_fixture(runner_manager: RunnerManager)
runner_manager.create_runners(1)
runner_list = runner_manager.get_runners()
try:
- await assert_runner_amount(runner_manager, 1)
+ await wait_runner_amount(runner_manager, 1)
except TimeoutError as err:
raise AssertionError("Test arrange failed: Expect one runner") from err
@@ -183,10 +183,10 @@ def workflow_is_status(workflow: Workflow, status: str) -> bool:
return workflow.status == status
-async def assert_runner_amount(runner_manager: RunnerManager, num: int):
- """Assert the number of runner a runner manager has.
+async def wait_runner_amount(runner_manager: RunnerManager, num: int):
+ """Wait until the runner manager has the number of runners.
- A TimeoutError will be thrown if runners are still found after timeout.
+ A TimeoutError will be thrown if runners amount is not correct after timeout.
Args:
runner_manager: The RunnerManager to check.
@@ -239,7 +239,7 @@ async def test_runner_normal_idle_lifecycle(
runner_id = runner_id_list[0]
try:
- await assert_runner_amount(runner_manager, 1)
+ await wait_runner_amount(runner_manager, 1)
except TimeoutError as err:
raise AssertionError("Test arrange failed: Expect one runner") from err
@@ -265,7 +265,7 @@ async def test_runner_normal_idle_lifecycle(
# 3.
runner_manager.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
- await assert_runner_amount(runner_manager, 0)
+ await wait_runner_amount(runner_manager, 0)
@pytest.mark.openstack
@@ -320,7 +320,7 @@ async def test_runner_flush_busy_lifecycle(
issue_metrics_events = runner_manager_with_one_runner.cleanup()
assert issue_metrics_events[events.RunnerStart] == 1
- await assert_runner_amount(runner_manager_with_one_runner, 0)
+ await wait_runner_amount(runner_manager_with_one_runner, 0)
@pytest.mark.openstack
@@ -374,7 +374,7 @@ async def test_runner_normal_lifecycle(
assert metric_logs[1]["event"] == "runner_stop"
assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests"
- await assert_runner_amount(runner_manager_with_one_runner, 0)
+ await wait_runner_amount(runner_manager_with_one_runner, 0)
@pytest.mark.openstack
@@ -387,10 +387,10 @@ async def test_runner_spawn_two(
Arrange: RunnerManager instance with no runners.
Act:
1. Create two runner.
- 3. Delete all idle runner.
+ 2. Delete all idle runner.
Assert:
1. Two active idle runner.
- 3. No runners.
+ 2. No runners.
"""
# 1.
runner_id_list = runner_manager.create_runners(2)
@@ -398,7 +398,7 @@ async def test_runner_spawn_two(
assert len(runner_id_list) == 2
try:
- await assert_runner_amount(runner_manager, 2)
+ await wait_runner_amount(runner_manager, 2)
except TimeoutError as err:
raise AssertionError("Test arrange failed: Expect two runner") from err
@@ -408,4 +408,4 @@ async def test_runner_spawn_two(
# 3.
runner_manager.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
- await assert_runner_amount(runner_manager, 0)
+ await wait_runner_amount(runner_manager, 0)
From 66003f400a4a830efd586b4da08a7c9719d92731 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 20 Aug 2024 14:52:31 +0800
Subject: [PATCH 200/278] Fix unit test
---
tests/unit/test_openstack_manager.py | 46 +---------------------------
1 file changed, 1 insertion(+), 45 deletions(-)
diff --git a/tests/unit/test_openstack_manager.py b/tests/unit/test_openstack_manager.py
index 5e43fb518..5329b1282 100644
--- a/tests/unit/test_openstack_manager.py
+++ b/tests/unit/test_openstack_manager.py
@@ -287,52 +287,23 @@ def test__create_connection(
@pytest.mark.parametrize(
- "proxy_config, dockerhub_mirror, ssh_debug_connections, expected_env_contents",
+ "dockerhub_mirror, ssh_debug_connections, expected_env_contents",
[
pytest.param(
- None,
None,
None,
"""PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
-
-
-
-
LANG=C.UTF-8
ACTIONS_RUNNER_HOOK_JOB_STARTED=/home/ubuntu/actions-runner/pre-job.sh
""",
id="all values empty",
),
pytest.param(
- openstack_manager.ProxyConfig(
- http="http://test.internal",
- https="https://test.internal",
- no_proxy="http://no_proxy.internal",
- ),
- None,
- None,
- """PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
-
-
-
-
-
-LANG=C.UTF-8
-ACTIONS_RUNNER_HOOK_JOB_STARTED=/home/ubuntu/actions-runner/pre-job.sh
-""",
- id="proxy value set",
- ),
- pytest.param(
- None,
"http://dockerhub_mirror.test",
None,
"""PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
-
-
-
-
DOCKERHUB_MIRROR=http://dockerhub_mirror.test
CONTAINER_REGISTRY_URL=http://dockerhub_mirror.test
@@ -342,7 +313,6 @@ def test__create_connection(
id="dockerhub mirror set",
),
pytest.param(
- None,
None,
[
openstack_manager.SSHDebugConnection(
@@ -354,10 +324,6 @@ def test__create_connection(
],
"""PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
-
-
-
-
LANG=C.UTF-8
ACTIONS_RUNNER_HOOK_JOB_STARTED=/home/ubuntu/actions-runner/pre-job.sh
@@ -369,11 +335,6 @@ def test__create_connection(
id="ssh debug connection set",
),
pytest.param(
- openstack_manager.ProxyConfig(
- http="http://test.internal",
- https="https://test.internal",
- no_proxy="http://no_proxy.internal",
- ),
"http://dockerhub_mirror.test",
[
openstack_manager.SSHDebugConnection(
@@ -385,10 +346,6 @@ def test__create_connection(
],
"""PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
-
-
-
-
DOCKERHUB_MIRROR=http://dockerhub_mirror.test
CONTAINER_REGISTRY_URL=http://dockerhub_mirror.test
@@ -405,7 +362,6 @@ def test__create_connection(
],
)
def test__generate_runner_env(
- proxy_config: Optional[openstack_manager.ProxyConfig],
dockerhub_mirror: Optional[str],
ssh_debug_connections: Optional[list[openstack_manager.SSHDebugConnection]],
expected_env_contents: str,
From 16ae84e1224eab9184bfa43d8e5e9dce28526682 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 09:37:48 +0800
Subject: [PATCH 201/278] Fix typo of attr
---
src/openstack_cloud/openstack_runner_manager.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index d95017710..25e849a9d 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -504,9 +504,9 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
result: invoke.runners.Result = ssh_conn.run(kill_command, warn=True)
if not result.ok:
- logger.warning("Unable to SSH to kill runner process on %s", instance.name)
+ logger.warning("Unable to SSH to kill runner process on %s: %s", instance.server_name, result.stderr)
return
- logger.info("Killed runner process on %s", instance.name)
+ logger.info("Killed runner process on %s", instance.server_name)
@retry(tries=3, delay=5, backoff=2, local_logger=logger)
def _health_check(self, instance: OpenstackInstance) -> bool:
@@ -548,7 +548,7 @@ def _run_health_check(ssh_conn: SSHConnection, name: str) -> bool:
"""
result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
if not result.ok:
- logger.warning("SSH run of `ps aux` failed on %s", name)
+ logger.warning("SSH run of `ps aux` failed on %s: %s", name, result.stderr)
return False
if (
RUNNER_WORKER_PROCESS not in result.stdout
From 6b96dd4238948b00be7e86e8e09a2fef5f460ac5 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 09:50:58 +0800
Subject: [PATCH 202/278] Add debug
---
src/openstack_cloud/openstack_runner_manager.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 25e849a9d..696a61088 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -488,6 +488,9 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
)
raise
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
if not busy:
# only kill Runner.Listener if Runner.Worker does not exist.
kill_command = (
@@ -501,6 +504,7 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
"pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener);"
"pgrep -x Runner.Worker && kill $(pgrep -x Runner.Worker);"
)
+ pytest.set_trace()
result: invoke.runners.Result = ssh_conn.run(kill_command, warn=True)
if not result.ok:
From 9657299e09c0bafed362144b4c978bccf54d45b6 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 09:55:51 +0800
Subject: [PATCH 203/278] Add debug statement
---
src/openstack_cloud/openstack_runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 696a61088..c6bc7a973 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -489,6 +489,7 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
raise
# TODO: debug
+ result: invoke.runners.Result = ssh_conn.run("! pgrep -x Runner.Worker && echo HELLO", warn=True)
import pytest
pytest.set_trace()
if not busy:
@@ -504,7 +505,6 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
"pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener);"
"pgrep -x Runner.Worker && kill $(pgrep -x Runner.Worker);"
)
- pytest.set_trace()
result: invoke.runners.Result = ssh_conn.run(kill_command, warn=True)
if not result.ok:
From 29ef80994a659f063f5cb58b6d16dd179a55a56c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 09:57:17 +0800
Subject: [PATCH 204/278] Debug
---
.../test_runner_manager_openstack.py | 130 +++++++++---------
1 file changed, 65 insertions(+), 65 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index cec1158e5..434747acd 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -201,71 +201,71 @@ async def wait_runner_amount(runner_manager: RunnerManager, num: int):
await wait_for(lambda: len(runner_manager.get_runners()) == num)
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_get_no_runner(runner_manager: RunnerManager) -> None:
- """
- Arrange: RunnerManager instance with no runners.
- Act: Get runners.
- Assert: Empty tuple returned.
- """
- runner_list = runner_manager.get_runners()
- assert isinstance(runner_list, tuple)
- assert not runner_list
-
-
-@pytest.mark.openstack
-@pytest.mark.asyncio
-@pytest.mark.abort_on_fail
-async def test_runner_normal_idle_lifecycle(
- runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
-) -> None:
- """
- Arrange: RunnerManager instance with no runners.
- Act:
- 1. Create one runner.
- 2. Run health check on the runner.
- 3. Delete all idle runner.
- Assert:
- 1. An active idle runner.
- 2. Health check passes.
- 3. No runners.
- """
- # 1.
- runner_id_list = runner_manager.create_runners(1)
- assert isinstance(runner_id_list, tuple)
- assert len(runner_id_list) == 1
- runner_id = runner_id_list[0]
-
- try:
- await wait_runner_amount(runner_manager, 1)
- except TimeoutError as err:
- raise AssertionError("Test arrange failed: Expect one runner") from err
-
- runner_list = runner_manager.get_runners()
- assert isinstance(runner_list, tuple)
- assert len(runner_list) == 1
- runner = runner_list[0]
- assert runner.instance_id == runner_id
- assert runner.cloud_state == CloudRunnerState.ACTIVE
- # Update on GitHub-side can take a bit of time.
- await wait_for(
- lambda: runner_manager.get_runners()[0].github_state == GitHubRunnerState.IDLE,
- timeout=120,
- check_interval=10,
- )
-
- # 2.
- openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
- assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
- runner = openstack_instances[0]
-
- assert openstack_runner_manager._health_check(runner)
-
- # 3.
- runner_manager.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
- await wait_runner_amount(runner_manager, 0)
+# @pytest.mark.openstack
+# @pytest.mark.asyncio
+# @pytest.mark.abort_on_fail
+# async def test_get_no_runner(runner_manager: RunnerManager) -> None:
+# """
+# Arrange: RunnerManager instance with no runners.
+# Act: Get runners.
+# Assert: Empty tuple returned.
+# """
+# runner_list = runner_manager.get_runners()
+# assert isinstance(runner_list, tuple)
+# assert not runner_list
+
+
+# @pytest.mark.openstack
+# @pytest.mark.asyncio
+# @pytest.mark.abort_on_fail
+# async def test_runner_normal_idle_lifecycle(
+# runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+# ) -> None:
+# """
+# Arrange: RunnerManager instance with no runners.
+# Act:
+# 1. Create one runner.
+# 2. Run health check on the runner.
+# 3. Delete all idle runner.
+# Assert:
+# 1. An active idle runner.
+# 2. Health check passes.
+# 3. No runners.
+# """
+# # 1.
+# runner_id_list = runner_manager.create_runners(1)
+# assert isinstance(runner_id_list, tuple)
+# assert len(runner_id_list) == 1
+# runner_id = runner_id_list[0]
+
+# try:
+# await wait_runner_amount(runner_manager, 1)
+# except TimeoutError as err:
+# raise AssertionError("Test arrange failed: Expect one runner") from err
+
+# runner_list = runner_manager.get_runners()
+# assert isinstance(runner_list, tuple)
+# assert len(runner_list) == 1
+# runner = runner_list[0]
+# assert runner.instance_id == runner_id
+# assert runner.cloud_state == CloudRunnerState.ACTIVE
+# # Update on GitHub-side can take a bit of time.
+# await wait_for(
+# lambda: runner_manager.get_runners()[0].github_state == GitHubRunnerState.IDLE,
+# timeout=120,
+# check_interval=10,
+# )
+
+# # 2.
+# openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
+# assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
+# runner = openstack_instances[0]
+
+# assert openstack_runner_manager._health_check(runner)
+
+# # 3.
+# runner_manager.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
+# await wait_runner_amount(runner_manager, 0)
@pytest.mark.openstack
From d14effbfaa9cb7c3d7135492dcac27160005559c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 10:15:31 +0800
Subject: [PATCH 205/278] Fix return code of the kill command
---
src/openstack_cloud/openstack_runner_manager.py | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index c6bc7a973..57b3e2278 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -506,11 +506,8 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
"pgrep -x Runner.Worker && kill $(pgrep -x Runner.Worker);"
)
- result: invoke.runners.Result = ssh_conn.run(kill_command, warn=True)
- if not result.ok:
- logger.warning("Unable to SSH to kill runner process on %s: %s", instance.server_name, result.stderr)
- return
- logger.info("Killed runner process on %s", instance.server_name)
+ ssh_conn.run(kill_command, warn=True)
+ logger.info("Attempted to killed runner process on %s", instance.server_name)
@retry(tries=3, delay=5, backoff=2, local_logger=logger)
def _health_check(self, instance: OpenstackInstance) -> bool:
From 09a13c9357e583c227ae53ae28bfcc4ae047e295 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 10:22:03 +0800
Subject: [PATCH 206/278] Remove debug
---
src/openstack_cloud/openstack_runner_manager.py | 4 ----
1 file changed, 4 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 57b3e2278..ebcd7dac9 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -488,10 +488,6 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
)
raise
- # TODO: debug
- result: invoke.runners.Result = ssh_conn.run("! pgrep -x Runner.Worker && echo HELLO", warn=True)
- import pytest
- pytest.set_trace()
if not busy:
# only kill Runner.Listener if Runner.Worker does not exist.
kill_command = (
From f08b4ad66c96665aef77e2b400b3ea62ebd22717 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 10:32:20 +0800
Subject: [PATCH 207/278] Add comments on the flush kill command
---
src/openstack_cloud/openstack_runner_manager.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index ebcd7dac9..527596c74 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -488,6 +488,8 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
)
raise
+ # Using a single command to determine the state and kill the process if needed.
+ # This makes it more robust when network is unstable.
if not busy:
# only kill Runner.Listener if Runner.Worker does not exist.
kill_command = (
@@ -501,7 +503,7 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
"pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener);"
"pgrep -x Runner.Worker && kill $(pgrep -x Runner.Worker);"
)
-
+ # Checking the result of kill command is not useful, as the exit code does not reveal much.
ssh_conn.run(kill_command, warn=True)
logger.info("Attempted to killed runner process on %s", instance.server_name)
From 68bd0cc0481737e33be65891fafa62cb2e9917fe Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 10:51:12 +0800
Subject: [PATCH 208/278] Add debug
---
.../openstack_runner_manager.py | 22 ++++++++++++-------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 527596c74..7ba96a4bb 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -487,21 +487,27 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
"SSH connection failure with %s during health check", instance.server_name
)
raise
+
+ # TODO: Debug
+ ssh_conn.run("! pgrep -x Runner.Worker && echo HELLO", warn=True)
+ import pytest
+ pytest.set_trace()
+
# Using a single command to determine the state and kill the process if needed.
# This makes it more robust when network is unstable.
- if not busy:
- # only kill Runner.Listener if Runner.Worker does not exist.
+ if busy:
+ # kill both Runner.Listener and Runner.Worker processes.
+ # This kills pre-job.sh, a child process of Runner.Worker.
kill_command = (
- "! pgrep -x Runner.Worker && pgrep -x Runner.Listener && "
- "kill $(pgrep -x Runner.Listener)"
+ f"pgrep -x {RUNNER_LISTENER_PROCESS} && kill $(pgrep -x {RUNNER_LISTENER_PROCESS});"
+ f"pgrep -x {RUNNER_WORKER_PROCESS} && kill $(pgrep -x {RUNNER_WORKER_PROCESS});"
)
else:
- # kill both Runner.Listener and Runner.Worker processes.
- # This kills pre-job.sh, a child process of Runner.Worker.
+ # Only kill Runner.Listener if Runner.Worker does not exist.
kill_command = (
- "pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener);"
- "pgrep -x Runner.Worker && kill $(pgrep -x Runner.Worker);"
+ f"! pgrep -x {RUNNER_WORKER_PROCESS} && pgrep -x {RUNNER_LISTENER_PROCESS} && "
+ f"kill $(pgrep -x {RUNNER_LISTENER_PROCESS})"
)
# Checking the result of kill command is not useful, as the exit code does not reveal much.
ssh_conn.run(kill_command, warn=True)
From d92da28030e57e8c35476a1cb1489ed4c651e730 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 10:57:10 +0800
Subject: [PATCH 209/278] Fix debug
---
src/openstack_cloud/openstack_runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 7ba96a4bb..7592515f8 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -489,7 +489,7 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
raise
# TODO: Debug
- ssh_conn.run("! pgrep -x Runner.Worker && echo HELLO", warn=True)
+ result = ssh_conn.run("! pgrep -x Runner.Worker && echo HELLO", warn=True)
import pytest
pytest.set_trace()
From 96dfab7756d19cb84d245389a9ab8d0381fbb7cd Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 11:10:12 +0800
Subject: [PATCH 210/278] Debug
---
.../openstack_runner_manager.py | 10 +-
.../test_runner_manager_openstack.py | 130 +++++++++---------
2 files changed, 73 insertions(+), 67 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 7592515f8..c812521a9 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -492,11 +492,17 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
result = ssh_conn.run("! pgrep -x Runner.Worker && echo HELLO", warn=True)
import pytest
pytest.set_trace()
-
+ result = ssh_conn.run("! pgrep -x Runner.Worker && pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener)", warn=True)
+ import pytest
+ pytest.set_trace()
+ result = ssh_conn.run("! pgrep -x Runner.Worker && echo HELLO", warn=True)
+ import pytest
+ pytest.set_trace()
# Using a single command to determine the state and kill the process if needed.
# This makes it more robust when network is unstable.
if busy:
+ logger.info("Attempting to kill all runner process on %s", instance.server_name)
# kill both Runner.Listener and Runner.Worker processes.
# This kills pre-job.sh, a child process of Runner.Worker.
kill_command = (
@@ -504,6 +510,7 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
f"pgrep -x {RUNNER_WORKER_PROCESS} && kill $(pgrep -x {RUNNER_WORKER_PROCESS});"
)
else:
+ logger.info("Attempting to kill runner process on %s if not busy", instance.server_name)
# Only kill Runner.Listener if Runner.Worker does not exist.
kill_command = (
f"! pgrep -x {RUNNER_WORKER_PROCESS} && pgrep -x {RUNNER_LISTENER_PROCESS} && "
@@ -511,7 +518,6 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
)
# Checking the result of kill command is not useful, as the exit code does not reveal much.
ssh_conn.run(kill_command, warn=True)
- logger.info("Attempted to killed runner process on %s", instance.server_name)
@retry(tries=3, delay=5, backoff=2, local_logger=logger)
def _health_check(self, instance: OpenstackInstance) -> bool:
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 434747acd..cec1158e5 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -201,71 +201,71 @@ async def wait_runner_amount(runner_manager: RunnerManager, num: int):
await wait_for(lambda: len(runner_manager.get_runners()) == num)
-# @pytest.mark.openstack
-# @pytest.mark.asyncio
-# @pytest.mark.abort_on_fail
-# async def test_get_no_runner(runner_manager: RunnerManager) -> None:
-# """
-# Arrange: RunnerManager instance with no runners.
-# Act: Get runners.
-# Assert: Empty tuple returned.
-# """
-# runner_list = runner_manager.get_runners()
-# assert isinstance(runner_list, tuple)
-# assert not runner_list
-
-
-# @pytest.mark.openstack
-# @pytest.mark.asyncio
-# @pytest.mark.abort_on_fail
-# async def test_runner_normal_idle_lifecycle(
-# runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
-# ) -> None:
-# """
-# Arrange: RunnerManager instance with no runners.
-# Act:
-# 1. Create one runner.
-# 2. Run health check on the runner.
-# 3. Delete all idle runner.
-# Assert:
-# 1. An active idle runner.
-# 2. Health check passes.
-# 3. No runners.
-# """
-# # 1.
-# runner_id_list = runner_manager.create_runners(1)
-# assert isinstance(runner_id_list, tuple)
-# assert len(runner_id_list) == 1
-# runner_id = runner_id_list[0]
-
-# try:
-# await wait_runner_amount(runner_manager, 1)
-# except TimeoutError as err:
-# raise AssertionError("Test arrange failed: Expect one runner") from err
-
-# runner_list = runner_manager.get_runners()
-# assert isinstance(runner_list, tuple)
-# assert len(runner_list) == 1
-# runner = runner_list[0]
-# assert runner.instance_id == runner_id
-# assert runner.cloud_state == CloudRunnerState.ACTIVE
-# # Update on GitHub-side can take a bit of time.
-# await wait_for(
-# lambda: runner_manager.get_runners()[0].github_state == GitHubRunnerState.IDLE,
-# timeout=120,
-# check_interval=10,
-# )
-
-# # 2.
-# openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
-# assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
-# runner = openstack_instances[0]
-
-# assert openstack_runner_manager._health_check(runner)
-
-# # 3.
-# runner_manager.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
-# await wait_runner_amount(runner_manager, 0)
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_get_no_runner(runner_manager: RunnerManager) -> None:
+ """
+ Arrange: RunnerManager instance with no runners.
+ Act: Get runners.
+ Assert: Empty tuple returned.
+ """
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert not runner_list
+
+
+@pytest.mark.openstack
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_runner_normal_idle_lifecycle(
+ runner_manager: RunnerManager, openstack_runner_manager: OpenstackRunnerManager
+) -> None:
+ """
+ Arrange: RunnerManager instance with no runners.
+ Act:
+ 1. Create one runner.
+ 2. Run health check on the runner.
+ 3. Delete all idle runner.
+ Assert:
+ 1. An active idle runner.
+ 2. Health check passes.
+ 3. No runners.
+ """
+ # 1.
+ runner_id_list = runner_manager.create_runners(1)
+ assert isinstance(runner_id_list, tuple)
+ assert len(runner_id_list) == 1
+ runner_id = runner_id_list[0]
+
+ try:
+ await wait_runner_amount(runner_manager, 1)
+ except TimeoutError as err:
+ raise AssertionError("Test arrange failed: Expect one runner") from err
+
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 1
+ runner = runner_list[0]
+ assert runner.instance_id == runner_id
+ assert runner.cloud_state == CloudRunnerState.ACTIVE
+ # Update on GitHub-side can take a bit of time.
+ await wait_for(
+ lambda: runner_manager.get_runners()[0].github_state == GitHubRunnerState.IDLE,
+ timeout=120,
+ check_interval=10,
+ )
+
+ # 2.
+ openstack_instances = openstack_runner_manager._openstack_cloud.get_instances()
+ assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner."
+ runner = openstack_instances[0]
+
+ assert openstack_runner_manager._health_check(runner)
+
+ # 3.
+ runner_manager.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
+ await wait_runner_amount(runner_manager, 0)
@pytest.mark.openstack
From 5a92be929f23324aea22f0139e775972b6e5914b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 11:19:25 +0800
Subject: [PATCH 211/278] Debug
---
src/openstack_cloud/openstack_runner_manager.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index c812521a9..236ed71ae 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -489,10 +489,13 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
raise
# TODO: Debug
+ result = ssh_conn.run("! pgrep -x Runner.Worker && pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener)", warn=True)
+ import pytest
+ pytest.set_trace()
result = ssh_conn.run("! pgrep -x Runner.Worker && echo HELLO", warn=True)
import pytest
pytest.set_trace()
- result = ssh_conn.run("! pgrep -x Runner.Worker && pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener)", warn=True)
+ result = ssh_conn.run("ps aux", warn=True)
import pytest
pytest.set_trace()
result = ssh_conn.run("! pgrep -x Runner.Worker && echo HELLO", warn=True)
From d2d3b21a0e3c7256c4a3f0a2215ca27d16b10103 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 11:32:03 +0800
Subject: [PATCH 212/278] Remove debug
---
src/openstack_cloud/openstack_runner_manager.py | 14 --------------
tests/integration/test_runner_manager_openstack.py | 2 +-
2 files changed, 1 insertion(+), 15 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 236ed71ae..8363a64bc 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -487,20 +487,6 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
"SSH connection failure with %s during health check", instance.server_name
)
raise
-
- # TODO: Debug
- result = ssh_conn.run("! pgrep -x Runner.Worker && pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener)", warn=True)
- import pytest
- pytest.set_trace()
- result = ssh_conn.run("! pgrep -x Runner.Worker && echo HELLO", warn=True)
- import pytest
- pytest.set_trace()
- result = ssh_conn.run("ps aux", warn=True)
- import pytest
- pytest.set_trace()
- result = ssh_conn.run("! pgrep -x Runner.Worker && echo HELLO", warn=True)
- import pytest
- pytest.set_trace()
# Using a single command to determine the state and kill the process if needed.
# This makes it more robust when network is unstable.
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index cec1158e5..38873e3d6 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -295,7 +295,7 @@ async def test_runner_flush_busy_lifecycle(
github_repository=github_repository,
conclusion="success",
workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
- dispatch_input={"runner": runner_label, "minutes": "10"},
+ dispatch_input={"runner": runner_label, "minutes": "30"},
wait=False,
)
await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
From c48ef0b508ddcef37c2e6591858a746795365f9f Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 11:40:10 +0800
Subject: [PATCH 213/278] Add cleanup during idle and busy runner test
---
.../test_runner_manager_openstack.py | 36 ++++++++++++++-----
1 file changed, 28 insertions(+), 8 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 38873e3d6..f3e4955f7 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -226,11 +226,13 @@ async def test_runner_normal_idle_lifecycle(
Act:
1. Create one runner.
2. Run health check on the runner.
- 3. Delete all idle runner.
+ 3. Run cleanup.
+ 4. Delete all idle runner.
Assert:
1. An active idle runner.
2. Health check passes.
- 3. No runners.
+ 3. One idle runner remains.
+ 4. No runners.
"""
# 1.
runner_id_list = runner_manager.create_runners(1)
@@ -262,8 +264,17 @@ async def test_runner_normal_idle_lifecycle(
runner = openstack_instances[0]
assert openstack_runner_manager._health_check(runner)
-
+
# 3.
+ runner_manager.cleanup()
+ runner_list = runner_manager.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 1
+ runner = runner_list[0]
+ assert runner.instance_id == runner_id
+ assert runner.cloud_state == CloudRunnerState.ACTIVE
+
+ # 4.
runner_manager.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
await wait_runner_amount(runner_manager, 0)
@@ -281,12 +292,12 @@ async def test_runner_flush_busy_lifecycle(
Arrange: RunnerManager with one idle runner.
Act:
1. Run a long workflow.
- 2. Run flush idle runner.
- 3. Run flush busy runner.
+ 3. Run flush idle runner.
+ 4. Run flush busy runner.
Assert:
1. Runner takes the job and become busy.
- 2. Busy runner still exists.
- 3. No runners exists.
+ 3. Busy runner still exists.
+ 4. No runners exists.
"""
# 1.
workflow = await dispatch_workflow(
@@ -307,6 +318,15 @@ async def test_runner_flush_busy_lifecycle(
assert busy_runner.github_state == GitHubRunnerState.BUSY
# 2.
+ runner_manager_with_one_runner.cleanup()
+ runner_list = runner_manager_with_one_runner.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 1
+ runner = runner_list[0]
+ assert runner.cloud_state == CloudRunnerState.ACTIVE
+ assert busy_runner.github_state == GitHubRunnerState.BUSY
+
+ # 3.
runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
runner_list = runner_manager_with_one_runner.get_runners()
assert len(runner_list) == 1
@@ -314,7 +334,7 @@ async def test_runner_flush_busy_lifecycle(
assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
assert busy_runner.github_state == GitHubRunnerState.BUSY
- # 3.
+ # 4.
runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_BUSY)
issue_metrics_events = runner_manager_with_one_runner.cleanup()
From 3f348634843738f79920a8c5918696deb7a2d018 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 11:52:10 +0800
Subject: [PATCH 214/278] Debug
---
src/openstack_cloud/openstack_cloud.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index fb3d07cb0..037e33cb9 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -391,6 +391,9 @@ def _cleanup_openstack_keypairs(
for key in keypairs:
# The `name` attribute is of resource.Body type.
if key.name and str(key.name).startswith(self.prefix):
+ # TODO: DEBUG
+ import pytest
+ pytest.set_trace()
if str(key.name) in exclude_instance_set:
continue
From e79bafed95adfcae88346778a305b2e1adf39c23 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 11:54:36 +0800
Subject: [PATCH 215/278] Disable tests during debug
---
.github/workflows/e2e_test.yaml | 5 ++++-
.github/workflows/integration_test.yaml | 4 +++-
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index 5933451ee..df2667ae2 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,7 +1,10 @@
name: End-to-End tests
on:
- pull_request:
+ # TODO: DEBUG
+ workflow_dispatch:
+ # pull_request:
+
jobs:
# test option values defined at test/conftest.py are passed on via repository secret
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 8e0bc700a..b67ec9e61 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,7 +1,9 @@
name: integration-tests
on:
- pull_request:
+ # TODO: DEBUG
+ workflow_dispatch:
+ # pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
From ccb62251dd9c2950e4155e822add780f0e3113d5 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 12:12:05 +0800
Subject: [PATCH 216/278] Debug missing keyfiles
---
src/openstack_cloud/openstack_cloud.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index 037e33cb9..e4c1c9b78 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -370,6 +370,9 @@ def _cleanup_key_files(self, exclude_instances: Iterable[str]) -> None:
# Find key file from this application.
if path.is_file() and path.name.startswith(self.prefix) and path.name.endswith(".key"):
total += 1
+ # TODO: DEBUG
+ import pytest
+ pytest.set_trace()
if path.name in exclude_filename:
continue
path.unlink()
From 4a26d75b8b8b49ebbc5a4fc0ca090762cb9cfac2 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 12:24:11 +0800
Subject: [PATCH 217/278] Fix keyfile path matching issue
---
src/openstack_cloud/openstack_cloud.py | 9 +--------
src/openstack_cloud/openstack_runner_manager.py | 4 +++-
tests/integration/test_runner_manager_openstack.py | 2 +-
3 files changed, 5 insertions(+), 10 deletions(-)
diff --git a/src/openstack_cloud/openstack_cloud.py b/src/openstack_cloud/openstack_cloud.py
index e4c1c9b78..ad21f4d97 100644
--- a/src/openstack_cloud/openstack_cloud.py
+++ b/src/openstack_cloud/openstack_cloud.py
@@ -370,10 +370,7 @@ def _cleanup_key_files(self, exclude_instances: Iterable[str]) -> None:
# Find key file from this application.
if path.is_file() and path.name.startswith(self.prefix) and path.name.endswith(".key"):
total += 1
- # TODO: DEBUG
- import pytest
- pytest.set_trace()
- if path.name in exclude_filename:
+ if path in exclude_filename:
continue
path.unlink()
deleted += 1
@@ -394,12 +391,8 @@ def _cleanup_openstack_keypairs(
for key in keypairs:
# The `name` attribute is of resource.Body type.
if key.name and str(key.name).startswith(self.prefix):
- # TODO: DEBUG
- import pytest
- pytest.set_trace()
if str(key.name) in exclude_instance_set:
continue
-
try:
self._delete_keypair(conn, key.name)
except openstack.exceptions.SDKException:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 8363a64bc..fca8fec01 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -499,7 +499,9 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
f"pgrep -x {RUNNER_WORKER_PROCESS} && kill $(pgrep -x {RUNNER_WORKER_PROCESS});"
)
else:
- logger.info("Attempting to kill runner process on %s if not busy", instance.server_name)
+ logger.info(
+ "Attempting to kill runner process on %s if not busy", instance.server_name
+ )
# Only kill Runner.Listener if Runner.Worker does not exist.
kill_command = (
f"! pgrep -x {RUNNER_WORKER_PROCESS} && pgrep -x {RUNNER_LISTENER_PROCESS} && "
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index f3e4955f7..39d351845 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -264,7 +264,7 @@ async def test_runner_normal_idle_lifecycle(
runner = openstack_instances[0]
assert openstack_runner_manager._health_check(runner)
-
+
# 3.
runner_manager.cleanup()
runner_list = runner_manager.get_runners()
From 1292940fc1d5273416d23aac5b789cee339a0d34 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 12:36:25 +0800
Subject: [PATCH 218/278] testing
---
src-docs/openstack_cloud.openstack_runner_manager.md | 2 +-
src/openstack_cloud/openstack_runner_manager.py | 3 ++-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 752e2f9d3..01d8eb3f6 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -131,7 +131,7 @@ The prefix of runner names.
---
-
+
### method `cleanup`
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index fca8fec01..1ed907b41 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -285,7 +285,8 @@ def flush_runners(
instance_list = self._openstack_cloud.get_instances()
for instance in instance_list:
try:
- self._check_state_and_flush(instance, busy)
+ pass
+ # self._check_state_and_flush(instance, busy)
except SSHError:
logger.warning(
"Unable to determine state of %s and kill runner process due to SSH issues",
From b1af621cbc55dcf26e829d30b0ec94604ccaa4ef Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 12:56:54 +0800
Subject: [PATCH 219/278] debug
---
tests/integration/test_runner_manager_openstack.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 39d351845..57e420e1b 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -335,9 +335,7 @@ async def test_runner_flush_busy_lifecycle(
assert busy_runner.github_state == GitHubRunnerState.BUSY
# 4.
- runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_BUSY)
-
- issue_metrics_events = runner_manager_with_one_runner.cleanup()
+ issue_metrics_events = runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_BUSY)
assert issue_metrics_events[events.RunnerStart] == 1
await wait_runner_amount(runner_manager_with_one_runner, 0)
From 88b8fc38e9fdc07433b32e934ccf0f0530b57869 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 13:04:02 +0800
Subject: [PATCH 220/278] Add debug
---
src-docs/openstack_cloud.openstack_runner_manager.md | 2 +-
src/openstack_cloud/openstack_runner_manager.py | 12 ++++++++++--
tests/integration/test_runner_manager_openstack.py | 4 +---
3 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 01d8eb3f6..752e2f9d3 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -131,7 +131,7 @@ The prefix of runner names.
---
-
+
### method `cleanup`
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 1ed907b41..bfa7d5827 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -285,8 +285,7 @@ def flush_runners(
instance_list = self._openstack_cloud.get_instances()
for instance in instance_list:
try:
- pass
- # self._check_state_and_flush(instance, busy)
+ self._check_state_and_flush(instance, busy)
except SSHError:
logger.warning(
"Unable to determine state of %s and kill runner process due to SSH issues",
@@ -510,6 +509,15 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
)
# Checking the result of kill command is not useful, as the exit code does not reveal much.
ssh_conn.run(kill_command, warn=True)
+
+ # TODO: debug
+ result = ssh_conn.run("ps aux", warn=True)
+ import pytest
+ pytest.set_trace()
+
+ result = ssh_conn.run("ps aux", warn=True)
+ import pytest
+ pytest.set_trace()
@retry(tries=3, delay=5, backoff=2, local_logger=logger)
def _health_check(self, instance: OpenstackInstance) -> bool:
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 57e420e1b..b20426ca0 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -335,9 +335,7 @@ async def test_runner_flush_busy_lifecycle(
assert busy_runner.github_state == GitHubRunnerState.BUSY
# 4.
- issue_metrics_events = runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_BUSY)
- assert issue_metrics_events[events.RunnerStart] == 1
-
+ runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_BUSY)
await wait_runner_amount(runner_manager_with_one_runner, 0)
From f5ded42ce9c431cc68c767b70d8000b5d6789892 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 13:31:53 +0800
Subject: [PATCH 221/278] Use OR
---
src/openstack_cloud/openstack_runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index bfa7d5827..28b51d74f 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -504,7 +504,7 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
)
# Only kill Runner.Listener if Runner.Worker does not exist.
kill_command = (
- f"! pgrep -x {RUNNER_WORKER_PROCESS} && pgrep -x {RUNNER_LISTENER_PROCESS} && "
+ f"pgrep -x {RUNNER_WORKER_PROCESS} || pgrep -x {RUNNER_LISTENER_PROCESS} && "
f"kill $(pgrep -x {RUNNER_LISTENER_PROCESS})"
)
# Checking the result of kill command is not useful, as the exit code does not reveal much.
From abe0b07b059a2b29b02e3d77284ca2c09be0277c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 13:37:19 +0800
Subject: [PATCH 222/278] debug
---
src/openstack_cloud/openstack_runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 28b51d74f..758f432ca 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -508,7 +508,7 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
f"kill $(pgrep -x {RUNNER_LISTENER_PROCESS})"
)
# Checking the result of kill command is not useful, as the exit code does not reveal much.
- ssh_conn.run(kill_command, warn=True)
+ # ssh_conn.run(kill_command, warn=True)
# TODO: debug
result = ssh_conn.run("ps aux", warn=True)
From 5db2f6d2bbb1ab738ea43196e8b2a380459b8529 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 14:00:43 +0800
Subject: [PATCH 223/278] Debug
---
src/manager/runner_manager.py | 3 ++-
src/openstack_cloud/openstack_runner_manager.py | 13 ++-----------
2 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index a9cff2f35..94a99275b 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -249,7 +249,8 @@ def cleanup(self) -> IssuedMetricEventsStats:
Returns:
Stats on metrics events issued during the cleanup of runners.
"""
- self._github.delete_runners([GitHubRunnerState.OFFLINE])
+ # TODO: DEBUG
+ # self._github.delete_runners([GitHubRunnerState.OFFLINE])
remove_token = self._github.get_removal_token()
deleted_runner_metrics = self._cloud.cleanup(remove_token)
return self._issue_runner_metrics(metrics=deleted_runner_metrics)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 758f432ca..6323b65fa 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -484,7 +484,7 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
return
except SSHError:
logger.exception(
- "SSH connection failure with %s during health check", instance.server_name
+ "SSH connection failure with %s during flushing", instance.server_name
)
raise
@@ -508,16 +508,7 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
f"kill $(pgrep -x {RUNNER_LISTENER_PROCESS})"
)
# Checking the result of kill command is not useful, as the exit code does not reveal much.
- # ssh_conn.run(kill_command, warn=True)
-
- # TODO: debug
- result = ssh_conn.run("ps aux", warn=True)
- import pytest
- pytest.set_trace()
-
- result = ssh_conn.run("ps aux", warn=True)
- import pytest
- pytest.set_trace()
+ ssh_conn.run(kill_command, warn=True)
@retry(tries=3, delay=5, backoff=2, local_logger=logger)
def _health_check(self, instance: OpenstackInstance) -> bool:
From 1cfe5f060fd346a933fc39ef201a8dd1d1db1611 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 15:11:34 +0800
Subject: [PATCH 224/278] Debug
---
src/manager/runner_manager.py | 3 +--
src/openstack_cloud/openstack_runner_manager.py | 3 +++
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 94a99275b..a9cff2f35 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -249,8 +249,7 @@ def cleanup(self) -> IssuedMetricEventsStats:
Returns:
Stats on metrics events issued during the cleanup of runners.
"""
- # TODO: DEBUG
- # self._github.delete_runners([GitHubRunnerState.OFFLINE])
+ self._github.delete_runners([GitHubRunnerState.OFFLINE])
remove_token = self._github.get_removal_token()
deleted_runner_metrics = self._cloud.cleanup(remove_token)
return self._issue_runner_metrics(metrics=deleted_runner_metrics)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 6323b65fa..685644b30 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -548,6 +548,9 @@ def _run_health_check(ssh_conn: SSHConnection, name: str) -> bool:
Returns:
Whether the health succeed.
"""
+ # TODO: Debug
+ import pytest
+ pytest.set_trace()
result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
if not result.ok:
logger.warning("SSH run of `ps aux` failed on %s: %s", name, result.stderr)
From 6709348071315a1df1c11d6cd923b263f4dea206 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 15:17:40 +0800
Subject: [PATCH 225/278] Debug
---
src/openstack_cloud/openstack_runner_manager.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 685644b30..4715474b6 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -535,6 +535,9 @@ def _health_check(self, instance: OpenstackInstance) -> bool:
"SSH connection failure with %s during health check", instance.server_name
)
raise
+ # TODO: Debug
+ import pytest
+ pytest.set_trace()
return OpenstackRunnerManager._run_health_check(ssh_conn, instance.server_name)
@staticmethod
@@ -548,9 +551,6 @@ def _run_health_check(ssh_conn: SSHConnection, name: str) -> bool:
Returns:
Whether the health succeed.
"""
- # TODO: Debug
- import pytest
- pytest.set_trace()
result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
if not result.ok:
logger.warning("SSH run of `ps aux` failed on %s: %s", name, result.stderr)
From e168737fc2fbd71f382e3bf992e98506ba068995 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 15:22:23 +0800
Subject: [PATCH 226/278] Debug
---
src/openstack_cloud/openstack_runner_manager.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 4715474b6..ae986ce6c 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -488,6 +488,10 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
)
raise
+ # TODO: Debug
+ import pytest
+ pytest.set_trace()
+
# Using a single command to determine the state and kill the process if needed.
# This makes it more robust when network is unstable.
if busy:
@@ -535,9 +539,6 @@ def _health_check(self, instance: OpenstackInstance) -> bool:
"SSH connection failure with %s during health check", instance.server_name
)
raise
- # TODO: Debug
- import pytest
- pytest.set_trace()
return OpenstackRunnerManager._run_health_check(ssh_conn, instance.server_name)
@staticmethod
From 550b81a3d0b235ae6fa981ab82de476b8df5f4c3 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 15:26:56 +0800
Subject: [PATCH 227/278] Debug
---
.../test_runner_manager_openstack.py | 29 ++++++++++---------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index b20426ca0..21dd25305 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -311,20 +311,21 @@ async def test_runner_flush_busy_lifecycle(
)
await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
- runner_list = runner_manager_with_one_runner.get_runners()
- assert len(runner_list) == 1
- busy_runner = runner_list[0]
- assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
- assert busy_runner.github_state == GitHubRunnerState.BUSY
-
- # 2.
- runner_manager_with_one_runner.cleanup()
- runner_list = runner_manager_with_one_runner.get_runners()
- assert isinstance(runner_list, tuple)
- assert len(runner_list) == 1
- runner = runner_list[0]
- assert runner.cloud_state == CloudRunnerState.ACTIVE
- assert busy_runner.github_state == GitHubRunnerState.BUSY
+ # TODO: debug
+ # runner_list = runner_manager_with_one_runner.get_runners()
+ # assert len(runner_list) == 1
+ # busy_runner = runner_list[0]
+ # assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+ # assert busy_runner.github_state == GitHubRunnerState.BUSY
+
+ # # 2.
+ # runner_manager_with_one_runner.cleanup()
+ # runner_list = runner_manager_with_one_runner.get_runners()
+ # assert isinstance(runner_list, tuple)
+ # assert len(runner_list) == 1
+ # runner = runner_list[0]
+ # assert runner.cloud_state == CloudRunnerState.ACTIVE
+ # assert busy_runner.github_state == GitHubRunnerState.BUSY
# 3.
runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
From ca52e5729ede1023e71b92a1f58f00b70d8ce987 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 16:26:47 +0800
Subject: [PATCH 228/278] Fix flush mode
---
src/manager/runner_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index a9cff2f35..048b9c628 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -237,7 +237,7 @@ def flush_runners(
)
busy = False
- if FlushMode.FLUSH_BUSY:
+ if flush_mode == FlushMode.FLUSH_BUSY:
busy = True
remove_token = self._github.get_removal_token()
stats = self._cloud.flush_runners(remove_token, busy)
From 0c730d6e532985bef70212d43336cccdac8c1fe7 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 16:55:30 +0800
Subject: [PATCH 229/278] Remove debug
---
.../openstack_runner_manager.py | 4 ---
.../test_runner_manager_openstack.py | 29 +++++++++----------
2 files changed, 14 insertions(+), 19 deletions(-)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index ae986ce6c..6323b65fa 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -488,10 +488,6 @@ def _check_state_and_flush(self, instance: OpenstackInstance, busy: bool) -> Non
)
raise
- # TODO: Debug
- import pytest
- pytest.set_trace()
-
# Using a single command to determine the state and kill the process if needed.
# This makes it more robust when network is unstable.
if busy:
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 21dd25305..b20426ca0 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -311,21 +311,20 @@ async def test_runner_flush_busy_lifecycle(
)
await wait_for(lambda: workflow_is_status(workflow, "in_progress"))
- # TODO: debug
- # runner_list = runner_manager_with_one_runner.get_runners()
- # assert len(runner_list) == 1
- # busy_runner = runner_list[0]
- # assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
- # assert busy_runner.github_state == GitHubRunnerState.BUSY
-
- # # 2.
- # runner_manager_with_one_runner.cleanup()
- # runner_list = runner_manager_with_one_runner.get_runners()
- # assert isinstance(runner_list, tuple)
- # assert len(runner_list) == 1
- # runner = runner_list[0]
- # assert runner.cloud_state == CloudRunnerState.ACTIVE
- # assert busy_runner.github_state == GitHubRunnerState.BUSY
+ runner_list = runner_manager_with_one_runner.get_runners()
+ assert len(runner_list) == 1
+ busy_runner = runner_list[0]
+ assert busy_runner.cloud_state == CloudRunnerState.ACTIVE
+ assert busy_runner.github_state == GitHubRunnerState.BUSY
+
+ # 2.
+ runner_manager_with_one_runner.cleanup()
+ runner_list = runner_manager_with_one_runner.get_runners()
+ assert isinstance(runner_list, tuple)
+ assert len(runner_list) == 1
+ runner = runner_list[0]
+ assert runner.cloud_state == CloudRunnerState.ACTIVE
+ assert busy_runner.github_state == GitHubRunnerState.BUSY
# 3.
runner_manager_with_one_runner.flush_runners(flush_mode=FlushMode.FLUSH_IDLE)
From 6edf7c0aba48d3fba283edf3f84ee44b6fcd9e79 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 21 Aug 2024 17:25:07 +0800
Subject: [PATCH 230/278] Re-enable all tests
---
.github/workflows/e2e_test.yaml | 4 +---
.github/workflows/integration_test.yaml | 4 +---
2 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index df2667ae2..7d0383c12 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,9 +1,7 @@
name: End-to-End tests
on:
- # TODO: DEBUG
- workflow_dispatch:
- # pull_request:
+ pull_request:
jobs:
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index b67ec9e61..8e0bc700a 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,9 +1,7 @@
name: integration-tests
on:
- # TODO: DEBUG
- workflow_dispatch:
- # pull_request:
+ pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
From 4e3134b890ceef226534ca455d2df386243fa22b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 22 Aug 2024 13:47:11 +0800
Subject: [PATCH 231/278] Initial unit test for runner scaler
---
pyproject.toml | 2 +
src-docs/github_client.md | 12 +-
src-docs/openstack_cloud.openstack_manager.md | 4 +-
src-docs/runner_manager_type.md | 4 +-
src-docs/runner_type.md | 2 +-
src/charm.py | 6 +-
src/charm_state.py | 16 +-
src/github_client.py | 34 +--
src/manager/cloud_runner_manager.py | 9 +-
src/manager/github_runner_manager.py | 19 +-
src/manager/runner_manager.py | 8 +-
src/metrics/github.py | 4 +-
src/openstack_cloud/openstack_manager.py | 10 +-
.../openstack_runner_manager.py | 4 +-
src/runner.py | 4 +-
src/runner_manager_type.py | 6 +-
src/runner_type.py | 4 +-
.../test_runner_manager_openstack.py | 8 +-
tests/integration/test_self_hosted_runner.py | 4 +-
tests/unit/mock_runner_managers.py | 194 ++++++++++++++++++
tests/unit/test_charm.py | 12 +-
tests/unit/test_charm_state.py | 16 +-
tests/unit/test_github_client.py | 12 +-
tests/unit/test_lxd_runner_manager.py | 8 +-
tests/unit/test_runner.py | 6 +-
tests/unit/test_runner_manager.py | 2 +
tests/unit/test_runner_scaler.py | 40 ++++
27 files changed, 345 insertions(+), 105 deletions(-)
create mode 100644 tests/unit/mock_runner_managers.py
create mode 100644 tests/unit/test_runner_manager.py
create mode 100644 tests/unit/test_runner_scaler.py
diff --git a/pyproject.toml b/pyproject.toml
index f4a49bd2a..a60427837 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,6 +13,8 @@ omit = [
# These are covered by `tests/integration/test_runner_manager_openstack.py`.
"src/openstack_cloud/openstack_cloud.py",
"src/openstack_cloud/openstack_runner_manager.py",
+ # Thin wrapper around GitHub API. Not a lot of value in unit tests.
+ "src/manager/github_runner_manager.py",
# Contains interface for calling LXD. Tested in integration tests and end to end tests.
"src/lxd.py",
# Contains interface for calling repo policy compliance service. Tested in integration test
diff --git a/src-docs/github_client.md b/src-docs/github_client.md
index 6cd298c52..fc0de8f7b 100644
--- a/src-docs/github_client.md
+++ b/src-docs/github_client.md
@@ -67,7 +67,7 @@ Instantiate the GiHub API client.
### method `delete_runner`
```python
-delete_runner(path: GithubOrg | GithubRepo, runner_id: int) → None
+delete_runner(path: GitHubOrg | GitHubRepo, runner_id: int) → None
```
Delete the self-hosted runner from GitHub.
@@ -87,7 +87,7 @@ Delete the self-hosted runner from GitHub.
```python
get_job_info(
- path: GithubRepo,
+ path: GitHubRepo,
workflow_run_id: str,
runner_name: str
) → JobStats
@@ -123,7 +123,7 @@ Get information about a job for a specific workflow run.
```python
get_runner_application(
- path: GithubOrg | GithubRepo,
+ path: GitHubOrg | GitHubRepo,
arch: Arch,
os: str = 'linux'
) → RunnerApplication
@@ -157,7 +157,7 @@ Get runner application available for download for given arch.
### method `get_runner_github_info`
```python
-get_runner_github_info(path: GithubOrg | GithubRepo) → list[SelfHostedRunner]
+get_runner_github_info(path: GitHubOrg | GitHubRepo) → list[SelfHostedRunner]
```
Get runner information on GitHub under a repo or org.
@@ -180,7 +180,7 @@ Get runner information on GitHub under a repo or org.
### method `get_runner_registration_token`
```python
-get_runner_registration_token(path: GithubOrg | GithubRepo) → str
+get_runner_registration_token(path: GitHubOrg | GitHubRepo) → str
```
Get token from GitHub used for registering runners.
@@ -203,7 +203,7 @@ Get token from GitHub used for registering runners.
### method `get_runner_remove_token`
```python
-get_runner_remove_token(path: GithubOrg | GithubRepo) → str
+get_runner_remove_token(path: GitHubOrg | GitHubRepo) → str
```
Get token from GitHub used for removing runners.
diff --git a/src-docs/openstack_cloud.openstack_manager.md b/src-docs/openstack_cloud.openstack_manager.md
index a0f0a2531..115eec05b 100644
--- a/src-docs/openstack_cloud.openstack_manager.md
+++ b/src-docs/openstack_cloud.openstack_manager.md
@@ -27,7 +27,7 @@ create_instance_config(
app_name: str,
unit_num: int,
image_id: str,
- path: GithubOrg | GithubRepo,
+ path: GitHubOrg | GitHubRepo,
labels: Iterable[str],
registration_token: str
) → InstanceConfig
@@ -75,7 +75,7 @@ The configuration values for creating a single runner instance.
```python
__init__(
- github_path: GithubOrg | GithubRepo,
+ github_path: GitHubOrg | GitHubRepo,
image_id: str,
labels: Iterable[str],
name: str,
diff --git a/src-docs/runner_manager_type.md b/src-docs/runner_manager_type.md
index 7e0675add..f6b58a83c 100644
--- a/src-docs/runner_manager_type.md
+++ b/src-docs/runner_manager_type.md
@@ -97,7 +97,7 @@ __init__(
charm_state: CharmState,
image: str,
lxd_storage_path: Path,
- path: GithubOrg | GithubRepo,
+ path: GitHubOrg | GitHubRepo,
service_token: str,
token: str,
dockerhub_mirror: str | None = None,
@@ -147,7 +147,7 @@ Configuration of runner manager.
```python
__init__(
charm_state: CharmState,
- path: GithubOrg | GithubRepo,
+ path: GitHubOrg | GitHubRepo,
labels: Iterable[str],
token: str,
flavor: str,
diff --git a/src-docs/runner_type.md b/src-docs/runner_type.md
index 8c9db658a..d5029f4f8 100644
--- a/src-docs/runner_type.md
+++ b/src-docs/runner_type.md
@@ -106,7 +106,7 @@ __init__(
labels: tuple[str],
lxd_storage_path: Path,
name: str,
- path: GithubOrg | GithubRepo,
+ path: GitHubOrg | GitHubRepo,
proxies: ProxySetting,
dockerhub_mirror: str | None = None,
ssh_debug_connections: list[SSHDebugConnection] | None = None
diff --git a/src/charm.py b/src/charm.py
index d3b48d44d..f842c591e 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -59,7 +59,7 @@
TOKEN_CONFIG_NAME,
CharmConfigInvalidError,
CharmState,
- GithubPath,
+ GitHubPath,
InstanceType,
OpenstackImage,
ProxyConfig,
@@ -372,7 +372,7 @@ def _ensure_service_health(self) -> None:
raise
def _get_runner_manager(
- self, state: CharmState, token: str | None = None, path: GithubPath | None = None
+ self, state: CharmState, token: str | None = None, path: GitHubPath | None = None
) -> LXDRunnerManager:
"""Get a RunnerManager instance.
@@ -1191,7 +1191,7 @@ def _get_set_image_ready_status(self) -> bool:
return True
def _get_runner_scaler(
- self, state: CharmState, token: str | None = None, path: GithubPath | None = None
+ self, state: CharmState, token: str | None = None, path: GitHubPath | None = None
) -> RunnerScaler:
"""Get runner scaler instance for scaling runners.
diff --git a/src/charm_state.py b/src/charm_state.py
index 186609806..d562c2242 100644
--- a/src/charm_state.py
+++ b/src/charm_state.py
@@ -87,7 +87,7 @@ class AnyHttpsUrl(AnyHttpUrl):
@dataclasses.dataclass
-class GithubRepo:
+class GitHubRepo:
"""Represent GitHub repository.
Attributes:
@@ -108,7 +108,7 @@ def path(self) -> str:
@dataclasses.dataclass
-class GithubOrg:
+class GitHubOrg:
"""Represent GitHub organization.
Attributes:
@@ -128,10 +128,10 @@ def path(self) -> str:
return self.org
-GithubPath = GithubOrg | GithubRepo
+GitHubPath = GitHubOrg | GitHubRepo
-def parse_github_path(path_str: str, runner_group: str) -> GithubPath:
+def parse_github_path(path_str: str, runner_group: str) -> GitHubPath:
"""Parse GitHub path.
Args:
@@ -151,8 +151,8 @@ def parse_github_path(path_str: str, runner_group: str) -> GithubPath:
if len(paths) != 2:
raise CharmConfigInvalidError(f"Invalid path configuration {path_str}")
owner, repo = paths
- return GithubRepo(owner=owner, repo=repo)
- return GithubOrg(org=path_str, group=runner_group)
+ return GitHubRepo(owner=owner, repo=repo)
+ return GitHubOrg(org=path_str, group=runner_group)
@dataclasses.dataclass
@@ -165,7 +165,7 @@ class GithubConfig:
"""
token: str
- path: GithubPath
+ path: GitHubPath
@classmethod
def from_charm(cls, charm: CharmBase) -> "GithubConfig":
@@ -367,7 +367,7 @@ class CharmConfig(BaseModel):
dockerhub_mirror: AnyHttpsUrl | None
labels: tuple[str, ...]
openstack_clouds_yaml: dict[str, dict] | None
- path: GithubPath
+ path: GitHubPath
reconcile_interval: int
repo_policy_compliance: RepoPolicyComplianceConfig | None
token: str
diff --git a/src/github_client.py b/src/github_client.py
index 3c7718f94..b724b5cdb 100644
--- a/src/github_client.py
+++ b/src/github_client.py
@@ -16,7 +16,7 @@
from ghapi.page import paged
from typing_extensions import assert_never
-from charm_state import Arch, GithubOrg, GithubPath, GithubRepo
+from charm_state import Arch, GitHubOrg, GitHubPath, GitHubRepo
from errors import GithubApiError, JobNotFoundError, RunnerBinaryError, TokenError
from github_type import (
JobStats,
@@ -88,7 +88,7 @@ def __init__(self, token: str):
@catch_http_errors
def get_runner_application(
- self, path: GithubPath, arch: Arch, os: str = "linux"
+ self, path: GitHubPath, arch: Arch, os: str = "linux"
) -> RunnerApplication:
"""Get runner application available for download for given arch.
@@ -106,11 +106,11 @@ def get_runner_application(
The runner application.
"""
runner_applications: RunnerApplicationList = []
- if isinstance(path, GithubRepo):
+ if isinstance(path, GitHubRepo):
runner_applications = self._client.actions.list_runner_applications_for_repo(
owner=path.owner, repo=path.repo
)
- if isinstance(path, GithubOrg):
+ if isinstance(path, GitHubOrg):
runner_applications = self._client.actions.list_runner_applications_for_org(
org=path.org
)
@@ -127,7 +127,7 @@ def get_runner_application(
) from err
@catch_http_errors
- def get_runner_github_info(self, path: GithubPath) -> list[SelfHostedRunner]:
+ def get_runner_github_info(self, path: GitHubPath) -> list[SelfHostedRunner]:
"""Get runner information on GitHub under a repo or org.
Args:
@@ -139,7 +139,7 @@ def get_runner_github_info(self, path: GithubPath) -> list[SelfHostedRunner]:
"""
remote_runners_list: list[SelfHostedRunner] = []
- if isinstance(path, GithubRepo):
+ if isinstance(path, GitHubRepo):
# The documentation of ghapi for pagination is incorrect and examples will give errors.
# This workaround is a temp solution. Will be moving to PyGitHub in the future.
self._client.actions.list_self_hosted_runners_for_repo(
@@ -157,7 +157,7 @@ def get_runner_github_info(self, path: GithubPath) -> list[SelfHostedRunner]:
)
for item in page["runners"]
]
- if isinstance(path, GithubOrg):
+ if isinstance(path, GitHubOrg):
# The documentation of ghapi for pagination is incorrect and examples will give errors.
# This workaround is a temp solution. Will be moving to PyGitHub in the future.
self._client.actions.list_self_hosted_runners_for_org(org=path.org, per_page=100)
@@ -175,7 +175,7 @@ def get_runner_github_info(self, path: GithubPath) -> list[SelfHostedRunner]:
return remote_runners_list
@catch_http_errors
- def get_runner_remove_token(self, path: GithubPath) -> str:
+ def get_runner_remove_token(self, path: GitHubPath) -> str:
"""Get token from GitHub used for removing runners.
Args:
@@ -185,11 +185,11 @@ def get_runner_remove_token(self, path: GithubPath) -> str:
The removing token.
"""
token: RemoveToken
- if isinstance(path, GithubRepo):
+ if isinstance(path, GitHubRepo):
token = self._client.actions.create_remove_token_for_repo(
owner=path.owner, repo=path.repo
)
- elif isinstance(path, GithubOrg):
+ elif isinstance(path, GitHubOrg):
token = self._client.actions.create_remove_token_for_org(org=path.org)
else:
assert_never(token)
@@ -197,7 +197,7 @@ def get_runner_remove_token(self, path: GithubPath) -> str:
return token["token"]
@catch_http_errors
- def get_runner_registration_token(self, path: GithubPath) -> str:
+ def get_runner_registration_token(self, path: GitHubPath) -> str:
"""Get token from GitHub used for registering runners.
Args:
@@ -208,11 +208,11 @@ def get_runner_registration_token(self, path: GithubPath) -> str:
The registration token.
"""
token: RegistrationToken
- if isinstance(path, GithubRepo):
+ if isinstance(path, GitHubRepo):
token = self._client.actions.create_registration_token_for_repo(
owner=path.owner, repo=path.repo
)
- elif isinstance(path, GithubOrg):
+ elif isinstance(path, GitHubOrg):
token = self._client.actions.create_registration_token_for_org(org=path.org)
else:
assert_never(token)
@@ -220,7 +220,7 @@ def get_runner_registration_token(self, path: GithubPath) -> str:
return token["token"]
@catch_http_errors
- def delete_runner(self, path: GithubPath, runner_id: int) -> None:
+ def delete_runner(self, path: GitHubPath, runner_id: int) -> None:
"""Delete the self-hosted runner from GitHub.
Args:
@@ -228,19 +228,19 @@ def delete_runner(self, path: GithubPath, runner_id: int) -> None:
name.
runner_id: Id of the runner.
"""
- if isinstance(path, GithubRepo):
+ if isinstance(path, GitHubRepo):
self._client.actions.delete_self_hosted_runner_from_repo(
owner=path.owner,
repo=path.repo,
runner_id=runner_id,
)
- if isinstance(path, GithubOrg):
+ if isinstance(path, GitHubOrg):
self._client.actions.delete_self_hosted_runner_from_org(
org=path.org,
runner_id=runner_id,
)
- def get_job_info(self, path: GithubRepo, workflow_run_id: str, runner_name: str) -> JobStats:
+ def get_job_info(self, path: GitHubRepo, workflow_run_id: str, runner_name: str) -> JobStats:
"""Get information about a job for a specific workflow run.
Args:
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index 28ed17b20..b2624199d 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -9,7 +9,7 @@
from enum import Enum, auto
from typing import Iterator, Sequence, Tuple
-from charm_state import GithubPath, ProxyConfig, SSHDebugConnection
+from charm_state import GitHubPath, ProxyConfig, SSHDebugConnection
from metrics.runner import RunnerMetrics
logger = logging.getLogger(__name__)
@@ -52,8 +52,9 @@ class CloudRunnerState(str, Enum):
UNKNOWN = auto()
UNEXPECTED = auto()
+ # Exclude from coverage as not much value for testing this object conversion.
@staticmethod
- def from_openstack_server_status(
+ def from_openstack_server_status( # pragma: no cover
openstack_server_status: str,
) -> "CloudRunnerState":
"""Create from openstack server status.
@@ -97,7 +98,7 @@ class GitHubRunnerConfig:
labels: The labels to add to runners.
"""
- github_path: GithubPath
+ github_path: GitHubPath
labels: list[str]
@@ -158,7 +159,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
"""
@abc.abstractmethod
- def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance:
+ def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
"""Get a self-hosted runner by instance id.
Args:
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 0aed972bd..fcbaccfb2 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -4,9 +4,9 @@
"""Client for managing self-hosted runner on GitHub side."""
from enum import Enum, auto
-from typing import Sequence
+from typing import Iterable
-from charm_state import GithubPath
+from charm_state import GitHubPath
from github_client import GithubClient
from github_type import GitHubRunnerStatus, SelfHostedRunner
@@ -45,10 +45,10 @@ def from_runner(runner: SelfHostedRunner) -> "GitHubRunnerState":
return state
-class GithubRunnerManager:
+class GitHubRunnerManager:
"""Manage self-hosted runner on GitHub side."""
- def __init__(self, prefix: str, token: str, path: GithubPath):
+ def __init__(self, prefix: str, token: str, path: GitHubPath):
"""Construct the object.
Args:
@@ -61,8 +61,8 @@ def __init__(self, prefix: str, token: str, path: GithubPath):
self.github = GithubClient(token)
def get_runners(
- self, states: Sequence[GitHubRunnerState] | None = None
- ) -> tuple[SelfHostedRunner]:
+ self, states: Iterable[GitHubRunnerState] | None = None
+ ) -> tuple[SelfHostedRunner, ...]:
"""Get info on self-hosted runners of certain states.
Args:
@@ -72,14 +72,15 @@ def get_runners(
Information on the runners.
"""
runner_list = self.github.get_runner_github_info(self._path)
+ state_set = set(states) if states is not None else None # set(None) raises TypeError
return tuple(
runner
for runner in runner_list
if runner.name.startswith(self._prefix)
- and GithubRunnerManager._is_runner_in_state(runner, states)
+ and GitHubRunnerManager._is_runner_in_state(runner, state_set)
)
- def delete_runners(self, states: Sequence[GitHubRunnerState] | None = None) -> None:
+ def delete_runners(self, states: Iterable[GitHubRunnerState] | None = None) -> None:
"""Delete the self-hosted runners of certain states.
Args:
@@ -111,7 +112,7 @@ def get_removal_token(self) -> str:
@staticmethod
def _is_runner_in_state(
- runner: SelfHostedRunner, states: Sequence[GitHubRunnerState] | None
+ runner: SelfHostedRunner, states: set[GitHubRunnerState] | None
) -> bool:
"""Check that the runner is in one of the states provided.
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 048b9c628..e509473c0 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -9,7 +9,7 @@
from multiprocessing import Pool
from typing import Iterator, Sequence, Type, cast
-from charm_state import GithubPath
+from charm_state import GitHubPath
from errors import GithubMetricsError, RunnerCreateError
from github_type import SelfHostedRunner
from manager.cloud_runner_manager import (
@@ -19,7 +19,7 @@
HealthState,
InstanceId,
)
-from manager.github_runner_manager import GithubRunnerManager, GitHubRunnerState
+from manager.github_runner_manager import GitHubRunnerManager, GitHubRunnerState
from metrics import events as metric_events
from metrics import github as github_metrics
from metrics import runner as runner_metrics
@@ -86,7 +86,7 @@ class RunnerManagerConfig:
"""
token: str
- path: GithubPath
+ path: GitHubPath
class RunnerManager:
@@ -106,7 +106,7 @@ def __init__(self, cloud_runner_manager: CloudRunnerManager, config: RunnerManag
self._config = config
self._cloud = cloud_runner_manager
self.name_prefix = self._cloud.name_prefix
- self._github = GithubRunnerManager(
+ self._github = GitHubRunnerManager(
prefix=self.name_prefix, token=self._config.token, path=self._config.path
)
diff --git a/src/metrics/github.py b/src/metrics/github.py
index 354933fea..e40574eb7 100644
--- a/src/metrics/github.py
+++ b/src/metrics/github.py
@@ -4,7 +4,7 @@
"""Functions to calculate metrics from data retrieved from GitHub."""
import logging
-from charm_state import GithubRepo
+from charm_state import GitHubRepo
from errors import GithubMetricsError, JobNotFoundError
from github_client import GithubClient
from metrics.runner import PreJobMetrics
@@ -35,7 +35,7 @@ def job(
try:
job_info = github_client.get_job_info(
- path=GithubRepo(owner=owner, repo=repo),
+ path=GitHubRepo(owner=owner, repo=repo),
workflow_run_id=pre_job_metrics.workflow_run_id,
runner_name=runner_name,
)
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index 379d2ae4c..e0ce47d4f 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -40,7 +40,7 @@
from paramiko.ssh_exception import NoValidConnectionsError
import reactive.runner_manager as reactive_runner_manager
-from charm_state import CharmState, GithubOrg, ProxyConfig, SSHDebugConnection
+from charm_state import CharmState, GitHubOrg, ProxyConfig, SSHDebugConnection
from errors import (
CreateMetricsStorageError,
GetMetricsStorageError,
@@ -62,7 +62,7 @@
from repo_policy_compliance_client import RepoPolicyComplianceClient
from runner_manager import IssuedMetricEventsStats
from runner_manager_type import FlushMode, OpenstackRunnerManagerConfig
-from runner_type import GithubPath, RunnerGithubInfo, RunnerNameByHealth
+from runner_type import GitHubPath, RunnerGithubInfo, RunnerNameByHealth
from utilities import retry, set_env_var
logger = logging.getLogger(__name__)
@@ -120,7 +120,7 @@ class InstanceConfig:
registration_token: Token for registering the runner on GitHub.
"""
- github_path: GithubPath
+ github_path: GitHubPath
image_id: str
labels: Iterable[str]
name: str
@@ -188,7 +188,7 @@ def create_instance_config( # pylint: disable=too-many-arguments
app_name: str,
unit_num: int,
image_id: str,
- path: GithubPath,
+ path: GitHubPath,
labels: Iterable[str],
registration_token: str,
) -> InstanceConfig:
@@ -257,7 +257,7 @@ def _generate_cloud_init_userdata(
instance_config = cloud_init_userdata.instance_config
proxies = cloud_init_userdata.proxies
- if isinstance(instance_config.github_path, GithubOrg):
+ if isinstance(instance_config.github_path, GitHubOrg):
runner_group = instance_config.github_path.group
aproxy_address = proxies.aproxy_address if proxies is not None else None
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index f504fd3d7..39475ee69 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -16,7 +16,7 @@
import paramiko.ssh_exception
from fabric import Connection as SSHConnection
-from charm_state import GithubOrg
+from charm_state import GitHubOrg
from errors import (
CreateMetricsStorageError,
GetMetricsStorageError,
@@ -432,7 +432,7 @@ def _generate_cloud_init(self, instance_name: str, registration_token: str) -> s
pre_job_contents = jinja.get_template("pre-job.j2").render(pre_job_contents_dict)
runner_group = None
- if isinstance(self._runner_config.github_path, GithubOrg):
+ if isinstance(self._runner_config.github_path, GitHubOrg):
runner_group = self._runner_config.github_path.group
aproxy_address = (
self._service_config.proxy_config.aproxy_address
diff --git a/src/runner.py b/src/runner.py
index 4610faded..61a12115c 100644
--- a/src/runner.py
+++ b/src/runner.py
@@ -23,7 +23,7 @@
import yaml
import shared_fs
-from charm_state import Arch, GithubOrg, SSHDebugConnection, VirtualMachineResources
+from charm_state import Arch, GitHubOrg, SSHDebugConnection, VirtualMachineResources
from errors import (
CreateMetricsStorageError,
GithubClientError,
@@ -838,7 +838,7 @@ def _register_runner(self, registration_token: str, labels: Sequence[str]) -> No
self.instance.name,
]
- if isinstance(self.config.path, GithubOrg):
+ if isinstance(self.config.path, GitHubOrg):
register_cmd += ["--runnergroup", self.config.path.group]
logger.info("Executing registration command...")
diff --git a/src/runner_manager_type.py b/src/runner_manager_type.py
index 343b1eb04..e37e0b290 100644
--- a/src/runner_manager_type.py
+++ b/src/runner_manager_type.py
@@ -10,7 +10,7 @@
import jinja2
-from charm_state import CharmState, GithubPath, ReactiveConfig
+from charm_state import CharmState, GitHubPath, ReactiveConfig
from github_client import GithubClient
from github_type import GitHubRunnerStatus
from lxd import LxdClient
@@ -81,7 +81,7 @@ class LXDRunnerManagerConfig: # pylint: disable=too-many-instance-attributes
charm_state: CharmState
image: str
lxd_storage_path: Path
- path: GithubPath
+ path: GitHubPath
service_token: str
token: str
dockerhub_mirror: str | None = None
@@ -113,7 +113,7 @@ class OpenstackRunnerManagerConfig: # pylint: disable=too-many-instance-attribu
"""
charm_state: CharmState
- path: GithubPath
+ path: GitHubPath
labels: Iterable[str]
token: str
flavor: str
diff --git a/src/runner_type.py b/src/runner_type.py
index 86769eafd..92560cbcf 100644
--- a/src/runner_type.py
+++ b/src/runner_type.py
@@ -8,7 +8,7 @@
from pathlib import Path
from typing import Optional
-from charm_state import GithubPath, SSHDebugConnection
+from charm_state import GitHubPath, SSHDebugConnection
@dataclass
@@ -64,7 +64,7 @@ class RunnerConfig: # pylint: disable=too-many-instance-attributes
labels: tuple[str]
lxd_storage_path: Path
name: str
- path: GithubPath
+ path: GitHubPath
proxies: ProxySetting
dockerhub_mirror: str | None = None
ssh_debug_connections: list[SSHDebugConnection] | None = None
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 5421569fe..b12d42414 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -17,7 +17,7 @@
from github.Workflow import Workflow
from openstack.connection import Connection as OpenstackConnection
-from charm_state import GithubPath, ProxyConfig, parse_github_path
+from charm_state import GitHubPath, ProxyConfig, parse_github_path
from manager.cloud_runner_manager import CloudRunnerState, GitHubRunnerConfig, SupportServiceConfig
from manager.github_runner_manager import GitHubRunnerState
from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
@@ -64,7 +64,7 @@ def log_dir_base_path_fixture(
@pytest.fixture(scope="module", name="github_path")
-def github_path_fixture(path: str) -> GithubPath:
+def github_path_fixture(path: str) -> GitHubPath:
return parse_github_path(path, "Default")
@@ -92,7 +92,7 @@ async def openstack_runner_manager_fixture(
openstack_test_image: str,
flavor_name: str,
network_name: str,
- github_path: GithubPath,
+ github_path: GitHubPath,
proxy_config: ProxyConfig,
runner_label: str,
openstack_connection: OpenstackConnection,
@@ -134,7 +134,7 @@ async def openstack_runner_manager_fixture(
async def runner_manager_fixture(
openstack_runner_manager: OpenStackRunnerManager,
token: str,
- github_path: GithubPath,
+ github_path: GitHubPath,
log_dir_base_path: dict[str, Path],
) -> RunnerManager:
"""Get RunnerManager instance.
diff --git a/tests/integration/test_self_hosted_runner.py b/tests/integration/test_self_hosted_runner.py
index c91ac8e97..4232fae4b 100644
--- a/tests/integration/test_self_hosted_runner.py
+++ b/tests/integration/test_self_hosted_runner.py
@@ -16,7 +16,7 @@
DOCKERHUB_MIRROR_CONFIG_NAME,
PATH_CONFIG_NAME,
VIRTUAL_MACHINES_CONFIG_NAME,
- GithubRepo,
+ GitHubRepo,
)
from github_client import GithubClient
from tests.integration.helpers.common import (
@@ -150,7 +150,7 @@ async def test_flush_busy_runner(
# Wait until runner online and then busy.
for _ in range(30):
all_runners = runner_manager_github_client.get_runner_github_info(
- GithubRepo(
+ GitHubRepo(
owner=forked_github_repository.owner.login, repo=forked_github_repository.name
)
)
diff --git a/tests/unit/mock_runner_managers.py b/tests/unit/mock_runner_managers.py
new file mode 100644
index 000000000..75209680f
--- /dev/null
+++ b/tests/unit/mock_runner_managers.py
@@ -0,0 +1,297 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+"""Mock runner managers with shared in-memory state for unit tests."""
+
+import random
+import secrets
+from dataclasses import dataclass
+from typing import Iterable, Iterator, Sequence
+
+from charm_state import GitHubPath
+from github_type import GitHubRunnerStatus, SelfHostedRunner
+from manager.cloud_runner_manager import (
+    CloudRunnerInstance,
+    CloudRunnerManager,
+    CloudRunnerState,
+    InstanceId,
+)
+from manager.github_runner_manager import GitHubRunnerManager, GitHubRunnerState
+from metrics.runner import RunnerMetrics
+
+
+@dataclass
+class MockRunner:
+    """Mock of a runner.
+
+    Attributes:
+        name: The name of the runner.
+        instance_id: The instance id of the runner.
+        cloud_state: The cloud state of the runner.
+        github_state: The GitHub state of the runner.
+        health: The health flag of the runner.
+    """
+
+    name: str
+    instance_id: InstanceId
+    cloud_state: CloudRunnerState
+    github_state: GitHubRunnerState
+    health: bool
+
+    def __init__(self, name: str):
+        """Construct the object.
+
+        Starts with ACTIVE cloud state, IDLE GitHub state, health True, and a random instance id.
+
+        Args:
+            name: The name of the runner.
+        """
+        self.name = name
+        self.instance_id = secrets.token_hex(6)
+        # Set the declared `cloud_state` field; `to_cloud_runner` and the dataclass-generated
+        # `__repr__`/`__eq__` read it.
+        self.cloud_state = CloudRunnerState.ACTIVE
+        self.github_state = GitHubRunnerState.IDLE
+        self.health = True
+
+    def to_cloud_runner(self) -> CloudRunnerInstance:
+        """Construct a CloudRunnerInstance from this object.
+
+        Returns:
+            The CloudRunnerInstance with the name, instance id, health, and cloud state.
+        """
+        return CloudRunnerInstance(
+            name=self.name,
+            instance_id=self.instance_id,
+            health=self.health,
+            state=self.cloud_state,
+        )
+
+
+@dataclass
+class SharedMockRunnerManagerState:
+    """State shared by mock runner managers.
+
+    For sharing the mock runner states between MockCloudRunnerManager and MockGitHubRunnerManager.
+
+    Attributes:
+        runners: The mock runners, keyed by instance id.
+    """
+
+    runners: dict[InstanceId, MockRunner]
+
+    def __init__(self):
+        """Construct the object with no runners."""
+        self.runners = {}
+
+
+class MockCloudRunnerManager(CloudRunnerManager):
+    """Mock for CloudRunnerManager.
+
+    Metrics is not supported in this mock.
+
+    Attributes:
+        prefix: The name prefix of the runners created by this manager.
+        state: The runner state shared between the mock runner managers.
+    """
+
+    def __init__(self, state: SharedMockRunnerManagerState):
+        """Construct the object.
+
+        Args:
+            state: The shared state between mock runner managers.
+        """
+        self.prefix = f"mock_{secrets.token_hex(4)}"
+        self.state = state
+
+    @property
+    def name_prefix(self) -> str:
+        """Get the name prefix of the self-hosted runners."""
+        return self.prefix
+
+    def create_runner(self, registration_token: str) -> InstanceId:
+        """Create a self-hosted runner.
+
+        Args:
+            registration_token: The GitHub registration token for registering runners.
+
+        Returns:
+            The instance id of the runner created.
+        """
+        name = f"{self.name_prefix}-{secrets.token_hex(6)}"
+        runner = MockRunner(name)
+        self.state.runners[runner.instance_id] = runner
+        return runner.instance_id
+
+    def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
+        """Get a self-hosted runner by instance id.
+
+        Args:
+            instance_id: The instance id.
+
+        Returns:
+            The runner instance if found, else None.
+        """
+        runner = self.state.runners.get(instance_id, None)
+        if runner is not None:
+            return runner.to_cloud_runner()
+        return None
+
+    def get_runners(self, states: Sequence[CloudRunnerState]) -> tuple[CloudRunnerInstance, ...]:
+        """Get self-hosted runners by state.
+
+        Args:
+            states: Filter for the runners with these cloud states.
+
+        Returns:
+            The runners in the given cloud states.
+        """
+        return tuple(
+            runner.to_cloud_runner()
+            for runner in self.state.runners.values()
+            if runner.cloud_state in states
+        )
+
+    def delete_runner(self, instance_id: InstanceId, remove_token: str) -> RunnerMetrics | None:
+        """Delete self-hosted runner.
+
+        Args:
+            instance_id: The instance id of the runner to delete.
+            remove_token: The GitHub remove token.
+
+        Returns:
+            Always None, as metrics are not supported in the mocks.
+        """
+        self.state.runners.pop(instance_id, None)
+        # No supporting metrics in the mocks.
+        return None
+
+    def flush_runners(self, remove_token: str, busy: bool = False) -> Iterator[RunnerMetrics]:
+        """Stop all runners.
+
+        Args:
+            remove_token: The GitHub remove token for removing runners.
+            busy: If false, every runner whose GitHub state is not BUSY is removed. If true, all
+                runners are removed.
+
+        Returns:
+            An empty iterator, as metrics are not supported in the mocks.
+        """
+        # No supporting metrics in the mocks.
+        if busy:
+            self.state.runners = {}
+        else:
+            self.state.runners = {
+                instance_id: runner
+                for instance_id, runner in self.state.runners.items()
+                if runner.github_state == GitHubRunnerState.BUSY
+            }
+        return iter([])
+
+    def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
+        """Cleanup runner and resource on the cloud.
+
+        The mock performs no health check and deletes nothing.
+
+        Args:
+            remove_token: The GitHub remove token for removing runners.
+
+        Returns:
+            An empty iterator, as metrics are not supported in the mocks.
+        """
+        # No supporting metrics in the mocks.
+        return iter([])
+
+
+class MockGitHubRunnerManager:
+    """Mock for GitHubRunnerManager.
+
+    Attributes:
+        name_prefix: The name prefix of the runners.
+        state: The runner state shared between the mock runner managers.
+        path: The GitHub path given at construction.
+    """
+
+    def __init__(self, name_prefix: str, path: GitHubPath, state: SharedMockRunnerManagerState):
+        """Construct the object.
+
+        Args:
+            name_prefix: The name prefix of the runners.
+            path: The GitHub path.
+            state: The shared state between mock runner managers.
+        """
+        self.name_prefix = name_prefix
+        self.state = state
+        self.path = path
+
+    def get_registration_token(self) -> str:
+        """Get the registration token for runners.
+
+        Returns:
+            A fixed mock registration token.
+        """
+        return "mock_registration_token"
+
+    def get_removal_token(self) -> str:
+        """Get the remove token for runners.
+
+        Named to match GitHubRunnerManager.get_removal_token.
+
+        Returns:
+            A fixed mock remove token.
+        """
+        return "mock_remove_token"
+
+    def get_remove_token(self) -> str:
+        """Get the remove token for runners.
+
+        Kept for callers using the original name of this mock method.
+
+        Returns:
+            A fixed mock remove token.
+        """
+        return self.get_removal_token()
+
+    def get_runners(
+        self, github_states: Iterable[GitHubRunnerState] | None = None
+    ) -> tuple[SelfHostedRunner, ...]:
+        """Get the runners in the given GitHub states.
+
+        Args:
+            github_states: Filter for the runners with these GitHub states. If None all states
+                will be included.
+
+        Returns:
+            The runners as SelfHostedRunner, each with a randomly generated id.
+        """
+        # Compare against the enum members, not their values, as `github_state` holds members.
+        github_state_set = set(GitHubRunnerState if github_states is None else github_states)
+        return tuple(
+            SelfHostedRunner(
+                busy=runner.github_state == GitHubRunnerState.BUSY,
+                id=random.randint(1, 1000000),
+                labels=[],
+                os="linux",
+                name=runner.name,
+                status=(
+                    GitHubRunnerStatus.OFFLINE
+                    if runner.github_state == GitHubRunnerState.OFFLINE
+                    else GitHubRunnerStatus.ONLINE
+                ),
+            )
+            for runner in self.state.runners.values()
+            if runner.github_state in github_state_set
+        )
+
+    def delete_runners(self, states: Iterable[GitHubRunnerState]) -> None:
+        """Delete the runners in the given GitHub states.
+
+        Args:
+            states: The GitHub states of the runners to delete.
+        """
+        github_states = set(states)
+        self.state.runners = {
+            instance_id: runner
+            for instance_id, runner in self.state.runners.items()
+            if runner.github_state not in github_states
+        }
diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py
index d44eca542..493d3ff94 100644
--- a/tests/unit/test_charm.py
+++ b/tests/unit/test_charm.py
@@ -27,8 +27,8 @@
VM_CPU_CONFIG_NAME,
VM_DISK_CONFIG_NAME,
Arch,
- GithubOrg,
- GithubRepo,
+ GitHubOrg,
+ GitHubRepo,
InstanceType,
OpenstackImage,
ProxyConfig,
@@ -461,7 +461,7 @@ def test_org_register(self, run, wt, mkdir, rm):
"github-runner",
"0",
LXDRunnerManagerConfig(
- path=GithubOrg(org="mockorg", group="mockgroup"),
+ path=GitHubOrg(org="mockorg", group="mockgroup"),
token="mocktoken",
image="jammy",
service_token=token,
@@ -491,7 +491,7 @@ def test_repo_register(self, run, wt, mkdir, rm):
"github-runner",
"0",
LXDRunnerManagerConfig(
- path=GithubRepo(owner="mockorg", repo="repo"),
+ path=GitHubRepo(owner="mockorg", repo="repo"),
token="mocktoken",
image="jammy",
service_token=token,
@@ -549,7 +549,7 @@ def test_update_config(self, run, wt, mkdir, rm):
"github-runner",
"0",
LXDRunnerManagerConfig(
- path=GithubRepo(owner="mockorg", repo="repo"),
+ path=GitHubRepo(owner="mockorg", repo="repo"),
token="mocktoken",
image="jammy",
service_token=token,
@@ -571,7 +571,7 @@ def test_update_config(self, run, wt, mkdir, rm):
"github-runner",
"0",
LXDRunnerManagerConfig(
- path=GithubRepo(owner="mockorg", repo="repo"),
+ path=GitHubRepo(owner="mockorg", repo="repo"),
token="mocktoken",
image="jammy",
service_token=token,
diff --git a/tests/unit/test_charm_state.py b/tests/unit/test_charm_state.py
index 2025c0e76..b4b25afb0 100644
--- a/tests/unit/test_charm_state.py
+++ b/tests/unit/test_charm_state.py
@@ -40,8 +40,8 @@
CharmState,
FirewallEntry,
GithubConfig,
- GithubOrg,
- GithubRepo,
+ GitHubOrg,
+ GitHubRepo,
ImmutableConfigChangedError,
LocalLxdRunnerConfig,
OpenstackImage,
@@ -64,7 +64,7 @@ def test_github_repo_path():
"""
owner = "test_owner"
repo = "test_repo"
- github_repo = GithubRepo(owner, repo)
+ github_repo = GitHubRepo(owner, repo)
path = github_repo.path()
@@ -79,7 +79,7 @@ def test_github_org_path():
"""
org = "test_org"
group = "test_group"
- github_org = GithubOrg(org, group)
+ github_org = GitHubOrg(org, group)
path = github_org.path()
@@ -128,14 +128,14 @@ def test_github_config_from_charm_invalid_token():
@pytest.mark.parametrize(
"path_str, runner_group, expected_type, expected_attrs",
[
- ("owner/repo", "test_group", GithubRepo, {"owner": "owner", "repo": "repo"}),
- ("test_org", "test_group", GithubOrg, {"org": "test_org", "group": "test_group"}),
+ ("owner/repo", "test_group", GitHubRepo, {"owner": "owner", "repo": "repo"}),
+ ("test_org", "test_group", GitHubOrg, {"org": "test_org", "group": "test_group"}),
],
)
def test_parse_github_path(
path_str: str,
runner_group: str,
- expected_type: GithubRepo | GithubOrg,
+ expected_type: GitHubRepo | GitHubOrg,
expected_attrs: dict[str, str],
):
"""
@@ -479,7 +479,7 @@ def test_charm_config_from_charm_valid():
result = CharmConfig.from_charm(mock_charm)
- assert result.path == GithubRepo(owner="owner", repo="repo")
+ assert result.path == GitHubRepo(owner="owner", repo="repo")
assert result.reconcile_interval == 5
assert result.denylist == [
FirewallEntry(ip_range="192.168.1.1"),
diff --git a/tests/unit/test_github_client.py b/tests/unit/test_github_client.py
index b01a75a01..9bd336a03 100644
--- a/tests/unit/test_github_client.py
+++ b/tests/unit/test_github_client.py
@@ -10,7 +10,7 @@
import pytest
-from charm_state import GithubRepo
+from charm_state import GitHubRepo
from errors import JobNotFoundError
from github_client import GithubClient
from github_type import JobConclusion, JobStats
@@ -95,7 +95,7 @@ def test_get_job_info(github_client: GithubClient, job_stats_raw: JobStatsRawDat
act: Call get_job_info.
assert: The correct JobStats object is returned.
"""
- github_repo = GithubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
+ github_repo = GitHubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
job_stats = github_client.get_job_info(
path=github_repo,
workflow_run_id=secrets.token_hex(16),
@@ -128,7 +128,7 @@ def test_get_job_info_no_conclusion(github_client: GithubClient, job_stats_raw:
}
]
}
- github_repo = GithubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
+ github_repo = GitHubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
job_stats = github_client.get_job_info(
path=github_repo,
workflow_run_id=secrets.token_hex(16),
@@ -156,7 +156,7 @@ def test_github_api_pagination_multiple_pages(
github_client=github_client, job_stats_raw=job_stats_raw, include_runner=True
)
- github_repo = GithubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
+ github_repo = GitHubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
job_stats = github_client.get_job_info(
path=github_repo,
workflow_run_id=secrets.token_hex(16),
@@ -184,7 +184,7 @@ def test_github_api_pagination_job_not_found(
github_client=github_client, job_stats_raw=job_stats_raw, include_runner=False
)
- github_repo = GithubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
+ github_repo = GitHubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
with pytest.raises(JobNotFoundError):
github_client.get_job_info(
@@ -198,7 +198,7 @@ def test_github_api_http_error(github_client: GithubClient, job_stats_raw: JobSt
github_client._client.actions.list_jobs_for_workflow_run.side_effect = HTTPError(
"http://test.com", 500, "", http.client.HTTPMessage(), None
)
- github_repo = GithubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
+ github_repo = GitHubRepo(owner=secrets.token_hex(16), repo=secrets.token_hex(16))
with pytest.raises(JobNotFoundError):
github_client.get_job_info(
diff --git a/tests/unit/test_lxd_runner_manager.py b/tests/unit/test_lxd_runner_manager.py
index 829d73c9c..36c36df11 100644
--- a/tests/unit/test_lxd_runner_manager.py
+++ b/tests/unit/test_lxd_runner_manager.py
@@ -16,8 +16,8 @@
Arch,
CharmConfig,
CharmState,
- GithubOrg,
- GithubRepo,
+ GitHubOrg,
+ GitHubRepo,
ProxyConfig,
ReactiveConfig,
VirtualMachineResources,
@@ -67,9 +67,9 @@ def charm_state_fixture(charm_config: MagicMock):
scope="function",
name="runner_manager",
params=[
- (GithubOrg("test_org", "test_group"), ProxyConfig()),
+ (GitHubOrg("test_org", "test_group"), ProxyConfig()),
(
- GithubRepo("test_owner", "test_repo"),
+ GitHubRepo("test_owner", "test_repo"),
ProxyConfig(
no_proxy="test_no_proxy",
http=TEST_PROXY_SERVER_URL,
diff --git a/tests/unit/test_runner.py b/tests/unit/test_runner.py
index fdf8fc2a1..af7954d06 100644
--- a/tests/unit/test_runner.py
+++ b/tests/unit/test_runner.py
@@ -13,7 +13,7 @@
from _pytest.monkeypatch import MonkeyPatch
import metrics.runner_logs
-from charm_state import GithubOrg, GithubRepo, SSHDebugConnection, VirtualMachineResources
+from charm_state import GitHubOrg, GitHubRepo, SSHDebugConnection, VirtualMachineResources
from errors import (
CreateMetricsStorageError,
LxdError,
@@ -138,11 +138,11 @@ def ssh_debug_connections_fixture() -> list[SSHDebugConnection]:
name="runner",
params=[
(
- GithubOrg("test_org", "test_group"),
+ GitHubOrg("test_org", "test_group"),
ProxySetting(no_proxy=None, http=None, https=None, aproxy_address=None),
),
(
- GithubRepo("test_owner", "test_repo"),
+ GitHubRepo("test_owner", "test_repo"),
ProxySetting(
no_proxy="test_no_proxy",
http=TEST_PROXY_SERVER_URL,
diff --git a/tests/unit/test_runner_manager.py b/tests/unit/test_runner_manager.py
new file mode 100644
index 000000000..e3979c0f6
--- /dev/null
+++ b/tests/unit/test_runner_manager.py
@@ -0,0 +1,2 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
new file mode 100644
index 000000000..76d14d941
--- /dev/null
+++ b/tests/unit/test_runner_scaler.py
@@ -0,0 +1,40 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+
+import pytest
+
+from charm_state import GitHubRepo
+from manager.runner_manager import RunnerManager, RunnerManagerConfig
+from manager.runner_scaler import RunnerScaler
+from tests.unit.mock_runner_managers import (
+ MockCloudRunnerManager,
+ MockGitHubRunnerManager,
+ SharedMockRunnerManagerState,
+)
+
+
+@pytest.fixture(name="runner_manager")
+def runner_manager_fixture() -> RunnerManager:
+ state = SharedMockRunnerManagerState()
+ mock_cloud = MockCloudRunnerManager(state)
+ mock_path = GitHubRepo("mock_owner", "mock_repo")
+ mock_github = MockGitHubRunnerManager(mock_cloud.name_prefix, mock_path, state)
+
+ config = RunnerManagerConfig("mock_token", mock_path)
+ runner_manager = RunnerManager(mock_cloud, config)
+ runner_manager._github = mock_github
+ return runner_manager
+
+
+@pytest.fixture(name="runner_scaler")
+def runner_scaler_fixture(runner_manager: RunnerManager) -> RunnerScaler:
+ return RunnerScaler(runner_manager)
+
+
+def test_get_no_runner(runner_scaler: RunnerScaler):
+ info = runner_scaler.get_runner_info()
+ assert info["offline"] == 0
+ assert info["online"] == 0
+ assert info["unknown"] == 0
+ assert info["runners"] == tuple()
From 7632c91abc2aab4273014f80432faefd1101e8bd Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 22 Aug 2024 18:59:59 +0800
Subject: [PATCH 232/278] Add more unit tests for runner scaler
---
src/manager/github_runner_manager.py | 6 +-
src/manager/runner_manager.py | 53 ++++++++----
src/manager/runner_scaler.py | 13 +--
tests/unit/mock_runner_managers.py | 14 ++--
tests/unit/test_runner_scaler.py | 116 ++++++++++++++++++++++++---
5 files changed, 163 insertions(+), 39 deletions(-)
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index fcbaccfb2..8f00525b0 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -37,10 +37,10 @@ def from_runner(runner: SelfHostedRunner) -> "GitHubRunnerState":
"""
state = GitHubRunnerState.OFFLINE
# A runner that is busy and offline is possible.
- if runner.busy:
+ if runner["busy"]:
state = GitHubRunnerState.BUSY
- if runner.status == GitHubRunnerStatus.ONLINE:
- if not runner.busy:
+ if runner["status"] == GitHubRunnerStatus.ONLINE:
+ if not runner["busy"]:
state = GitHubRunnerState.IDLE
return state
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index e509473c0..40cde9bb4 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -7,7 +7,7 @@
from dataclasses import dataclass
from enum import Enum, auto
from multiprocessing import Pool
-from typing import Iterator, Sequence, Type, cast
+from typing import Iterable, Iterator, Sequence, Type, cast
from charm_state import GitHubPath
from errors import GithubMetricsError, RunnerCreateError
@@ -125,21 +125,8 @@ def create_runners(self, num: int) -> tuple[InstanceId]:
create_runner_args = [
RunnerManager._CreateRunnerArgs(self._cloud, registration_token) for _ in range(num)
]
- instance_id_list = []
- with Pool(processes=min(num, 10)) as pool:
- jobs = pool.imap_unordered(
- func=RunnerManager._create_runner, iterable=create_runner_args
- )
- for _ in range(num):
- try:
- instance_id = next(jobs)
- except RunnerCreateError:
- logger.exception("Failed to spawn a runner.")
- except StopIteration:
- break
- else:
- instance_id_list.append(instance_id)
- return tuple(instance_id_list)
+ return RunnerManager._spawn_runners(create_runner_args)
+
def get_runners(
self,
@@ -162,7 +149,7 @@ def get_runners(
logger.info("Getting runners...")
github_infos = self._github.get_runners(github_states)
cloud_infos = self._cloud.get_runners(cloud_states)
- github_infos_map = {info.name: info for info in github_infos}
+ github_infos_map = {info["name"]: info for info in github_infos}
cloud_infos_map = {info.name: info for info in cloud_infos}
logger.info(
"Found following runners: %s", cloud_infos_map.keys() | github_infos_map.keys()
@@ -254,6 +241,38 @@ def cleanup(self) -> IssuedMetricEventsStats:
deleted_runner_metrics = self._cloud.cleanup(remove_token)
return self._issue_runner_metrics(metrics=deleted_runner_metrics)
+ @staticmethod
+ def _spawn_runners(create_runner_args: Iterable["RunnerManager._CreateRunnerArgs"]) -> tuple[InstanceId, ...]:
+ """Parallel spawn of runners.
+
+ The length of the create_runner_args is number _create_runner invocation, and therefore the
+ number of runner spawned.
+
+ Args:
+ create_runner_args: List of arg for invoking _create_runner method.
+
+ Returns:
+ A list of instance ID of runner spawned.
+ """
+ num = len(create_runner_args)
+
+ instance_id_list = []
+ with Pool(processes=min(num, 10)) as pool:
+ jobs = pool.imap_unordered(
+ func=RunnerManager._create_runner, iterable=create_runner_args
+ )
+ for _ in range(num):
+ try:
+ instance_id = next(jobs)
+ except RunnerCreateError:
+ logger.exception("Failed to spawn a runner.")
+ except StopIteration:
+ break
+ else:
+ instance_id_list.append(instance_id)
+ return tuple(instance_id_list)
+
+
def _delete_runners(
self, runners: Sequence[RunnerInstance], remove_token: str
) -> IssuedMetricEventsStats:
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index 980c542d0..06c0ecee5 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -70,7 +70,7 @@ def get_runner_info(self) -> RunnerInfo:
online=online, offline=offline, unknown=unknown, runners=tuple(online_runners)
)
- def flush(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> None:
+ def flush(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> int:
"""Flush the runners.
Args:
@@ -80,10 +80,11 @@ def flush(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> None:
Number of runners flushed.
"""
metric_stats = self._manager.cleanup()
- delete_metric_stats = self._manager.delete_runners(flush_mode=flush_mode)
+ delete_metric_stats = self._manager.flush_runners(flush_mode=flush_mode)
+ events = set(delete_metric_stats.keys()) | set(metric_stats.keys())
metric_stats = {
- delete_metric_stats.get(event_name, 0) + metric_stats.get(event_name, 0)
- for event_name in set(delete_metric_stats) | set(metric_stats)
+ event_name: delete_metric_stats.get(event_name, 0) + metric_stats.get(event_name, 0)
+ for event_name in events
}
return metric_stats.get(metric_events.RunnerStop, 0)
@@ -137,13 +138,15 @@ def reconcile(self, num_of_runner: int) -> int:
]
try:
+ available_runners = set(runner.name for runner in idle_runners) | set(runner.name for runner in offline_healthy_runners)
+ logger.info("Current available runners (idle + healthy offline): %s", available_runners)
metric_events.issue_event(
metric_events.Reconciliation(
timestamp=time.time(),
flavor=self._manager.name_prefix,
crashed_runners=metric_stats.get(metric_events.RunnerStart, 0)
- metric_stats.get(metric_events.RunnerStop, 0),
- idle_runners=len(set(idle_runners) | set(offline_healthy_runners)),
+ idle_runners=len(available_runners),
duration=end_timestamp - start_timestamp,
)
)
diff --git a/tests/unit/mock_runner_managers.py b/tests/unit/mock_runner_managers.py
index 75209680f..7db7e24a0 100644
--- a/tests/unit/mock_runner_managers.py
+++ b/tests/unit/mock_runner_managers.py
@@ -14,7 +14,7 @@
CloudRunnerState,
InstanceId,
)
-from manager.github_runner_manager import GitHubRunnerManager, GitHubRunnerState
+from manager.github_runner_manager import GitHubRunnerState
from metrics.runner import RunnerMetrics
@@ -31,7 +31,7 @@ class MockRunner:
def __init__(self, name: str):
self.name = name
self.instance_id = secrets.token_hex(6)
- self.state = CloudRunnerState.ACTIVE
+ self.cloud_state = CloudRunnerState.ACTIVE
self.github_state = GitHubRunnerState.IDLE
self.health = True
@@ -94,17 +94,21 @@ def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
return runner.to_cloud_runner()
return None
- def get_runners(self, states: Sequence[CloudRunnerState]) -> tuple[CloudRunnerInstance, ...]:
+ def get_runners(self, states: Sequence[CloudRunnerState] | None = None) -> tuple[CloudRunnerInstance, ...]:
"""Get self-hosted runners by state.
Args:
states: Filter for the runners with these github states. If None all states will be
included.
"""
+ if states is None:
+ states = [member.value for member in CloudRunnerState]
+
+ state_set = set(states)
return tuple(
runner.to_cloud_runner()
for runner in self.state.runners.values()
- if runner.state in states
+ if runner.cloud_state in state_set
)
def delete_runner(self, instance_id: InstanceId, remove_token: str) -> RunnerMetrics | None:
@@ -158,7 +162,7 @@ def __init__(self, name_prefix: str, path: GitHubPath, state: SharedMockRunnerMa
def get_registration_token(self) -> str:
return "mock_registration_token"
- def get_remove_token(self) -> str:
+ def get_removal_token(self) -> str:
return "mock_remove_token"
def get_runners(
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index 76d14d941..3dd2f9800 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -2,10 +2,12 @@
# See LICENSE file for licensing details.
+from typing import Iterable
import pytest
from charm_state import GitHubRepo
-from manager.runner_manager import RunnerManager, RunnerManagerConfig
+from manager.cloud_runner_manager import InstanceId
+from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
from manager.runner_scaler import RunnerScaler
from tests.unit.mock_runner_managers import (
MockCloudRunnerManager,
@@ -14,27 +16,123 @@
)
-@pytest.fixture(name="runner_manager")
-def runner_manager_fixture() -> RunnerManager:
+def mock_runner_manager_spawn_runners(create_runner_args: Iterable[RunnerManager._CreateRunnerArgs]) -> tuple[InstanceId, ...]:
+ """Mock _spawn_runners method of RunnerManager.
+
+ The _spawn_runners method uses multi-process, which copies the object, e.g., the mocks.
+ There is easy way to sync the state of the mocks object across processes. Replacing the
+ _spawn_runner to remove the multi-process.pool is an easier approach.
+ """
+ return tuple(RunnerManager._create_runner(arg) for arg in create_runner_args)
+
+
+@pytest.fixture(scope="function", name="runner_manager")
+def runner_manager_fixture(monkeypatch) -> RunnerManager:
state = SharedMockRunnerManagerState()
mock_cloud = MockCloudRunnerManager(state)
mock_path = GitHubRepo("mock_owner", "mock_repo")
mock_github = MockGitHubRunnerManager(mock_cloud.name_prefix, mock_path, state)
+ monkeypatch.setattr("manager.runner_manager.RunnerManager._spawn_runners", mock_runner_manager_spawn_runners)
config = RunnerManagerConfig("mock_token", mock_path)
runner_manager = RunnerManager(mock_cloud, config)
runner_manager._github = mock_github
return runner_manager
-@pytest.fixture(name="runner_scaler")
+@pytest.fixture(scope="function", name="runner_scaler")
def runner_scaler_fixture(runner_manager: RunnerManager) -> RunnerScaler:
return RunnerScaler(runner_manager)
-def test_get_no_runner(runner_scaler: RunnerScaler):
+def assert_runner_info(
+ runner_scaler: RunnerScaler, online: int = 0, offline: int = 0, unknown: int = 0
+) -> None:
+ """Assert runner info contains a certain amount of runners.
+
+ Args:
+ runner_scaler: The RunnerScaler to get information from.
+ online: The number of online runners to assert for.
+ offline: The number of offline runners to assert for.
+ unknown: The number of unknown runners to assert for.
+ """
info = runner_scaler.get_runner_info()
- assert info["offline"] == 0
- assert info["online"] == 0
- assert info["unknown"] == 0
- assert info["runners"] == tuple()
+ assert info["offline"] == offline
+ assert info["online"] == online
+ assert info["unknown"] == unknown
+ assert isinstance(info["runners"], tuple)
+ assert len(info["runners"]) == online
+
+
+def test_get_no_runner(runner_scaler: RunnerScaler):
+ """
+ Arrange: A RunnerScaler with no runners.
+ Act: Get runner information.
+ Assert: Information should contain no runners.
+ """
+ assert_runner_info(runner_scaler, online=0)
+
+
+def test_flush_no_runner(runner_scaler: RunnerScaler):
+ """
+ Arrange: A RunnerScaler with no runners.
+ Act:
+ 1. Flush idle runners.
+ 2. Flush busy runners.
+ Assert:
+ 1. No change in number of runners. Runner info should contain no runners.
+ 2. No change in number of runners.
+ """
+ # 1.
+ diff = runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE)
+ assert diff == 0
+ assert_runner_info(runner_scaler, online=0)
+
+ # 2.
+ diff = runner_scaler.flush(flush_mode=FlushMode.FLUSH_BUSY)
+ assert diff == 0
+ assert_runner_info(runner_scaler, online=0)
+
+
+def test_reconcile_runner_create_one(runner_scaler: RunnerScaler):
+ """
+ Arrange: A RunnerScaler with no runners.
+ Act: Reconcile to no runners.
+ Assert: No changes. Runner info should contain no runners.
+ """
+ diff = runner_scaler.reconcile(num_of_runner=0)
+ assert diff == 0
+ assert_runner_info(runner_scaler, online=0)
+
+
+def test_one_runner(runner_scaler: RunnerScaler):
+ """
+ Arrange: A RunnerScaler with no runners.
+ Act:
+ 1. Reconcile to one runner.
+ 2. Reconcile to one runner.
+ 3. Flush idle runners.
+ 4. Reconcile to one runner.
+ Assert:
+ 1. Runner info has one runner.
+ 2. No changes to number of runner.
+ 3. Runner info has one runner.
+ """
+ # 1.
+ diff = runner_scaler.reconcile(1)
+ assert diff == 1
+ assert_runner_info(runner_scaler, online=1)
+
+ # 2.
+ diff = runner_scaler.reconcile(1)
+ assert diff == 0
+ assert_runner_info(runner_scaler, online=1)
+
+ # 3.
+ runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE)
+ assert_runner_info(runner_scaler, online=0)
+
+ # 4.
+ diff = runner_scaler.reconcile(1)
+ assert diff == 1
+ assert_runner_info(runner_scaler, online=1)
From 52a772428a19da92c0600979316df9df8e34584c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 26 Aug 2024 13:10:08 +0800
Subject: [PATCH 233/278] Add more tests
---
src/manager/runner_manager.py | 12 +--
src/manager/runner_scaler.py | 23 ++++-
tests/unit/mock_runner_managers.py | 18 +++-
tests/unit/test_runner_scaler.py | 140 ++++++++++++++++++++++++++---
4 files changed, 170 insertions(+), 23 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 40cde9bb4..03639a306 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -126,7 +126,6 @@ def create_runners(self, num: int) -> tuple[InstanceId]:
RunnerManager._CreateRunnerArgs(self._cloud, registration_token) for _ in range(num)
]
return RunnerManager._spawn_runners(create_runner_args)
-
def get_runners(
self,
@@ -242,17 +241,19 @@ def cleanup(self) -> IssuedMetricEventsStats:
return self._issue_runner_metrics(metrics=deleted_runner_metrics)
@staticmethod
- def _spawn_runners(create_runner_args: Iterable["RunnerManager._CreateRunnerArgs"]) -> tuple[InstanceId, ...]:
+ def _spawn_runners(
+ create_runner_args: Iterable["RunnerManager._CreateRunnerArgs"],
+ ) -> tuple[InstanceId, ...]:
"""Parallel spawn of runners.
-
- The length of the create_runner_args is number _create_runner invocation, and therefore the
+
+ The length of the create_runner_args is number _create_runner invocation, and therefore the
number of runner spawned.
Args:
create_runner_args: List of arg for invoking _create_runner method.
Returns:
- A list of instance ID of runner spawned.
+ A list of instance ID of runner spawned.
"""
num = len(create_runner_args)
@@ -271,7 +272,6 @@ def _spawn_runners(create_runner_args: Iterable["RunnerManager._CreateRunnerArgs
else:
instance_id_list.append(instance_id)
return tuple(instance_id_list)
-
def _delete_runners(
self, runners: Sequence[RunnerInstance], remove_token: str
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index 06c0ecee5..c7cca69a3 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -21,15 +21,19 @@ class RunnerInfo(TypedDict):
Attributes:
online: The number of runner in online state.
+ busy: The number of the runner in busy state.
offline: The number of runner in offline state.
unknown: The number of runner in unknown state.
runners: The names of the online runners.
+ busy_runners: The names of the busy runners.
"""
online: int
+ busy: int
offline: int
unknown: int
runners: tuple[str, ...]
+ busy_runners: tuple[str, ...]
class RunnerScaler:
@@ -51,14 +55,18 @@ def get_runner_info(self) -> RunnerInfo:
"""
runner_list = self._manager.get_runners()
online = 0
+ busy = 0
offline = 0
unknown = 0
online_runners = []
+ busy_runners = []
for runner in runner_list:
match runner.github_state:
case GitHubRunnerState.BUSY:
online += 1
online_runners.append(runner.name)
+ busy += 1
+ busy_runners.append(runner.name)
case GitHubRunnerState.IDLE:
online += 1
online_runners.append(runner.name)
@@ -67,7 +75,12 @@ def get_runner_info(self) -> RunnerInfo:
case _:
unknown += 1
return RunnerInfo(
- online=online, offline=offline, unknown=unknown, runners=tuple(online_runners)
+ online=online,
+ busy=busy,
+ offline=offline,
+ unknown=unknown,
+ runners=tuple(online_runners),
+ busy_runners=tuple(busy_runners),
)
def flush(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> int:
@@ -138,8 +151,12 @@ def reconcile(self, num_of_runner: int) -> int:
]
try:
- available_runners = set(runner.name for runner in idle_runners) | set(runner.name for runner in offline_healthy_runners)
- logger.info("Current available runners (idle + healthy offline): %s", available_runners)
+ available_runners = set(runner.name for runner in idle_runners) | set(
+ runner.name for runner in offline_healthy_runners
+ )
+ logger.info(
+ "Current available runners (idle + healthy offline): %s", available_runners
+ )
metric_events.issue_event(
metric_events.Reconciliation(
timestamp=time.time(),
diff --git a/tests/unit/mock_runner_managers.py b/tests/unit/mock_runner_managers.py
index 7db7e24a0..269931bbd 100644
--- a/tests/unit/mock_runner_managers.py
+++ b/tests/unit/mock_runner_managers.py
@@ -20,7 +20,7 @@
@dataclass
class MockRunner:
- """Mock of a runner"""
+ """Mock of a runner."""
name: str
instance_id: InstanceId
@@ -29,6 +29,11 @@ class MockRunner:
health: bool
def __init__(self, name: str):
+ """Construct the object.
+
+ Args:
+ name: The name of the runner.
+ """
self.name = name
self.instance_id = secrets.token_hex(6)
self.cloud_state = CloudRunnerState.ACTIVE
@@ -36,6 +41,7 @@ def __init__(self, name: str):
self.health = True
def to_cloud_runner(self) -> CloudRunnerInstance:
+ """Construct CloudRunnerInstance from this object."""
return CloudRunnerInstance(
name=self.name,
instance_id=self.instance_id,
@@ -54,6 +60,7 @@ class SharedMockRunnerManagerState:
runners: dict[InstanceId, MockRunner]
def __init__(self):
+ """Construct the object."""
self.runners = {}
@@ -64,6 +71,11 @@ class MockCloudRunnerManager(CloudRunnerManager):
"""
def __init__(self, state: SharedMockRunnerManagerState):
+ """Construct the object.
+
+ Args:
+ state: The shared state between cloud and github runner managers.
+ """
self.prefix = f"mock_{secrets.token_hex(4)}"
self.state = state
@@ -94,7 +106,9 @@ def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
return runner.to_cloud_runner()
return None
- def get_runners(self, states: Sequence[CloudRunnerState] | None = None) -> tuple[CloudRunnerInstance, ...]:
+ def get_runners(
+ self, states: Sequence[CloudRunnerState] | None = None
+ ) -> tuple[CloudRunnerInstance, ...]:
"""Get self-hosted runners by state.
Args:
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index 3dd2f9800..476885954 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -3,10 +3,12 @@
from typing import Iterable
+
import pytest
-from charm_state import GitHubRepo
-from manager.cloud_runner_manager import InstanceId
+from charm_state import GitHubPath, GitHubRepo
+from manager.cloud_runner_manager import CloudRunnerState, InstanceId
+from manager.github_runner_manager import GitHubRunnerState
from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
from manager.runner_scaler import RunnerScaler
from tests.unit.mock_runner_managers import (
@@ -16,25 +18,48 @@
)
-def mock_runner_manager_spawn_runners(create_runner_args: Iterable[RunnerManager._CreateRunnerArgs]) -> tuple[InstanceId, ...]:
+def mock_runner_manager_spawn_runners(
+ create_runner_args: Iterable[RunnerManager._CreateRunnerArgs],
+) -> tuple[InstanceId, ...]:
"""Mock _spawn_runners method of RunnerManager.
-
+
The _spawn_runners method uses multi-process, which copies the object, e.g., the mocks.
- There is easy way to sync the state of the mocks object across processes. Replacing the
+ There is no easy way to sync the state of the mock objects across processes. Replacing the
_spawn_runner to remove the multi-process.pool is an easier approach.
+
+ Args:
+ create_runner_args: The arguments for the create_runner method.
+
+ Returns:
+ The instance ids of the runner spawned.
"""
return tuple(RunnerManager._create_runner(arg) for arg in create_runner_args)
-@pytest.fixture(scope="function", name="runner_manager")
-def runner_manager_fixture(monkeypatch) -> RunnerManager:
+@pytest.fixture(scope="function", name="github_path")
+def github_path_fixture() -> GitHubPath:
+ return GitHubRepo("mock_owner", "mock_repo")
+
+
+@pytest.fixture(scope="function", name="mock_runner_managers")
+def mock_runner_managers_fixture(
+ github_path: GitHubPath,
+) -> tuple[MockCloudRunnerManager, MockGitHubRunnerManager]:
state = SharedMockRunnerManagerState()
mock_cloud = MockCloudRunnerManager(state)
- mock_path = GitHubRepo("mock_owner", "mock_repo")
- mock_github = MockGitHubRunnerManager(mock_cloud.name_prefix, mock_path, state)
+ mock_github = MockGitHubRunnerManager(mock_cloud.name_prefix, github_path, state)
+ return (mock_cloud, mock_github)
- monkeypatch.setattr("manager.runner_manager.RunnerManager._spawn_runners", mock_runner_manager_spawn_runners)
- config = RunnerManagerConfig("mock_token", mock_path)
+
+@pytest.fixture(scope="function", name="runner_manager")
+def runner_manager_fixture(
+ monkeypatch, mock_runner_managers, github_path: GitHubPath
+) -> RunnerManager:
+ mock_cloud, mock_github = mock_runner_managers
+ monkeypatch.setattr(
+ "manager.runner_manager.RunnerManager._spawn_runners", mock_runner_manager_spawn_runners
+ )
+ config = RunnerManagerConfig("mock_token", github_path)
runner_manager = RunnerManager(mock_cloud, config)
runner_manager._github = mock_github
return runner_manager
@@ -45,23 +70,49 @@ def runner_scaler_fixture(runner_manager: RunnerManager) -> RunnerScaler:
return RunnerScaler(runner_manager)
+@pytest.fixture(scope="function", name="runner_scaler_one_runner")
+def runner_scaler_one_runner_fixture(runner_scaler: RunnerScaler) -> RunnerScaler:
+ runner_scaler.reconcile(1)
+ assert_runner_info(runner_scaler, online=1)
+ return runner_scaler
+
+
+def set_one_runner_state(
+ runner_scaler: RunnerScaler,
+ github_state: GitHubRunnerState | None = None,
+ cloud_state: CloudRunnerState | None = None,
+) -> RunnerScaler:
+ runner_dict = runner_scaler._manager._github.state.runners
+ assert len(runner_dict) == 1, "Test arrange failed: One runner should be present"
+ instance_id = list(runner_dict.keys())[0]
+ if github_state is not None:
+ runner_dict[instance_id].github_state = github_state
+ if cloud_state is not None:
+ runner_dict[instance_id].cloud_state = cloud_state
+ return runner_scaler
+
+
def assert_runner_info(
- runner_scaler: RunnerScaler, online: int = 0, offline: int = 0, unknown: int = 0
+ runner_scaler: RunnerScaler, online: int = 0, busy: int = 0, offline: int = 0, unknown: int = 0
) -> None:
"""Assert runner info contains a certain amount of runners.
Args:
runner_scaler: The RunnerScaler to get information from.
online: The number of online runners to assert for.
+ busy: The number of busy runners to assert for.
offline: The number of offline runners to assert for.
unknown: The number of unknown runners to assert for.
"""
info = runner_scaler.get_runner_info()
assert info["offline"] == offline
assert info["online"] == online
+ assert info["busy"] == busy
assert info["unknown"] == unknown
assert isinstance(info["runners"], tuple)
assert len(info["runners"]) == online
+ assert isinstance(info["busy_runners"], tuple)
+ assert len(info["busy_runners"]) == busy
def test_get_no_runner(runner_scaler: RunnerScaler):
@@ -136,3 +187,68 @@ def test_one_runner(runner_scaler: RunnerScaler):
diff = runner_scaler.reconcile(1)
assert diff == 1
assert_runner_info(runner_scaler, online=1)
+
+
+def test_flush_busy_on_idle_runner(runner_scaler_one_runner: RunnerScaler):
+ """
+ Arrange: A RunnerScaler with one idle runner.
+ Act: Run flush busy runner.
+ Assert: No runners.
+ """
+ runner_scaler = runner_scaler_one_runner
+
+ runner_scaler.flush(flush_mode=FlushMode.FLUSH_BUSY)
+ assert_runner_info(runner_scaler, online=0)
+
+
+def test_flush_busy_on_busy_runner(
+ runner_scaler_one_runner: RunnerScaler,
+):
+ """
+ Arrange: A RunnerScaler with one busy runner.
+ Act: Run flush busy runner.
+ Assert: No runners.
+ """
+ runner_scaler = runner_scaler_one_runner
+ set_one_runner_state(runner_scaler, GitHubRunnerState.BUSY)
+
+ runner_scaler.flush(flush_mode=FlushMode.FLUSH_BUSY)
+ assert_runner_info(runner_scaler, online=0)
+
+
+def test_get_runner_one_busy_runner(
+ runner_scaler_one_runner: RunnerScaler,
+):
+ """
+ Arrange: A RunnerScaler with one busy runner.
+ Act: Run get runners.
+ Assert: One busy runner.
+ """
+ runner_scaler = runner_scaler_one_runner
+ set_one_runner_state(runner_scaler, GitHubRunnerState.BUSY)
+
+ assert_runner_info(runner_scaler=runner_scaler, online=1, busy=1)
+
+
+def test_get_runner_offline_runner(runner_scaler_one_runner: RunnerScaler):
+ """
+ Arrange: A RunnerScaler with one offline runner
+ Act: Run get runners.
+ Assert: One offline runner.
+ """
+ runner_scaler = runner_scaler_one_runner
+ set_one_runner_state(runner_scaler, GitHubRunnerState.OFFLINE)
+
+ assert_runner_info(runner_scaler=runner_scaler, offline=1)
+
+
+def test_get_runner_unknown_runner(runner_scaler_one_runner: RunnerScaler):
+ """
+ Arrange: A RunnerScaler with one runner in an unknown state.
+ Act: Run get runners.
+ Assert: One unknown runner.
+ """
+ runner_scaler = runner_scaler_one_runner
+ set_one_runner_state(runner_scaler, "UNKNOWN")
+
+ assert_runner_info(runner_scaler=runner_scaler, unknown=1)
From 7c5a78ed2bd52d66941fd0e9da44e9f4644731fb Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 26 Aug 2024 16:42:44 +0800
Subject: [PATCH 234/278] Fix merge issues
---
src-docs/openstack_cloud.md | 6 +
...penstack_cloud.openstack_runner_manager.md | 280 +++++++++
src/openstack_cloud/openstack_manager.py | 2 +-
tests/unit/test_runner_manager.py | 589 ------------------
tests/unit/test_runner_scaler.py | 2 +-
5 files changed, 288 insertions(+), 591 deletions(-)
delete mode 100644 tests/unit/test_runner_manager.py
diff --git a/src-docs/openstack_cloud.md b/src-docs/openstack_cloud.md
index ddcad41e0..51140a4b2 100644
--- a/src-docs/openstack_cloud.md
+++ b/src-docs/openstack_cloud.md
@@ -10,6 +10,12 @@ Module for managing Openstack cloud.
- **openstack_cloud**: # Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
+- **openstack_manager**: # Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+- **openstack_runner_manager**: # Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
---
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index e69de29bb..f5172cde6 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -0,0 +1,280 @@
+
+
+
+
+# module `openstack_cloud.openstack_runner_manager`
+Manager for self-hosted runner on OpenStack.
+
+**Global Variables**
+---------------
+- **BUILD_OPENSTACK_IMAGE_SCRIPT_FILENAME**
+- **MAX_METRICS_FILE_SIZE**
+- **RUNNER_STARTUP_PROCESS**
+- **RUNNER_LISTENER_PROCESS**
+- **RUNNER_WORKER_PROCESS**
+- **CREATE_SERVER_TIMEOUT**
+
+
+---
+
+
+
+## class `OpenStackCloudConfig`
+Configuration for OpenStack cloud authorisation information.
+
+
+
+**Attributes:**
+
+ - `clouds_config`: The clouds.yaml.
+ - `cloud`: The cloud name to connect to.
+
+
+
+### method `__init__`
+
+```python
+__init__(clouds_config: dict[str, dict], cloud: str) → None
+```
+
+
+
+
+
+
+
+
+
+---
+
+
+
+## class `OpenStackServerConfig`
+Configuration for OpenStack server.
+
+
+
+**Attributes:**
+
+ - `image`: The image name for runners to use.
+ - `flavor`: The flavor name for runners to use.
+ - `network`: The network name for runners to use.
+
+
+
+### method `__init__`
+
+```python
+__init__(image: str, flavor: str, network: str) → None
+```
+
+
+
+
+
+
+
+
+
+---
+
+
+
+## class `OpenStackRunnerManager`
+Manage self-hosted runner on OpenStack cloud.
+
+
+
+**Attributes:**
+
+ - `name_prefix`: The name prefix of the runners created.
+
+
+
+### method `__init__`
+
+```python
+__init__(
+ prefix: str,
+ cloud_config: OpenStackCloudConfig,
+ server_config: OpenStackServerConfig | None,
+ runner_config: GitHubRunnerConfig,
+ service_config: SupportServiceConfig
+) → None
+```
+
+Construct the object.
+
+
+
+**Args:**
+
+ - `prefix`: The prefix to runner name.
+ - `cloud_config`: The configuration for OpenStack authorisation.
+ - `server_config`: The configuration for creating OpenStack server. Unable to create runner if None.
+ - `runner_config`: The configuration for the runner.
+ - `service_config`: The configuration of supporting services of the runners.
+
+
+---
+
+#### property name_prefix
+
+The prefix of runner names.
+
+
+
+**Returns:**
+ The prefix of the runner names managed by this class.
+
+
+
+---
+
+
+
+### method `cleanup`
+
+```python
+cleanup(remove_token: str) → Iterator[RunnerMetrics]
+```
+
+Cleanup runner and resource on the cloud.
+
+
+
+**Args:**
+
+ - `remove_token`: The GitHub remove token.
+
+
+
+**Returns:**
+ Any metrics retrieved from cleanup runners.
+
+---
+
+
+
+### method `create_runner`
+
+```python
+create_runner(registration_token: str) → str
+```
+
+Create a self-hosted runner.
+
+
+
+**Args:**
+
+ - `registration_token`: The GitHub registration token for registering runners.
+
+
+
+**Raises:**
+
+ - `MissingServerConfigError`: Unable to create runner due to missing configuration.
+ - `RunnerCreateError`: Unable to create runner due to OpenStack issues.
+
+
+
+**Returns:**
+ Instance ID of the runner.
+
+---
+
+
+
+### method `delete_runner`
+
+```python
+delete_runner(instance_id: str, remove_token: str) → RunnerMetrics | None
+```
+
+Delete self-hosted runners.
+
+
+
+**Args:**
+
+ - `instance_id`: The instance id of the runner to delete.
+ - `remove_token`: The GitHub remove token.
+
+
+
+**Returns:**
+ Any metrics collected during the deletion of the runner.
+
+---
+
+
+
+### method `flush_runners`
+
+```python
+flush_runners(remove_token: str, busy: bool = False) → Iterator[RunnerMetrics]
+```
+
+Remove idle and/or busy runners.
+
+
+
+**Args:**
+ - `remove_token`: The GitHub remove token.
+ - `busy`: If false, only idle runners are removed. If true, both idle and busy runners are removed.
+
+
+
+**Returns:**
+ Any metrics retrieved from flushed runners.
+
+---
+
+
+
+### method `get_runner`
+
+```python
+get_runner(instance_id: str) → CloudRunnerInstance | None
+```
+
+Get a self-hosted runner by instance id.
+
+
+
+**Args:**
+
+ - `instance_id`: The instance id.
+
+
+
+**Returns:**
+ Information on the runner instance.
+
+---
+
+
+
+### method `get_runners`
+
+```python
+get_runners(
+ states: Optional[Sequence[CloudRunnerState]] = None
+) → tuple[CloudRunnerInstance, ]
+```
+
+Get self-hosted runners by state.
+
+
+
+**Args:**
+
+ - `states`: Filter for the runners with these cloud states. If None, all states will be included.
+
+
+
+**Returns:**
+ Information on the runner instances.
+
+
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index 04cc39add..e0ce47d4f 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -62,7 +62,7 @@
from repo_policy_compliance_client import RepoPolicyComplianceClient
from runner_manager import IssuedMetricEventsStats
from runner_manager_type import FlushMode, OpenstackRunnerManagerConfig
-from runner_type import GithubPath, RunnerByHealth, RunnerGithubInfo
+from runner_type import GitHubPath, RunnerGithubInfo, RunnerNameByHealth
from utilities import retry, set_env_var
logger = logging.getLogger(__name__)
diff --git a/tests/unit/test_runner_manager.py b/tests/unit/test_runner_manager.py
deleted file mode 100644
index 94d3373d4..000000000
--- a/tests/unit/test_runner_manager.py
+++ /dev/null
@@ -1,589 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-"""Test cases of RunnerManager class."""
-import random
-import secrets
-from pathlib import Path
-from unittest.mock import MagicMock, call
-
-import pytest
-from pytest import LogCaptureFixture, MonkeyPatch
-
-import reactive.runner_manager
-import shared_fs
-from charm_state import (
- Arch,
- CharmConfig,
- CharmState,
- GithubOrg,
- GithubRepo,
- ProxyConfig,
- ReactiveConfig,
- VirtualMachineResources,
-)
-from errors import IssueMetricEventError, RunnerBinaryError
-from github_type import RunnerApplication
-from metrics.events import Reconciliation, RunnerInstalled, RunnerStart, RunnerStop
-from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
-from metrics.storage import MetricsStorage
-from runner import Runner, RunnerStatus
-from runner_manager import BUILD_IMAGE_SCRIPT_FILENAME, RunnerManager, RunnerManagerConfig
-from runner_type import RunnerByHealth
-from tests.unit.mock import TEST_BINARY, MockLxdImageManager
-
-FAKE_MONGODB_URI = "mongodb://example.com/db"
-
-IMAGE_NAME = "jammy"
-
-RUNNER_MANAGER_TIME_MODULE = "runner_manager.time.time"
-TEST_PROXY_SERVER_URL = "http://proxy.server:1234"
-
-
-@pytest.fixture(scope="function", name="token")
-def token_fixture():
- return secrets.token_hex()
-
-
-@pytest.fixture(scope="function", name="charm_config")
-def charm_config_fixture():
- """Mock charm config instance."""
- mock_charm_config = MagicMock(spec=CharmConfig)
- mock_charm_config.labels = ("test",)
- return mock_charm_config
-
-
-@pytest.fixture(scope="function", name="charm_state")
-def charm_state_fixture(charm_config: MagicMock):
- mock = MagicMock(spec=CharmState)
- mock.is_metrics_logging_available = False
- mock.arch = Arch.X64
- mock.ssh_debug_connections = None
- mock.charm_config = charm_config
- return mock
-
-
-@pytest.fixture(
- scope="function",
- name="runner_manager",
- params=[
- (GithubOrg("test_org", "test_group"), ProxyConfig()),
- (
- GithubRepo("test_owner", "test_repo"),
- ProxyConfig(
- no_proxy="test_no_proxy",
- http=TEST_PROXY_SERVER_URL,
- https=TEST_PROXY_SERVER_URL,
- use_aproxy=False,
- ),
- ),
- ],
-)
-def runner_manager_fixture(request, tmp_path, monkeypatch, token, charm_state):
- charm_state.proxy_config = request.param[1]
- monkeypatch.setattr(
- "runner_manager.RunnerManager.runner_bin_path", tmp_path / "mock_runner_binary"
- )
- pool_path = tmp_path / "test_storage"
- pool_path.mkdir(exist_ok=True)
-
- runner_manager = RunnerManager(
- "test app",
- "0",
- RunnerManagerConfig(
- path=request.param[0],
- token=token,
- image=IMAGE_NAME,
- service_token=secrets.token_hex(16),
- lxd_storage_path=pool_path,
- charm_state=charm_state,
- ),
- )
- runner_manager.runner_bin_path.write_bytes(TEST_BINARY)
- return runner_manager
-
-
-@pytest.fixture(autouse=True, name="issue_event_mock")
-def issue_event_mock_fixture(monkeypatch: MonkeyPatch) -> MagicMock:
- """Mock the issue_event function."""
- issue_event_mock = MagicMock()
- monkeypatch.setattr("metrics.events.issue_event", issue_event_mock)
- return issue_event_mock
-
-
-@pytest.fixture(autouse=True, name="shared_fs")
-def shared_fs_fixture(tmp_path: Path, monkeypatch: MonkeyPatch) -> MagicMock:
- """Mock the shared filesystem module."""
- shared_fs_mock = MagicMock(spec=shared_fs)
- monkeypatch.setattr("runner_manager.shared_fs", shared_fs_mock)
- monkeypatch.setattr("runner.shared_fs", shared_fs_mock)
- return shared_fs_mock
-
-
-@pytest.fixture(autouse=True, name="runner_metrics")
-def runner_metrics_fixture(monkeypatch: MonkeyPatch) -> MagicMock:
- """Mock the runner metrics module."""
- runner_metrics_mock = MagicMock()
- monkeypatch.setattr("runner_manager.runner_metrics", runner_metrics_mock)
- return runner_metrics_mock
-
-
-@pytest.fixture(name="reactive_reconcile_mock")
-def reactive_reconcile_fixture(monkeypatch: MonkeyPatch, tmp_path: Path) -> MagicMock:
- """Mock the job class."""
- reconcile_mock = MagicMock(spec=reactive.runner_manager.reconcile)
- monkeypatch.setattr("runner_manager.reactive_runner_manager.reconcile", reconcile_mock)
- reconcile_mock.side_effect = lambda quantity, **kwargs: quantity
- return reconcile_mock
-
-
-@pytest.mark.parametrize(
- "arch",
- [
- pytest.param(Arch.ARM64),
- pytest.param(Arch.X64),
- ],
-)
-def test_get_latest_runner_bin_url(runner_manager: RunnerManager, arch: Arch, charm_state):
- """
- arrange: Nothing.
- act: Get runner bin url of existing binary.
- assert: Correct mock data returned.
- """
- charm_state.arch = arch
- mock_gh_client = MagicMock()
- app = RunnerApplication(
- os="linux",
- architecture=arch.value,
- download_url=(download_url := "https://www.example.com"),
- filename=(filename := "test_runner_binary"),
- )
- mock_gh_client.get_runner_application.return_value = app
- runner_manager._clients.github = mock_gh_client
-
- runner_bin = runner_manager.get_latest_runner_bin_url(os_name="linux")
- assert runner_bin["os"] == "linux"
- assert runner_bin["architecture"] == arch.value
- assert runner_bin["download_url"] == download_url
- assert runner_bin["filename"] == filename
-
-
-def test_get_latest_runner_bin_url_missing_binary(runner_manager: RunnerManager):
- """
- arrange: Given a mocked GH API client that does not return any runner binaries.
- act: Get runner bin url of non-existing binary.
- assert: Error related to runner bin raised.
- """
- runner_manager._clients.github = MagicMock()
- runner_manager._clients.github.get_runner_application.side_effect = RunnerBinaryError
-
- with pytest.raises(RunnerBinaryError):
- runner_manager.get_latest_runner_bin_url(os_name="not_exist")
-
-
-def test_update_runner_bin(runner_manager: RunnerManager):
- """
- arrange: Remove the existing runner binary.
- act: Update runner binary.
- assert: Runner binary in runner manager is set.
- """
-
- class MockRequestLibResponse:
- """A mock requests library response."""
-
- def __init__(self, *args, **kwargs):
- """Initialize successful requests library response.
-
- Args:
- args: Placeholder for positional arguments.
- kwargs: Placeholder for keyword arguments.
- """
- self.status_code = 200
-
- def iter_content(self, *args, **kwargs):
- """Mock content iterator returning an iterator over a single test runner binary.
-
- Args:
- args: Placeholder positional arguments.
- kwargs: Placeholder keyword arguments.
-
- Returns:
- An iterator over a single test runner binary.
- """
- return iter([TEST_BINARY])
-
- runner_manager.runner_bin_path.unlink(missing_ok=True)
-
- runner_manager.session.get = MockRequestLibResponse
- runner_bin = runner_manager.get_latest_runner_bin_url(os_name="linux")
-
- runner_manager.update_runner_bin(runner_bin)
-
- assert runner_manager.runner_bin_path.read_bytes() == TEST_BINARY
-
-
-def test_reconcile_zero_count(runner_manager: RunnerManager):
- """
- arrange: Nothing.
- act: Reconcile with the current amount of runner.
- assert: No error should be raised.
- """
- # Reconcile with no change to runner count.
- delta = runner_manager.reconcile(0, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- assert delta == 0
-
-
-def test_reconcile_create_runner(runner_manager: RunnerManager):
- """
- arrange: Nothing.
- act: Reconcile to create a runner.
- assert: One runner should be created.
- """
- # Create a runner.
- delta = runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- assert delta == 1
-
-
-def test_reconcile_remove_runner(runner_manager: RunnerManager):
- """
- arrange: Create online runners.
- act: Reconcile to remove a runner.
- assert: One runner should be removed.
- """
-
- def mock_get_runners():
- """Create three mock runners.
-
- Returns:
- Three mock runners.
- """
- runners = []
- for _ in range(3):
- # 0 is a mock runner id.
- status = RunnerStatus(0, True, True, False)
- runners.append(Runner(MagicMock(), MagicMock(), status, None))
- return runners
-
- # Create online runners.
- runner_manager._get_runners = mock_get_runners
- runner_manager._get_runner_health_states = lambda: RunnerByHealth(
- (
- f"{runner_manager.instance_name}-0",
- f"{runner_manager.instance_name}-1",
- f"{runner_manager.instance_name}-2",
- ),
- (),
- )
-
- delta = runner_manager.reconcile(2, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- assert delta == -1
-
-
-def test_reconcile(runner_manager: RunnerManager, tmp_path: Path):
- """
- arrange: Setup one runner.
- act: Reconcile with the current amount of runner.
- assert: Still have one runner.
- """
- runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib"))
- # Reconcile with no change to runner count.
- runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- assert len(runner_manager._get_runners()) == 1
-
-
-def test_empty_flush(runner_manager: RunnerManager):
- """
- arrange: No initial runners.
- act: Perform flushing with no runners.
- assert: No error thrown.
- """
- # Verifying the RunnerManager does not crash if flushing with no runners.
- runner_manager.flush()
-
-
-def test_flush(runner_manager: RunnerManager, tmp_path: Path):
- """
- arrange: Create some runners.
- act: Perform flushing.
- assert: No runners.
- """
- # Create a runner.
- runner_manager.reconcile(2, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- runner_manager.flush()
- assert len(runner_manager._get_runners()) == 0
-
-
-def test_reconcile_issues_runner_installed_event(
- runner_manager: RunnerManager,
- monkeypatch: MonkeyPatch,
- issue_event_mock: MagicMock,
- charm_state: MagicMock,
-):
- """
- arrange: Enable issuing of metrics and mock timestamps.
- act: Reconcile to create a runner.
- assert: The expected event is issued.
- """
- charm_state.is_metrics_logging_available = True
- t_mock = MagicMock(return_value=12345)
- monkeypatch.setattr(RUNNER_MANAGER_TIME_MODULE, t_mock)
-
- runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- issue_event_mock.assert_has_calls(
- [call(event=RunnerInstalled(timestamp=12345, flavor=runner_manager.app_name, duration=0))]
- )
-
-
-def test_reconcile_issues_no_runner_installed_event_if_metrics_disabled(
- runner_manager: RunnerManager, issue_event_mock: MagicMock, charm_state: MagicMock
-):
- """
- arrange: Disable issuing of metrics.
- act: Reconcile to create a runner.
- assert: The expected event is not issued.
- """
- charm_state.is_metrics_logging_available = False
-
- runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- issue_event_mock.assert_not_called()
-
-
-def test_reconcile_error_on_issue_event_is_ignored(
- runner_manager: RunnerManager,
- issue_event_mock: MagicMock,
- charm_state: MagicMock,
-):
- """
- arrange: Enable issuing of metrics and mock the metric issuing to raise an expected error.
- act: Reconcile.
- assert: No error is raised.
- """
- charm_state.is_metrics_logging_available = True
-
- issue_event_mock.side_effect = IssueMetricEventError("test error")
-
- delta = runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- assert delta == 1
-
-
-def test_reconcile_issues_reconciliation_metric_event(
- runner_manager: RunnerManager,
- monkeypatch: MonkeyPatch,
- issue_event_mock: MagicMock,
- runner_metrics: MagicMock,
- charm_state: MagicMock,
-):
- """
- arrange: \
- - Enable issuing of metrics \
- - Mock timestamps \
- - Mock the result of runner_metrics.issue_event to contain 2 RunnerStart and 1 RunnerStop \
- events, meaning one runner was active and one crashed. \
- - Create two online runners , one active and one idle.
- act: Reconcile.
- assert: The expected event is issued. We expect two idle runners and one crashed runner
- to be reported.
- """
- charm_state.is_metrics_logging_available = True
- t_mock = MagicMock(return_value=12345)
- monkeypatch.setattr(RUNNER_MANAGER_TIME_MODULE, t_mock)
- runner_metrics.extract.return_value = (MagicMock() for _ in range(2))
- runner_metrics.issue_events.side_effect = [{RunnerStart, RunnerStop}, {RunnerStart}]
-
- online_idle_runner_name = f"{runner_manager.instance_name}-0"
- offline_idle_runner_name = f"{runner_manager.instance_name}-1"
- active_runner_name = f"{runner_manager.instance_name}-2"
-
- def mock_get_runners():
- """Create three mock runners where one is busy.
-
- Returns:
- Mock runners with one busy runner.
- """
- runners = []
-
- online_idle_runner = RunnerStatus(runner_id=0, exist=True, online=True, busy=False)
- offline_idle_runner = RunnerStatus(runner_id=1, exist=True, online=False, busy=False)
- active_runner = RunnerStatus(runner_id=2, exist=True, online=True, busy=True)
-
- for runner_status, runner_config in zip(
- (online_idle_runner, offline_idle_runner, active_runner),
- (online_idle_runner_name, offline_idle_runner_name, active_runner_name),
- ):
- config = MagicMock()
- config.name = runner_config
- runners.append(
- Runner(
- clients=MagicMock(),
- runner_config=config,
- runner_status=runner_status,
- instance=None,
- )
- )
-
- return runners
-
- # Create online runners.
- runner_manager._get_runners = mock_get_runners
- runner_manager._get_runner_health_states = lambda: RunnerByHealth(
- healthy=(
- online_idle_runner_name,
- offline_idle_runner_name,
- active_runner_name,
- ),
- unhealthy=(),
- )
-
- runner_manager.reconcile(
- quantity=random.randint(0, 5), resources=VirtualMachineResources(2, "7GiB", "10Gib")
- )
-
- issue_event_mock.assert_any_call(
- event=Reconciliation(
- timestamp=12345,
- flavor=runner_manager.app_name,
- crashed_runners=1,
- idle_runners=2,
- duration=0,
- )
- )
-
-
-def test_reconcile_places_timestamp_in_newly_created_runner(
- runner_manager: RunnerManager,
- monkeypatch: MonkeyPatch,
- shared_fs: MagicMock,
- tmp_path: Path,
- charm_state: MagicMock,
-):
- """
- arrange: Enable issuing of metrics, mock timestamps and create the directory for the shared\
- filesystem.
- act: Reconcile to create a runner.
- assert: The expected timestamp is placed in the shared filesystem.
- """
- charm_state.is_metrics_logging_available = True
- t_mock = MagicMock(return_value=12345)
- monkeypatch.setattr(RUNNER_MANAGER_TIME_MODULE, t_mock)
- runner_shared_fs = tmp_path / "runner_fs"
- runner_shared_fs.mkdir()
- fs = MetricsStorage(path=runner_shared_fs, runner_name="test_runner")
- shared_fs.get.return_value = fs
-
- runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- assert (fs.path / RUNNER_INSTALLED_TS_FILE_NAME).exists()
- assert (fs.path / RUNNER_INSTALLED_TS_FILE_NAME).read_text() == "12345"
-
-
-def test_reconcile_error_on_placing_timestamp_is_ignored(
- runner_manager: RunnerManager, shared_fs: MagicMock, tmp_path: Path, charm_state: MagicMock
-):
- """
- arrange: Enable issuing of metrics and do not create the directory for the shared filesystem\
- in order to let a FileNotFoundError to be raised inside the RunnerManager.
- act: Reconcile to create a runner.
- assert: No exception is raised.
- """
- charm_state.is_metrics_logging_available = True
- runner_shared_fs = tmp_path / "runner_fs"
- fs = MetricsStorage(path=runner_shared_fs, runner_name="test_runner")
- shared_fs.get.return_value = fs
-
- runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- assert not (fs.path / RUNNER_INSTALLED_TS_FILE_NAME).exists()
-
-
-def test_reconcile_places_no_timestamp_in_newly_created_runner_if_metrics_disabled(
- runner_manager: RunnerManager, shared_fs: MagicMock, tmp_path: Path, charm_state: MagicMock
-):
- """
- arrange: Disable issuing of metrics, mock timestamps and the shared filesystem module.
- act: Reconcile to create a runner.
- assert: No timestamp is placed in the shared filesystem.
- """
- charm_state.is_metrics_logging_available = False
-
- fs = MetricsStorage(path=tmp_path, runner_name="test_runner")
- shared_fs.get.return_value = fs
-
- runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- assert not (fs.path / RUNNER_INSTALLED_TS_FILE_NAME).exists()
-
-
-def test_reconcile_reactive_mode(
- runner_manager: RunnerManager,
- reactive_reconcile_mock: MagicMock,
- caplog: LogCaptureFixture,
-):
- """
- arrange: Enable reactive mode and mock the job class to return a job.
- act: Call reconcile with a random quantity n.
- assert: The mocked job is picked up n times and the expected log message is present.
- """
- count = random.randint(0, 5)
- runner_manager.config.reactive_config = ReactiveConfig(mq_uri=FAKE_MONGODB_URI)
- actual_count = runner_manager.reconcile(count, VirtualMachineResources(2, "7GiB", "10Gib"))
-
- assert actual_count == count
- reactive_reconcile_mock.assert_called_with(
- quantity=count, mq_uri=FAKE_MONGODB_URI, queue_name=runner_manager.app_name
- )
-
-
-def test_schedule_build_runner_image(
- runner_manager: RunnerManager,
- tmp_path: Path,
- charm_state: CharmState,
- monkeypatch: MonkeyPatch,
-):
- """
- arrange: Mock the cron path and the randint function.
- act: Schedule the build runner image.
- assert: The cron file is created with the expected content.
- """
- runner_manager.cron_path = tmp_path / "cron"
- runner_manager.cron_path.mkdir()
- monkeypatch.setattr(random, "randint", MagicMock(spec=random.randint, return_value=4))
-
- runner_manager.schedule_build_runner_image()
-
- cronfile = runner_manager.cron_path / "build-runner-image"
- http = charm_state.proxy_config.http or "''"
- https = charm_state.proxy_config.https or "''"
- no_proxy = charm_state.proxy_config.no_proxy or "''"
-
- cmd = f"/usr/bin/bash {BUILD_IMAGE_SCRIPT_FILENAME.absolute()} {http} {https} {no_proxy}"
-
- assert cronfile.exists()
- assert cronfile.read_text() == f"4 4,10,16,22 * * * ubuntu {cmd} jammy\n"
-
-
-def test_has_runner_image(runner_manager: RunnerManager):
- """
- arrange: Multiple setups.
- 1. no runner image exists.
- 2. runner image with wrong name exists.
- 3. runner image with correct name exists.
- act: Check if runner image exists.
- assert:
- 1 and 2. False is returned.
- 3. True is returned.
- """
- assert not runner_manager.has_runner_image()
-
- runner_manager._clients.lxd.images = MockLxdImageManager({"hirsute"})
- assert not runner_manager.has_runner_image()
-
- runner_manager._clients.lxd.images = MockLxdImageManager({IMAGE_NAME})
- assert runner_manager.has_runner_image()
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index 476885954..5bb60ccb6 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -250,5 +250,5 @@ def test_get_runner_unknown_runner(runner_scaler_one_runner: RunnerScaler):
"""
runner_scaler = runner_scaler_one_runner
set_one_runner_state(runner_scaler, "UNKNOWN")
-
+
assert_runner_info(runner_scaler=runner_scaler, unknown=1)
From a473b688ca5a343d88dbea9cb7dc87cdba010303 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 27 Aug 2024 10:00:22 +0800
Subject: [PATCH 235/278] Fix states in get_runners methods
---
src-docs/openstack_cloud.openstack_runner_manager.md | 6 +++---
src/manager/github_runner_manager.py | 8 ++++++--
src/openstack_cloud/openstack_runner_manager.py | 4 +++-
3 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index f5172cde6..95bf05c5d 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -131,7 +131,7 @@ The prefix of runner names.
---
-
+
### method `cleanup`
@@ -184,7 +184,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -208,7 +208,7 @@ Delete self-hosted runners.
---
-
+
### method `flush_runners`
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 8f00525b0..686976d84 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -72,12 +72,16 @@ def get_runners(
Information on the runners.
"""
runner_list = self.github.get_runner_github_info(self._path)
+ runner_list = [runner for runner in runner_list if runner.name.startswith(self._prefix)]
+
+ if states is None:
+ return tuple(runner_list)
+
state_set = set(states)
return tuple(
runner
for runner in runner_list
- if runner.name.startswith(self._prefix)
- and GitHubRunnerManager._is_runner_in_state(runner, state_set)
+ if GitHubRunnerManager._is_runner_in_state(runner, state_set)
)
def delete_runners(self, states: Iterable[GitHubRunnerState] | None = None) -> None:
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 39475ee69..c84f09d2e 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -247,7 +247,9 @@ def get_runners(
]
if states is None:
return tuple(instance_list)
- return tuple(instance for instance in instance_list if instance.state in states)
+
+ state_set = set(states)
+ return tuple(instance for instance in instance_list if instance.state in state_set)
def delete_runner(
self, instance_id: InstanceId, remove_token: str
From 895206eaaf8cba527b99d23c488daa12013a9726 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 27 Aug 2024 10:49:04 +0800
Subject: [PATCH 236/278] Add docstring for unit test mocks
---
tests/unit/mock_runner_managers.py | 37 ++++++++++++++++++++++++++++--
tests/unit/test_runner_scaler.py | 10 ++++++--
2 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/tests/unit/mock_runner_managers.py b/tests/unit/mock_runner_managers.py
index 269931bbd..83f88e73e 100644
--- a/tests/unit/mock_runner_managers.py
+++ b/tests/unit/mock_runner_managers.py
@@ -20,7 +20,15 @@
@dataclass
class MockRunner:
- """Mock of a runner."""
+ """Mock of a runner.
+
+ Attributes:
+ name: The name of the runner.
+ instance_id: The instance id of the runner.
+ cloud_state: The cloud state of the runner.
+ github_state: The github state of the runner.
+ health: The health state of the runner.
+ """
name: str
instance_id: InstanceId
@@ -41,7 +49,11 @@ def __init__(self, name: str):
self.health = True
def to_cloud_runner(self) -> CloudRunnerInstance:
- """Construct CloudRunnerInstance from this object."""
+ """Construct CloudRunnerInstance from this object.
+
+ Returns:
+ The CloudRunnerInstance instance.
+ """
return CloudRunnerInstance(
name=self.name,
instance_id=self.instance_id,
@@ -55,6 +67,9 @@ class SharedMockRunnerManagerState:
"""State shared by mock runner managers.
For sharing the mock runner states between MockCloudRunnerManager and MockGitHubRunnerManager.
+
+ Attributes:
+ runners: The runners.
"""
runners: dict[InstanceId, MockRunner]
@@ -89,6 +104,9 @@ def create_runner(self, registration_token: str) -> InstanceId:
Args:
registration_token: The GitHub registration token for registering runners.
+
+ Returns:
+ The instance id of the runner created.
"""
name = f"{self.name_prefix}-{secrets.token_hex(6)}"
runner = MockRunner(name)
@@ -100,6 +118,9 @@ def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
Args:
instance_id: The instance id.
+
+ Returns:
+ The runner instance if found else None.
"""
runner = self.state.runners.get(instance_id, None)
if runner is not None:
@@ -114,6 +135,9 @@ def get_runners(
Args:
states: Filter for the runners with these github states. If None all states will be
included.
+
+ Returns:
+ The list of runner instances.
"""
if states is None:
states = [member.value for member in CloudRunnerState]
@@ -131,6 +155,9 @@ def delete_runner(self, instance_id: InstanceId, remove_token: str) -> RunnerMet
Args:
instance_id: The instance id of the runner to delete.
remove_token: The GitHub remove token.
+
+ Returns:
+ Any runner metrics produced during deletion.
"""
self.state.runners.pop(instance_id, None)
return iter([])
@@ -142,6 +169,9 @@ def flush_runners(self, remove_token: str, busy: bool = False) -> Iterator[Runne
remove_token: The GitHub remove token for removing runners.
busy: If false, only idle runners are removed. If true, both idle and busy runners are
removed.
+
+ Returns:
+ Any runner metrics produced during flushing.
"""
# No supporting metrics in the mocks.
if busy:
@@ -161,6 +191,9 @@ def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
Args:
remove_token: The GitHub remove token for removing runners.
+
+ Returns:
+ Any runner metrics produced during cleanup.
"""
# No supporting metrics in the mocks.
return iter([])
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index 5bb60ccb6..5f30c11ab 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -81,7 +81,14 @@ def set_one_runner_state(
runner_scaler: RunnerScaler,
github_state: GitHubRunnerState | None = None,
cloud_state: CloudRunnerState | None = None,
-) -> RunnerScaler:
+):
+ """Set the runner state for a RunnerScaler with one runner.
+
+ Args:
+ runner_scaler: The RunnerScaler instance to modify.
+ github_state: The GitHub state to set on the runner.
+ cloud_state: The cloud state to set on the runner.
+ """
runner_dict = runner_scaler._manager._github.state.runners
assert len(runner_dict) == 1, "Test arrange failed: One runner should be present"
instance_id = list(runner_dict.keys())[0]
@@ -89,7 +96,6 @@ def set_one_runner_state(
runner_dict[instance_id].github_state = github_state
if cloud_state is not None:
runner_dict[instance_id].cloud_state = cloud_state
- return runner_scaler
def assert_runner_info(
From 81815c4634d6530f566f1a9e8772cba5b69015a0 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 27 Aug 2024 10:54:12 +0800
Subject: [PATCH 237/278] Fix construction of repo-policy-compliance from
config
---
src/charm.py | 3 +--
src/manager/cloud_runner_manager.py | 8 +++-----
src/openstack_cloud/openstack_runner_manager.py | 5 +++--
tests/integration/test_runner_manager_openstack.py | 3 +--
4 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/src/charm.py b/src/charm.py
index f842c591e..b35f46919 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -1243,8 +1243,7 @@ def _get_runner_scaler(
proxy_config=state.proxy_config,
dockerhub_mirror=state.charm_config.dockerhub_mirror,
ssh_debug_connections=state.ssh_debug_connections,
- repo_policy_url=state.charm_config.repo_policy_compliance.url,
- repo_policy_token=state.charm_config.repo_policy_compliance.token,
+ repo_policy_compliance=state.charm_config.repo_policy_compliance,
)
openstack_runner_manager = OpenStackRunnerManager(
app_name,
diff --git a/src/manager/cloud_runner_manager.py b/src/manager/cloud_runner_manager.py
index b2624199d..aff75ed41 100644
--- a/src/manager/cloud_runner_manager.py
+++ b/src/manager/cloud_runner_manager.py
@@ -9,7 +9,7 @@
from enum import Enum, auto
from typing import Iterator, Sequence, Tuple
-from charm_state import GitHubPath, ProxyConfig, SSHDebugConnection
+from charm_state import GitHubPath, ProxyConfig, RepoPolicyComplianceConfig, SSHDebugConnection
from metrics.runner import RunnerMetrics
logger = logging.getLogger(__name__)
@@ -110,15 +110,13 @@ class SupportServiceConfig:
proxy_config: The proxy configuration.
dockerhub_mirror: The dockerhub mirror to use for runners.
ssh_debug_connections: The information on the ssh debug services.
- repo_policy_url: The URL of the repo policy service.
- repo_policy_token: The token to access the repo policy service.
+ repo_policy_compliance: The configuration of the repo policy compliance service.
"""
proxy_config: ProxyConfig | None
dockerhub_mirror: str | None
ssh_debug_connections: list[SSHDebugConnection] | None
- repo_policy_url: str | None
- repo_policy_token: str | None
+ repo_policy_compliance: RepoPolicyComplianceConfig | None
@dataclass
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index c84f09d2e..3a9acd4a0 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -460,9 +460,10 @@ def _get_repo_policy_compliance_client(self) -> RepoPolicyComplianceClient | Non
Returns:
The repo policy compliance client.
"""
- if self._service_config.repo_policy_url and self._service_config.repo_policy_token:
+ if self._service_config.repo_policy_compliance is not None:
return RepoPolicyComplianceClient(
- self._service_config.repo_policy_url, self._service_config.repo_policy_token
+ self._service_config.repo_policy_compliance.url,
+ self._service_config.repo_policy_compliance.token,
)
return None
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index b12d42414..089888b94 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -122,8 +122,7 @@ async def openstack_runner_manager_fixture(
proxy_config=proxy_config,
dockerhub_mirror=None,
ssh_debug_connections=None,
- repo_policy_url=None,
- repo_policy_token=None,
+ repo_policy_compliance=None,
)
return OpenStackRunnerManager(
app_name, cloud_config, server_config, runner_config, service_config
From 4012ed02aad0649b53ad96196541fc67af53a4d5 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 27 Aug 2024 12:53:58 +0800
Subject: [PATCH 238/278] Fix get_runners action output
---
src/charm.py | 12 +++++++++++-
src/manager/runner_scaler.py | 4 +++-
tests/unit/test_runner_scaler.py | 16 ++++++++--------
3 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/src/charm.py b/src/charm.py
index b35f46919..a974ceb56 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -765,7 +765,17 @@ def _on_check_runners_action(self, event: ActionEvent) -> None:
if state.instance_type == InstanceType.OPENSTACK:
runner_scaler = self._get_runner_scaler(state)
- event.set_results(runner_scaler.get_runner_info())
+ info = runner_scaler.get_runner_info()
+ event.set_results(
+ {
+ "online": info.online,
+ "busy": info.busy,
+ "offline": info.offline,
+ "unknown": info.unknown,
+ "runners": info.runners,
+ "busy_runners": info.busy_runners,
+ }
+ )
return
runner_manager = self._get_runner_manager(state)
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index c7cca69a3..606a3da08 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -5,6 +5,7 @@
import logging
import time
+from dataclasses import dataclass
from typing import TypedDict
from errors import IssueMetricEventError, MissingServerConfigError
@@ -16,7 +17,8 @@
logger = logging.getLogger(__name__)
-class RunnerInfo(TypedDict):
+@dataclass
+class RunnerInfo:
"""Information on the runners.
Attributes:
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index 5f30c11ab..c7a14a431 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -111,14 +111,14 @@ def assert_runner_info(
unknown: The number of unknown runners to assert for.
"""
info = runner_scaler.get_runner_info()
- assert info["offline"] == offline
- assert info["online"] == online
- assert info["busy"] == busy
- assert info["unknown"] == unknown
- assert isinstance(info["runners"], tuple)
- assert len(info["runners"]) == online
- assert isinstance(info["busy_runners"], tuple)
- assert len(info["busy_runners"]) == busy
+ assert info.offline == offline
+ assert info.online == online
+ assert info.busy == busy
+ assert info.unknown == unknown
+ assert isinstance(info.runners, tuple)
+ assert len(info.runners) == online
+ assert isinstance(info.busy_runners, tuple)
+ assert len(info.busy_runners) == busy
def test_get_no_runner(runner_scaler: RunnerScaler):
From 782d145afec119702a34a3f8392377fe1beed390 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 27 Aug 2024 13:31:02 +0800
Subject: [PATCH 239/278] Fix the lints
---
src/manager/runner_manager.py | 4 +--
src/manager/runner_scaler.py | 1 -
tests/unit/mock_runner_managers.py | 44 +++++++++++++++++++++++++++++-
tests/unit/test_runner_scaler.py | 4 +--
4 files changed, 47 insertions(+), 6 deletions(-)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 03639a306..96d3acfed 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -7,7 +7,7 @@
from dataclasses import dataclass
from enum import Enum, auto
from multiprocessing import Pool
-from typing import Iterable, Iterator, Sequence, Type, cast
+from typing import Iterator, Sequence, Type, cast
from charm_state import GitHubPath
from errors import GithubMetricsError, RunnerCreateError
@@ -242,7 +242,7 @@ def cleanup(self) -> IssuedMetricEventsStats:
@staticmethod
def _spawn_runners(
- create_runner_args: Iterable["RunnerManager._CreateRunnerArgs"],
+ create_runner_args: Sequence["RunnerManager._CreateRunnerArgs"],
) -> tuple[InstanceId, ...]:
"""Parallel spawn of runners.
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index 606a3da08..5216ec4f3 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -6,7 +6,6 @@
import logging
import time
from dataclasses import dataclass
-from typing import TypedDict
from errors import IssueMetricEventError, MissingServerConfigError
from manager.cloud_runner_manager import HealthState
diff --git a/tests/unit/mock_runner_managers.py b/tests/unit/mock_runner_managers.py
index 83f88e73e..a737cbde0 100644
--- a/tests/unit/mock_runner_managers.py
+++ b/tests/unit/mock_runner_managers.py
@@ -80,9 +80,14 @@ def __init__(self):
class MockCloudRunnerManager(CloudRunnerManager):
- """Mock for CloudRunnerManager.
+ """Mock of CloudRunnerManager.
Metrics is not supported in this mock.
+
+ Attributes:
+ name_prefix: The naming prefix for runners managed.
+ prefix: The naming prefix for runners managed.
+ state: The shared state between mock runner managers.
"""
def __init__(self, state: SharedMockRunnerManagerState):
@@ -200,21 +205,53 @@ def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
class MockGitHubRunnerManager:
+ """Mock of GitHubRunnerManager.
+
+ Attributes:
+ name_prefix: The naming prefix for runners managed.
+ state: The shared state between mock runner managers.
+ path: The GitHub path to register the runners under.
+ """
def __init__(self, name_prefix: str, path: GitHubPath, state: SharedMockRunnerManagerState):
+ """Construct the object.
+
+ Args:
+ name_prefix: The naming prefix for runners managed.
+ path: The GitHub path to register the runners under.
+ state: The shared state between mock runner managers.
+ """
self.name_prefix = name_prefix
self.state = state
self.path = path
def get_registration_token(self) -> str:
+ """Get the registration token for registering runners on GitHub.
+
+ Returns:
+ The registration token.
+ """
return "mock_registration_token"
def get_removal_token(self) -> str:
+ """Get the remove token for removing runners on GitHub.
+
+ Returns:
+ The remove token.
+ """
return "mock_remove_token"
def get_runners(
self, github_states: Iterable[GitHubRunnerState] | None = None
) -> tuple[SelfHostedRunner, ...]:
+ """Get the runners.
+
+ Args:
+ github_states: The states to filter for.
+
+ Returns:
+ List of runners.
+ """
if github_states is None:
github_states = [member.value for member in GitHubRunnerState]
@@ -237,6 +274,11 @@ def get_runners(
)
def delete_runners(self, states: Iterable[GitHubRunnerState]) -> None:
+ """Delete the runners.
+
+ Args:
+ states: The states to filter the runners to delete.
+ """
github_states = set(states)
self.state.runners = {
instance_id: runner
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index c7a14a431..8eec56033 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -238,7 +238,7 @@ def test_get_runner_one_busy_runner(
def test_get_runner_offline_runner(runner_scaler_one_runner: RunnerScaler):
"""
- Arrange: A RunnerScaler with one offline runner
+ Arrange: A RunnerScaler with one offline runner.
Act: Run get runners.
Assert: One offline runner.
"""
@@ -250,7 +250,7 @@ def test_get_runner_offline_runner(runner_scaler_one_runner: RunnerScaler):
def test_get_runner_unknown_runner(runner_scaler_one_runner: RunnerScaler):
"""
- Arrange: A RunnerScaler with one offline runner
+ Arrange: A RunnerScaler with one unknown runner.
Act: Run get runners.
Assert: One offline runner.
"""
From 40bec794e626d26ea0d23740ce962dfe7657c749 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 27 Aug 2024 18:36:21 +0800
Subject: [PATCH 240/278] Fix a naming issue
---
src/charm.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/charm.py b/src/charm.py
index a974ceb56..a19459b89 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -773,7 +773,7 @@ def _on_check_runners_action(self, event: ActionEvent) -> None:
"offline": info.offline,
"unknown": info.unknown,
"runners": info.runners,
- "busy_runners": info.busy_runners,
+ "busy-runners": info.busy_runners,
}
)
return
From e036d198cba7094424ba059bccf2c74ccf036026 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 27 Aug 2024 18:41:24 +0800
Subject: [PATCH 241/278] Fix naming prefix of runner
---
src/charm.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/src/charm.py b/src/charm.py
index a19459b89..198e7d2e9 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -1223,8 +1223,6 @@ def _get_runner_scaler(
if path is None:
path = state.charm_config.path
- app_name, _ = self.unit.name.rsplit("/", 1)
-
clouds = list(state.charm_config.openstack_clouds_yaml["clouds"].keys())
if len(clouds) > 1:
logger.warning(
@@ -1255,8 +1253,9 @@ def _get_runner_scaler(
ssh_debug_connections=state.ssh_debug_connections,
repo_policy_compliance=state.charm_config.repo_policy_compliance,
)
+ # The prefix is set to f"{application_name}-{unit number}"
openstack_runner_manager = OpenStackRunnerManager(
- app_name,
+ prefix=self.unit.name.replace("/", "-"),
cloud_config=cloud_config,
server_config=server_config,
runner_config=runner_config,
From c1c0ed99048223f02693b7258a8f09a360ad29b7 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 28 Aug 2024 08:57:35 +0800
Subject: [PATCH 242/278] Improve unit test
---
src/charm.py | 4 +++-
src/manager/runner_manager.py | 12 ++++++++++--
tests/unit/mock_runner_managers.py | 19 ++++++++++++++-----
tests/unit/test_runner_scaler.py | 11 ++++++++++-
4 files changed, 37 insertions(+), 9 deletions(-)
diff --git a/src/charm.py b/src/charm.py
index 198e7d2e9..5adcd8b01 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -1266,7 +1266,9 @@ def _get_runner_scaler(
path=path,
)
runner_manager = RunnerManager(
- cloud_runner_manager=openstack_runner_manager, config=runner_manager_config
+ manager_name=self.app.name,
+ cloud_runner_manager=openstack_runner_manager,
+ config=runner_manager_config,
)
return RunnerScaler(runner_manager=runner_manager)
diff --git a/src/manager/runner_manager.py b/src/manager/runner_manager.py
index 96d3acfed..72ded77fb 100644
--- a/src/manager/runner_manager.py
+++ b/src/manager/runner_manager.py
@@ -93,16 +93,24 @@ class RunnerManager:
"""Manage the runners.
Attributes:
+ manager_name: A name to identify this manager.
name_prefix: The name prefix of the runners.
"""
- def __init__(self, cloud_runner_manager: CloudRunnerManager, config: RunnerManagerConfig):
+ def __init__(
+ self,
+ manager_name: str,
+ cloud_runner_manager: CloudRunnerManager,
+ config: RunnerManagerConfig,
+ ):
"""Construct the object.
Args:
+ manager_name: A name to identify this manager.
cloud_runner_manager: For managing the cloud instance of the runner.
config: Configuration of this class.
"""
+ self.manager_name = manager_name
self._config = config
self._cloud = cloud_runner_manager
self.name_prefix = self._cloud.name_prefix
@@ -321,7 +329,7 @@ def _issue_runner_metrics(self, metrics: Iterator[RunnerMetrics]) -> IssuedMetri
issued_events = runner_metrics.issue_events(
runner_metrics=extracted_metrics,
job_metrics=job_metrics,
- flavor=self.name_prefix,
+ flavor=self.manager_name,
)
for event_type in issued_events:
diff --git a/tests/unit/mock_runner_managers.py b/tests/unit/mock_runner_managers.py
index a737cbde0..81c334f37 100644
--- a/tests/unit/mock_runner_managers.py
+++ b/tests/unit/mock_runner_managers.py
@@ -5,8 +5,10 @@
import secrets
from dataclasses import dataclass
from typing import Iterable, Iterator, Sequence
+from unittest.mock import MagicMock
from charm_state import GitHubPath
+from github_client import GithubClient
from github_type import GitHubRunnerStatus, SelfHostedRunner
from manager.cloud_runner_manager import (
CloudRunnerInstance,
@@ -15,7 +17,9 @@
InstanceId,
)
from manager.github_runner_manager import GitHubRunnerState
+from metrics.events import RunnerStop
from metrics.runner import RunnerMetrics
+from tests.unit.mock import MockGhapiClient
@dataclass
@@ -164,7 +168,9 @@ def delete_runner(self, instance_id: InstanceId, remove_token: str) -> RunnerMet
Returns:
Any runner metrics produced during deletion.
"""
- self.state.runners.pop(instance_id, None)
+ runner = self.state.runners.pop(instance_id, None)
+ if runner is not None:
+ return iter([MagicMock()])
return iter([])
def flush_runners(self, remove_token: str, busy: bool = False) -> Iterator[RunnerMetrics]:
@@ -178,7 +184,7 @@ def flush_runners(self, remove_token: str, busy: bool = False) -> Iterator[Runne
Returns:
Any runner metrics produced during flushing.
"""
- # No supporting metrics in the mocks.
+ num = len(self.state.runners)
if busy:
self.state.runners = {}
else:
@@ -187,7 +193,7 @@ def flush_runners(self, remove_token: str, busy: bool = False) -> Iterator[Runne
for instance_id, runner in self.state.runners.items()
if runner.github_state == GitHubRunnerState.BUSY
}
- return iter([])
+ return iter([MagicMock()])
def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
"""Cleanup runner and resource on the cloud.
@@ -200,14 +206,15 @@ def cleanup(self, remove_token: str) -> Iterator[RunnerMetrics]:
Returns:
Any runner metrics produced during cleanup.
"""
- # No supporting metrics in the mocks.
- return iter([])
+ # Do nothing in mocks.
+ return iter([MagicMock()])
class MockGitHubRunnerManager:
"""Mock of GitHubRunnerManager.
Attributes:
+ github: The GitHub client.
name_prefix: The naming prefix for runner managed.
state: The shared state between mock runner managers.
path: The GitHub path to register the runners under.
@@ -221,6 +228,8 @@ def __init__(self, name_prefix: str, path: GitHubPath, state: SharedMockRunnerMa
path: The GitHub path to register the runners under.
state: The shared state between mock runner managers.
"""
+ self.github = GithubClient("mock_token")
+ self.github._client = MockGhapiClient("mock_token")
self.name_prefix = name_prefix
self.state = state
self.path = path
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index 8eec56033..7312a69e4 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -3,6 +3,7 @@
from typing import Iterable
+from unittest.mock import MagicMock
import pytest
@@ -59,8 +60,16 @@ def runner_manager_fixture(
monkeypatch.setattr(
"manager.runner_manager.RunnerManager._spawn_runners", mock_runner_manager_spawn_runners
)
+ # Patch out the metrics, as the metrics have their own tests.
+ monkeypatch.setattr(
+ "manager.runner_manager.github_metrics.job", MagicMock()
+ )
+ monkeypatch.setattr(
+ "manager.runner_manager.runner_metrics.issue_events", MagicMock()
+ )
+
config = RunnerManagerConfig("mock_token", github_path)
- runner_manager = RunnerManager(mock_cloud, config)
+ runner_manager = RunnerManager("mock_runners", mock_cloud, config)
runner_manager._github = mock_github
return runner_manager
From 14a787a63ca2237bd0674f2d1c23775ff887e9ee Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 28 Aug 2024 09:12:51 +0800
Subject: [PATCH 243/278] Remove the old OpenstackRunnerManager
---
src-docs/openstack_cloud.md | 3 -
...penstack_cloud.openstack_runner_manager.md | 20 +-
src/openstack_cloud/openstack_manager.py | 1598 -----------------
.../openstack_runner_manager.py | 13 +-
tests/unit/mock_runner_managers.py | 2 -
tests/unit/test_openstack_manager.py | 1200 -------------
tests/unit/test_runner_scaler.py | 8 +-
7 files changed, 20 insertions(+), 2824 deletions(-)
delete mode 100644 src/openstack_cloud/openstack_manager.py
delete mode 100644 tests/unit/test_openstack_manager.py
diff --git a/src-docs/openstack_cloud.md b/src-docs/openstack_cloud.md
index 51140a4b2..34aa3f26f 100644
--- a/src-docs/openstack_cloud.md
+++ b/src-docs/openstack_cloud.md
@@ -10,9 +10,6 @@ Module for managing Openstack cloud.
- **openstack_cloud**: # Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
-- **openstack_manager**: # Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
- **openstack_runner_manager**: # Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 95bf05c5d..81a35247b 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -17,7 +17,7 @@ Manager for self-hosted runner on OpenStack.
---
-
+
## class `OpenStackCloudConfig`
Configuration for OpenStack cloud authorisation information.
@@ -47,7 +47,7 @@ __init__(clouds_config: dict[str, dict], cloud: str) → None
---
-
+
## class `OpenStackServerConfig`
Configuration for OpenStack server.
@@ -78,7 +78,7 @@ __init__(image: str, flavor: str, network: str) → None
---
-
+
## class `OpenStackRunnerManager`
Manage self-hosted runner on OpenStack cloud.
@@ -89,7 +89,7 @@ Manage self-hosted runner on OpenStack cloud.
- `name_prefix`: The name prefix of the runners created.
-
+
### method `__init__`
@@ -131,7 +131,7 @@ The prefix of runner names.
---
-
+
### method `cleanup`
@@ -154,7 +154,7 @@ Cleanup runner and resource on the cloud.
---
-
+
### method `create_runner`
@@ -184,7 +184,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -208,7 +208,7 @@ Delete self-hosted runners.
---
-
+
### method `flush_runners`
@@ -231,7 +231,7 @@ Remove idle and/or busy runners.
---
-
+
### method `get_runner`
@@ -254,7 +254,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
deleted file mode 100644
index e0ce47d4f..000000000
--- a/src/openstack_cloud/openstack_manager.py
+++ /dev/null
@@ -1,1598 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-
-# TODO: 2024-04-11 The module contains too many lines which are scheduled for refactoring.
-# pylint: disable=too-many-lines
-
-# TODO: 2024-04-22 The module contains duplicate code which is scheduled for refactoring.
-# Lines related to issuing metrics are duplicated:
-# ==openstack_cloud.openstack_manager:[1320:1337]
-# ==runner_manager:[383:413]
-# ==openstack_cloud.openstack_manager:[1283:1314]
-# ==runner_manager:[339:368]
-
-# pylint: disable=duplicate-code
-
-"""Module for handling interactions with OpenStack."""
-import logging
-import secrets
-import shutil
-import time
-from contextlib import contextmanager
-from dataclasses import dataclass
-from datetime import datetime
-from multiprocessing import Pool
-from pathlib import Path
-from typing import Iterable, Iterator, Literal, Optional, cast
-
-import invoke
-import jinja2
-import openstack
-import openstack.connection
-import openstack.exceptions
-import openstack.image.v2.image
-import paramiko
-from fabric import Connection as SSHConnection
-from openstack.compute.v2.server import Server
-from openstack.connection import Connection as OpenstackConnection
-from openstack.exceptions import SDKException
-from openstack.network.v2.security_group import SecurityGroup
-from paramiko.ssh_exception import NoValidConnectionsError
-
-import reactive.runner_manager as reactive_runner_manager
-from charm_state import CharmState, GitHubOrg, ProxyConfig, SSHDebugConnection
-from errors import (
- CreateMetricsStorageError,
- GetMetricsStorageError,
- GithubApiError,
- GithubClientError,
- GithubMetricsError,
- IssueMetricEventError,
- OpenStackError,
- RunnerCreateError,
- RunnerStartError,
-)
-from github_client import GithubClient
-from github_type import GitHubRunnerStatus, SelfHostedRunner
-from metrics import events as metric_events
-from metrics import github as github_metrics
-from metrics import runner as runner_metrics
-from metrics import storage as metrics_storage
-from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
-from repo_policy_compliance_client import RepoPolicyComplianceClient
-from runner_manager import IssuedMetricEventsStats
-from runner_manager_type import FlushMode, OpenstackRunnerManagerConfig
-from runner_type import GitHubPath, RunnerGithubInfo, RunnerNameByHealth
-from utilities import retry, set_env_var
-
-logger = logging.getLogger(__name__)
-
-# Update the version when the security group rules are not backward compatible.
-SECURITY_GROUP_NAME = "github-runner-v1"
-BUILD_OPENSTACK_IMAGE_SCRIPT_FILENAME = "scripts/build-openstack-image.sh"
-_SSH_KEY_PATH = Path("/home/ubuntu/.ssh")
-_CONFIG_SCRIPT_PATH = Path("/home/ubuntu/actions-runner/config.sh")
-
-RUNNER_APPLICATION = Path("/home/ubuntu/actions-runner")
-METRICS_EXCHANGE_PATH = Path("/home/ubuntu/metrics-exchange")
-PRE_JOB_SCRIPT = RUNNER_APPLICATION / "pre-job.sh"
-MAX_METRICS_FILE_SIZE = 1024
-
-RUNNER_STARTUP_PROCESS = "/home/ubuntu/actions-runner/run.sh"
-RUNNER_LISTENER_PROCESS = "Runner.Listener"
-RUNNER_WORKER_PROCESS = "Runner.Worker"
-CREATE_SERVER_TIMEOUT = 5 * 60
-
-
-class _PullFileError(Exception):
- """Represents an error while pulling a file from the runner instance."""
-
- def __init__(self, reason: str):
- """Construct PullFileError object.
-
- Args:
- reason: The reason for the error.
- """
- super().__init__(reason)
-
-
-class _SSHError(Exception):
- """Represents an error while interacting with SSH."""
-
- def __init__(self, reason: str):
- """Construct SSHErrors object.
-
- Args:
- reason: The reason for the error.
- """
- super().__init__(reason)
-
-
-@dataclass
-class InstanceConfig:
- """The configuration values for creating a single runner instance.
-
- Attributes:
- github_path: The GitHub repo/org path to register the runner.
- image_id: The Openstack image id to use to boot the instance with.
- labels: The runner instance labels.
- name: Name of the image to launch the GitHub runner instance with.
- registration_token: Token for registering the runner on GitHub.
- """
-
- github_path: GitHubPath
- image_id: str
- labels: Iterable[str]
- name: str
- registration_token: str
-
-
-SupportedCloudImageArch = Literal["amd64", "arm64"]
-
-
-@dataclass
-class _CloudInitUserData:
- """Dataclass to hold cloud init userdata.
-
- Attributes:
- instance_config: The configuration values for Openstack instance to launch.
- runner_env: The contents of .env to source when launching Github runner.
- pre_job_contents: The contents of pre-job script to run before starting the job.
- proxies: Proxy values to enable on the Github runner.
- dockerhub_mirror: URL to dockerhub mirror.
- """
-
- instance_config: InstanceConfig
- runner_env: str
- pre_job_contents: str
- dockerhub_mirror: Optional[str] = None
- proxies: Optional[ProxyConfig] = None
-
-
-@contextmanager
-def _create_connection(cloud_config: dict[str, dict]) -> Iterator[openstack.connection.Connection]:
- """Create a connection context managed object, to be used within with statements.
-
- This method should be called with a valid cloud_config. See _validate_cloud_config.
- Also, this method assumes that the clouds.yaml exists on ~/.config/openstack/clouds.yaml.
- See charm_state.py _write_openstack_config_to_disk.
-
- Args:
- cloud_config: The configuration in clouds.yaml format to apply.
-
- Raises:
- OpenStackError: if the credentials provided is not authorized.
-
- Yields:
- An openstack.connection.Connection object.
- """
- clouds = list(cloud_config["clouds"].keys())
- if len(clouds) > 1:
- logger.warning("Multiple clouds defined in clouds.yaml. Using the first one to connect.")
- cloud_name = clouds[0]
-
- # api documents that keystoneauth1.exceptions.MissingRequiredOptions can be raised but
- # I could not reproduce it. Therefore, no catch here for such exception.
- try:
- with openstack.connect(cloud=cloud_name) as conn:
- conn.authorize()
- yield conn
- # pylint thinks this isn't an exception, but does inherit from Exception class.
- except openstack.exceptions.HttpException as exc: # pylint: disable=bad-exception-cause
- logger.exception("OpenStack API call failure")
- raise OpenStackError("Failed OpenStack API call") from exc
-
-
-# Disable too many arguments, as they are needed to create the dataclass.
-def create_instance_config( # pylint: disable=too-many-arguments
- app_name: str,
- unit_num: int,
- image_id: str,
- path: GitHubPath,
- labels: Iterable[str],
- registration_token: str,
-) -> InstanceConfig:
- """Create an instance config from charm data.
-
- Args:
- app_name: The juju application name.
- unit_num: The juju unit number.
- image_id: The openstack image id to create the instance with.
- path: Github organisation or repository path.
- labels: Addition labels for the runner.
- registration_token: The Github runner registration token. See \
- https://docs.github.com/en/rest/actions/self-hosted-runners?apiVersion=2022-11-28#create-a-registration-token-for-a-repository
-
- Returns:
- Instance configuration created.
- """
- suffix = secrets.token_hex(12)
- return InstanceConfig(
- github_path=path,
- image_id=image_id,
- labels=labels,
- name=f"{app_name}-{unit_num}-{suffix}",
- registration_token=registration_token,
- )
-
-
-def _generate_runner_env(
- templates_env: jinja2.Environment,
- dockerhub_mirror: Optional[str] = None,
- ssh_debug_connections: list[SSHDebugConnection] | None = None,
-) -> str:
- """Generate Github runner .env file contents.
-
- Proxy configuration are handled by aproxy.
-
- Args:
- templates_env: The jinja template environment.
- dockerhub_mirror: The url to Dockerhub to reduce rate limiting.
- ssh_debug_connections: Tmate SSH debug connection information to load as environment vars.
-
- Returns:
- The .env contents to be loaded by Github runner.
- """
- return templates_env.get_template("env.j2").render(
- pre_job_script=str(PRE_JOB_SCRIPT),
- dockerhub_mirror=dockerhub_mirror or "",
- ssh_debug_info=(secrets.choice(ssh_debug_connections) if ssh_debug_connections else None),
- )
-
-
-def _generate_cloud_init_userdata(
- templates_env: jinja2.Environment,
- cloud_init_userdata: _CloudInitUserData,
-) -> str:
- """Generate cloud init userdata to launch at startup.
-
- Args:
- templates_env: The jinja template environment.
- cloud_init_userdata: The dataclass containing the cloud init userdata.
-
- Returns:
- The cloud init userdata script.
- """
- runner_group = None
- instance_config = cloud_init_userdata.instance_config
- proxies = cloud_init_userdata.proxies
-
- if isinstance(instance_config.github_path, GitHubOrg):
- runner_group = instance_config.github_path.group
-
- aproxy_address = proxies.aproxy_address if proxies is not None else None
- return templates_env.get_template("openstack-userdata.sh.j2").render(
- github_url=f"https://github.com/{instance_config.github_path.path()}",
- runner_group=runner_group,
- token=instance_config.registration_token,
- instance_labels=",".join(instance_config.labels),
- instance_name=instance_config.name,
- env_contents=cloud_init_userdata.runner_env,
- pre_job_contents=cloud_init_userdata.pre_job_contents,
- metrics_exchange_path=str(METRICS_EXCHANGE_PATH),
- aproxy_address=aproxy_address,
- dockerhub_mirror=cloud_init_userdata.dockerhub_mirror,
- )
-
-
-class GithubRunnerRemoveError(Exception):
- """Represents an error removing registered runner from Github."""
-
-
-_INSTANCE_STATUS_SHUTOFF = "SHUTOFF"
-_INSTANCE_STATUS_ERROR = "ERROR"
-_INSTANCE_STATUS_ACTIVE = "ACTIVE"
-_INSTANCE_STATUS_BUILDING = "BUILDING"
-
-
-class OpenstackRunnerManager:
- """Runner manager for OpenStack-based instances.
-
- Attributes:
- app_name: The juju application name.
- unit_num: The juju unit number.
- instance_name: Prefix of the name for the set of runners.
- """
-
- def __init__(
- self,
- app_name: str,
- unit_num: int,
- openstack_runner_manager_config: OpenstackRunnerManagerConfig,
- cloud_config: dict[str, dict],
- ):
- """Construct OpenstackRunnerManager object.
-
- Args:
- app_name: The juju application name.
- unit_num: The juju unit number.
- openstack_runner_manager_config: Configurations related to runner manager.
- cloud_config: The openstack clouds.yaml in dict format.
- """
- # Setting the env var to this process and any child process spawned.
- proxies = openstack_runner_manager_config.charm_state.proxy_config
- if no_proxy := proxies.no_proxy:
- set_env_var("NO_PROXY", no_proxy)
- if http_proxy := proxies.http:
- set_env_var("HTTP_PROXY", http_proxy)
- if https_proxy := proxies.https:
- set_env_var("HTTPS_PROXY", https_proxy)
-
- self.app_name = app_name
- self.unit_num = unit_num
- self.instance_name = f"{app_name}-{unit_num}"
- self._config = openstack_runner_manager_config
- self._cloud_config = cloud_config
- self._github = GithubClient(token=self._config.token)
-
- def reconcile(self, quantity: int) -> int:
- """Reconcile the quantity of runners.
-
- Args:
- quantity: The number of intended runners.
-
- Returns:
- The change in number of runners.
- """
- if self._config.reactive_config:
- logger.info("Reactive configuration detected, going into experimental reactive mode.")
- return self._reconcile_reactive(quantity)
-
- start_ts = time.time()
- try:
- delta = self._reconcile_runners(quantity)
- finally:
- end_ts = time.time()
- self._issue_reconciliation_metrics(
- reconciliation_start_ts=start_ts, reconciliation_end_ts=end_ts
- )
-
- return delta
-
- def _reconcile_reactive(self, quantity: int) -> int:
- """Reconcile runners reactively.
-
- Args:
- quantity: Number of intended runners.
-
- Returns:
- The difference between intended runners and actual runners. In reactive mode
- this number is never negative as additional processes should terminate after a timeout.
- """
- logger.info("Reactive mode is experimental and not yet fully implemented.")
- return reactive_runner_manager.reconcile(
- quantity=quantity, mq_uri=self._config.reactive_config.mq_uri, queue_name=self.app_name
- )
-
- def _reconcile_runners(self, quantity: int) -> int:
- """Reconcile the number of runners.
-
- Args:
- quantity: The number of intended runners.
-
- Returns:
- The change in number of runners.
- """
- with _create_connection(self._cloud_config) as conn:
- runner_by_health = self._get_openstack_runner_status(conn)
- logger.info(
- "Found %s healthy runner and %s unhealthy runner",
- len(runner_by_health.healthy),
- len(runner_by_health.unhealthy),
- )
- logger.debug("Healthy runner: %s", runner_by_health.healthy)
- logger.debug("Unhealthy runner: %s", runner_by_health.unhealthy)
- remove_token = self._github.get_runner_remove_token(path=self._config.path)
-
- self._clean_up_runners(
- conn=conn, runner_by_health=runner_by_health, remove_token=remove_token
- )
-
- delta = self._scale(
- quantity=quantity,
- conn=conn,
- runner_by_health=runner_by_health,
- remove_token=remove_token,
- )
- return delta
-
- def get_github_runner_info(self) -> tuple[RunnerGithubInfo, ...]:
- """Get information on GitHub for the runners.
-
- Returns:
- Collection of runner GitHub information.
- """
- remote_runners_list: list[SelfHostedRunner] = self._github.get_runner_github_info(
- self._config.path
- )
- logger.debug("List of runners found on GitHub:%s", remote_runners_list)
- return tuple(
- RunnerGithubInfo(
- runner["name"],
- runner["id"],
- runner["status"] == GitHubRunnerStatus.ONLINE,
- runner["busy"],
- )
- for runner in remote_runners_list
- if runner["name"].startswith(f"{self.instance_name}-")
- )
-
- def _get_openstack_runner_status(self, conn: OpenstackConnection) -> RunnerNameByHealth:
- """Get status on OpenStack of each runner.
-
- Args:
- conn: The connection object to access OpenStack cloud.
-
- Returns:
- Runner status grouped by health.
- """
- healthy_runner = []
- unhealthy_runner = []
- openstack_instances = self._get_openstack_instances(conn)
-
- logger.debug("Found openstack instances: %s", openstack_instances)
-
- for instance in openstack_instances:
- if not OpenstackRunnerManager._health_check(conn=conn, server_name=instance.name):
- unhealthy_runner.append(instance.name)
- else:
- healthy_runner.append(instance.name)
-
- return RunnerNameByHealth(healthy=tuple(healthy_runner), unhealthy=tuple(unhealthy_runner))
-
- def _get_openstack_instances(self, conn: OpenstackConnection) -> list[Server]:
- """Get the OpenStack servers managed by this unit.
-
- Args:
- conn: The connection object to access OpenStack cloud.
-
- Returns:
- List of OpenStack instances.
- """
- return [
- instance
- for instance in cast(list[Server], conn.list_servers())
- if instance.name.startswith(f"{self.instance_name}-")
- ]
-
- @staticmethod
- def _health_check(
- conn: OpenstackConnection,
- server_name: str,
- startup: bool = False,
- ) -> bool:
- """Health check a server instance.
-
- A healthy server is defined as:
- 1. Openstack instance status is ACTIVE or BUILDING.
- 2. Openstack instance status is in BUILDING less than CREATE_SERVER_TIMEOUT seconds.
- 3. Runner.Worker exists (running a job).
- 4. Runner.Listener exists (waiting for job).
- 5. GitHub runner status is Idle or Active.
-
- An undetermined server is marked as healthy when:
- 1. SSH fails - could be a transient network error.
- 2. The Runner.* processes do not exist. Mark healthy for now to gather data. This is
- subject to change to unhealthy once enough data has been gathered.
-
- Args:
- conn: The Openstack connection instance.
- server_name: The name of the OpenStack server to health check.
- startup: Check only whether the startup is successful.
-
- Returns:
- Whether the instance is healthy.
- """
- server: Server | None = conn.get_server(name_or_id=server_name)
- if not server:
- return False
- if server.status == (_INSTANCE_STATUS_SHUTOFF, _INSTANCE_STATUS_ERROR):
- return False
- if server.status not in (_INSTANCE_STATUS_ACTIVE, _INSTANCE_STATUS_BUILDING):
- return False
- created_at = datetime.strptime(server.created_at, "%Y-%m-%dT%H:%M:%SZ")
- current_time = datetime.now(created_at.tzinfo)
- elapsed_min = (created_at - current_time).total_seconds()
- if server.status == _INSTANCE_STATUS_BUILDING:
- return elapsed_min < CREATE_SERVER_TIMEOUT
- try:
- return OpenstackRunnerManager._ssh_health_check(
- conn=conn, server_name=server_name, startup=startup
- )
- except _SSHError:
- logger.warning("Health check failed, unable to SSH into server: %s", server_name)
- return False
-
- @staticmethod
- @retry(tries=3, delay=5, max_delay=60, backoff=2, local_logger=logger)
- def _ssh_health_check(conn: OpenstackConnection, server_name: str, startup: bool) -> bool:
- """Use SSH to check whether runner application is running.
-
- A healthy runner is defined as:
- 1. SSH connection can be established.
- 2. Runner.Worker exists (running a job).
- 3. Runner.Listener exists (waiting for job).
-
- Args:
- conn: The Openstack connection instance.
- server_name: The openstack server instance to check connections.
- startup: Check only whether the startup is successful.
-
- Raises:
- _SSHError: if there was an error SSH-ing into the machine or with the SSH command.
-
- Returns:
- Whether the runner application is running.
- """
- try:
- ssh_conn = OpenstackRunnerManager._get_ssh_connection(
- conn=conn, server_name=server_name
- )
- except _SSHError as exc:
- logger.error("[ALERT]: Unable to SSH to server: %s, reason: %s", server_name, str(exc))
- raise
-
- result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
- logger.debug("Output of `ps aux` on %s stderr: %s", server_name, result.stderr)
- if not result.ok:
- logger.warning("List all process command failed on %s.", server_name)
- raise _SSHError(f"List process command failed on {server_name}.")
- if RUNNER_STARTUP_PROCESS not in result.stdout:
- logger.warning("No startup process found on server %s.", server_name)
- raise _SSHError(f"Runner not yet started on {server_name}.")
-
- logger.info("Runner process found to be healthy on %s", server_name)
- if startup:
- return True
-
- if RUNNER_WORKER_PROCESS in result.stdout or RUNNER_LISTENER_PROCESS in result.stdout:
- return True
-
- return False
-
- @staticmethod
- @retry(tries=3, delay=5, max_delay=60, backoff=2, local_logger=logger)
- def _get_ssh_connection(
- conn: OpenstackConnection, server_name: str, timeout: int = 30
- ) -> SSHConnection:
- """Get a valid ssh connection within a network for a given openstack instance.
-
- The SSH connection will attempt to establish connection until the timeout configured.
-
- Args:
- conn: The Openstack connection instance.
- server_name: The Openstack server instance name.
- timeout: Timeout in seconds to attempt connection to each available server address.
-
- Raises:
- _SSHError: If there was an error getting a valid SSH connection.
-
- Returns:
- An SSH connection to OpenStack server instance.
- """
- server: Server | None = conn.get_server(name_or_id=server_name)
- if server is None:
- raise _SSHError(f"Server gone while trying to get SSH connection: {server_name}.")
- if not server.key_name:
- raise _SSHError(
- f"Unable to create SSH connection, no valid keypair found for {server.name}"
- )
- key_path = OpenstackRunnerManager._get_key_path(server.name)
- if not key_path.exists():
- raise _SSHError(f"Missing keyfile for server: {server.name}, key path: {key_path}")
- network_address_list = server.addresses.values()
- if not network_address_list:
- raise _SSHError(f"No addresses found for OpenStack server {server.name}")
-
- server_addresses: list[str] = [
- address["addr"]
- for network_addresses in network_address_list
- for address in network_addresses
- ]
- for ip in server_addresses:
- try:
- connection = SSHConnection(
- host=ip,
- user="ubuntu",
- connect_kwargs={"key_filename": str(key_path)},
- connect_timeout=timeout,
- )
- result = connection.run("echo hello world", warn=True, timeout=timeout)
- if not result.ok:
- logger.warning(
- "SSH test connection failed, server: %s, address: %s", server.name, ip
- )
- continue
- if "hello world" in result.stdout:
- return connection
- except (NoValidConnectionsError, TimeoutError, paramiko.ssh_exception.SSHException):
- logger.warning(
- "Unable to SSH into %s with address %s",
- server.name,
- connection.host,
- exc_info=True,
- )
- continue
- raise _SSHError(
- f"No connectable SSH addresses found, server: {server.name}, "
- f"addresses: {server_addresses}"
- )
-
- @staticmethod
- def _get_key_path(name: str) -> Path:
- """Get the filepath for storing private SSH of a runner.
-
- Args:
- name: The name of the runner.
-
- Returns:
- Path to reserved for the key file of the runner.
- """
- return _SSH_KEY_PATH / f"runner-{name}.key"
-
- @dataclass
- class _CreateRunnerArgs:
- """Arguments for _create_runner method.
-
- Attributes:
- app_name: The juju application name.
- cloud_config: The clouds.yaml containing the OpenStack credentials. The first cloud
- in the file will be used.
- config: Configurations related to runner manager.
- registration_token: Token for registering the runner on GitHub.
- unit_num: The juju unit number.
- """
-
- app_name: str
- cloud_config: dict[str, dict]
- config: OpenstackRunnerManagerConfig
- registration_token: str
- unit_num: int
-
- @staticmethod
- def _create_runner(args: _CreateRunnerArgs) -> None:
- """Create a runner on OpenStack cloud.
-
- Arguments are gathered into a dataclass due to Pool.map needing one argument functions.
-
- Args:
- args: Arguments of the method.
-
- Raises:
- RunnerCreateError: Unable to create the OpenStack runner.
- """
- ts_now = time.time()
- environment = jinja2.Environment(
- loader=jinja2.FileSystemLoader("templates"), autoescape=True
- )
-
- env_contents = _generate_runner_env(
- templates_env=environment,
- dockerhub_mirror=args.config.dockerhub_mirror,
- ssh_debug_connections=args.config.charm_state.ssh_debug_connections,
- )
-
- pre_job_contents = OpenstackRunnerManager._render_pre_job_contents(
- charm_state=args.config.charm_state, templates_env=environment
- )
-
- instance_config = create_instance_config(
- args.app_name,
- args.unit_num,
- args.config.image,
- args.config.path,
- args.config.labels,
- args.registration_token,
- )
- cloud_user_data = _CloudInitUserData(
- instance_config=instance_config,
- runner_env=env_contents,
- pre_job_contents=pre_job_contents,
- dockerhub_mirror=args.config.dockerhub_mirror,
- proxies=args.config.charm_state.proxy_config,
- )
- cloud_userdata_str = _generate_cloud_init_userdata(
- templates_env=environment,
- cloud_init_userdata=cloud_user_data,
- )
-
- with _create_connection(cloud_config=args.cloud_config) as conn:
- runner_security_group = OpenstackRunnerManager._ensure_security_group(conn)
- OpenstackRunnerManager._setup_runner_keypair(conn, instance_config.name)
-
- logger.info("Creating runner %s", instance_config.name)
- try:
- instance = conn.create_server(
- name=instance_config.name,
- image=instance_config.image_id,
- key_name=instance_config.name,
- flavor=args.config.flavor,
- network=args.config.network,
- security_groups=[runner_security_group["id"]],
- userdata=cloud_userdata_str,
- auto_ip=False,
- timeout=CREATE_SERVER_TIMEOUT,
- wait=True,
- )
- except openstack.exceptions.ResourceTimeout as err:
- logger.exception("Timeout creating OpenStack runner %s", instance_config.name)
- try:
- logger.info(
- "Attempting to remove OpenStack runner %s that timeout on creation",
- instance_config.name,
- )
- conn.delete_server(name_or_id=instance_config.name, wait=True)
- try:
- conn.delete_keypair(instance_config.name)
- except openstack.exceptions.SDKException:
- logger.exception(
- "Unable to delete OpenStack keypair %s", instance_config.name
- )
- OpenstackRunnerManager._get_key_path(instance_config.name).unlink(
- missing_ok=True
- )
- except openstack.exceptions.SDKException:
- logger.exception(
- "Cleanup of creation failure runner %s has failed", instance_config.name
- )
- # Reconcile will attempt to cleanup again prior to spawning new runners.
- raise RunnerCreateError(
- f"Timeout creating OpenStack runner {instance_config.name}"
- ) from err
- except openstack.exceptions.SDKException as err:
- logger.exception("Failed to create OpenStack runner %s", instance_config.name)
- raise RunnerCreateError(
- f"Failed to create OpenStack runner {instance_config.name}"
- ) from err
-
- logger.info("Waiting runner %s to come online", instance_config.name)
- OpenstackRunnerManager._wait_until_runner_process_running(conn, instance.name)
- logger.info("Finished creating runner %s", instance_config.name)
- ts_after = time.time()
- OpenstackRunnerManager._issue_runner_installed_metric(
- app_name=args.app_name,
- instance_config=instance_config,
- install_end_ts=ts_after,
- install_start_ts=ts_now,
- )
-
- @staticmethod
- def _render_pre_job_contents(
- charm_state: CharmState, templates_env: jinja2.Environment
- ) -> str:
- """Render the pre-job script contents.
-
- Args:
- charm_state: The charm state object.
- templates_env: The jinja template environment.
-
- Returns:
- The rendered pre-job script contents.
- """
- pre_job_contents_dict = {
- "issue_metrics": True,
- "metrics_exchange_path": str(METRICS_EXCHANGE_PATH),
- "do_repo_policy_check": False,
- }
- if repo_policy_config := charm_state.charm_config.repo_policy_compliance:
- repo_policy_client = RepoPolicyComplianceClient(
- url=repo_policy_config.url, charm_token=repo_policy_config.token
- )
- pre_job_contents_dict.update(
- {
- "repo_policy_base_url": repo_policy_client.base_url,
- "repo_policy_one_time_token": repo_policy_client.get_one_time_token(),
- "do_repo_policy_check": True,
- }
- )
- pre_job_contents = templates_env.get_template("pre-job.j2").render(pre_job_contents_dict)
- return pre_job_contents
-
- @staticmethod
- def _ensure_security_group(conn: OpenstackConnection) -> SecurityGroup:
- """Ensure runner security group exists.
-
- Args:
- conn: The connection object to access OpenStack cloud.
-
- Returns:
- The security group with the rules for runners.
- """
- rule_exists_icmp = False
- rule_exists_ssh = False
- rule_exists_tmate_ssh = False
-
- security_group_list = conn.list_security_groups(filters={"name": SECURITY_GROUP_NAME})
- # Pick the first security_group returned.
- security_group = next(iter(security_group_list), None)
-
- if security_group is None:
- logger.info("Security group %s not found, creating it", SECURITY_GROUP_NAME)
- security_group = conn.create_security_group(
- name=SECURITY_GROUP_NAME,
- description="For servers managed by the github-runner charm.",
- )
- else:
- existing_rules = security_group["security_group_rules"]
- for rule in existing_rules:
- if rule["protocol"] == "icmp":
- logger.debug(
- "Found ICMP rule in existing security group %s of ID %s",
- SECURITY_GROUP_NAME,
- security_group["id"],
- )
- rule_exists_icmp = True
- if (
- rule["protocol"] == "tcp"
- and rule["port_range_min"] == rule["port_range_max"] == 22
- ):
- logger.debug(
- "Found SSH rule in existing security group %s of ID %s",
- SECURITY_GROUP_NAME,
- security_group["id"],
- )
- rule_exists_ssh = True
- if (
- rule["protocol"] == "tcp"
- and rule["port_range_min"] == rule["port_range_max"] == 10022
- ):
- logger.debug(
- "Found tmate SSH rule in existing security group %s of ID %s",
- SECURITY_GROUP_NAME,
- security_group["id"],
- )
- rule_exists_tmate_ssh = True
-
- if not rule_exists_icmp:
- conn.create_security_group_rule(
- secgroup_name_or_id=security_group["id"],
- protocol="icmp",
- direction="ingress",
- ethertype="IPv4",
- )
- if not rule_exists_ssh:
- conn.create_security_group_rule(
- secgroup_name_or_id=security_group["id"],
- port_range_min="22",
- port_range_max="22",
- protocol="tcp",
- direction="ingress",
- ethertype="IPv4",
- )
- if not rule_exists_tmate_ssh:
- conn.create_security_group_rule(
- secgroup_name_or_id=security_group["id"],
- port_range_min="10022",
- port_range_max="10022",
- protocol="tcp",
- direction="egress",
- ethertype="IPv4",
- )
- return security_group
-
- @staticmethod
- def _setup_runner_keypair(conn: OpenstackConnection, name: str) -> None:
- """Set up the SSH keypair for a runner.
-
- Args:
- conn: The connection object to access OpenStack cloud.
- name: The name of the runner.
- """
- private_key_path = OpenstackRunnerManager._get_key_path(name)
-
- if private_key_path.exists():
- logger.warning("Existing private key file for %s found, removing it.", name)
- private_key_path.unlink()
-
- keypair = conn.create_keypair(name=name)
- private_key_path.write_text(keypair.private_key)
- shutil.chown(private_key_path, user="ubuntu", group="ubuntu")
- private_key_path.chmod(0o400)
-
- @retry(tries=10, delay=60, local_logger=logger)
- @staticmethod
- def _wait_until_runner_process_running(conn: OpenstackConnection, instance_name: str) -> None:
- """Wait until the runner process is running.
-
- The waiting to done by the retry declarator.
-
- Args:
- conn: The openstack connection instance.
- instance_name: The name of the instance to wait on.
-
- Raises:
- RunnerStartError: Unable perform health check of the runner application.
- """
- try:
- if not OpenstackRunnerManager._health_check(
- conn=conn, server_name=instance_name, startup=True
- ):
- raise RunnerStartError(
- (
- "Unable to find running process of runner application on openstack runner "
- f"{instance_name}"
- )
- )
- except TimeoutError as err:
- raise RunnerStartError(
- f"Unable to connect to openstack runner {instance_name}"
- ) from err
-
- @staticmethod
- def _issue_runner_installed_metric(
- app_name: str,
- instance_config: InstanceConfig,
- install_start_ts: float,
- install_end_ts: float,
- ) -> None:
- """Issue RunnerInstalled metric.
-
- Args:
- app_name: The juju application name.
- instance_config: The configuration values for Openstack instance.
- install_start_ts: The timestamp when the installation started.
- install_end_ts: The timestamp when the installation ended.
- """
- try:
- metric_events.issue_event(
- event=metric_events.RunnerInstalled(
- timestamp=install_start_ts,
- flavor=app_name,
- duration=install_end_ts - install_start_ts,
- ),
- )
- except IssueMetricEventError:
- logger.exception("Failed to issue RunnerInstalled metric")
- try:
- storage = metrics_storage.create(instance_config.name)
- except CreateMetricsStorageError:
- logger.exception(
- "Failed to create metrics storage for runner %s, "
- "will not be able to issue all metrics.",
- instance_config.name,
- )
- else:
- try:
- (storage.path / RUNNER_INSTALLED_TS_FILE_NAME).write_text(
- str(install_end_ts), encoding="utf-8"
- )
- except FileNotFoundError:
- logger.exception(
- "Failed to write runner-installed.timestamp into metrics storage "
- "for runner %s, will not be able to issue all metrics.",
- instance_config.name,
- )
-
- def _remove_runners(
- self,
- conn: OpenstackConnection,
- instance_names: Iterable[str],
- remove_token: str | None = None,
- num_to_remove: int | float | None = None,
- ) -> None:
- """Delete runners on Openstack.
-
- Removes the registered runner from Github if remove_token is provided.
-
- Args:
- conn: The Openstack connection instance.
- instance_names: The Openstack server names to delete.
- remove_token: The GitHub runner remove token.
- num_to_remove: Remove a specified number of runners. Remove all if None.
- """
- if num_to_remove is None:
- num_to_remove = float("inf")
-
- name_to_github_id = {
- runner["name"]: runner["id"]
- for runner in self._github.get_runner_github_info(self._config.path)
- }
- for instance_name in instance_names:
- if num_to_remove < 1:
- break
-
- github_id = name_to_github_id.get(instance_name, None)
- self._remove_one_runner(conn, instance_name, github_id, remove_token)
-
- # Attempt to delete the keys. This is place at the end of deletion, so we can access
- # the instances that failed to delete on previous tries.
- try:
- conn.delete_keypair(instance_name)
- except openstack.exceptions.SDKException:
- logger.exception("Unable to delete OpenStack keypair %s", instance_name)
- OpenstackRunnerManager._get_key_path(instance_name).unlink(missing_ok=True)
- num_to_remove -= 1
-
- def _remove_one_runner(
- self,
- conn: OpenstackConnection,
- instance_name: str,
- github_id: int | None = None,
- remove_token: str | None = None,
- ) -> None:
- """Remove one OpenStack runner.
-
- Args:
- conn: The Openstack connection instance.
- instance_name: The Openstack server name to delete.
- github_id: The runner id on GitHub.
- remove_token: The GitHub runner remove token.
- """
- logger.info("Attempting to remove OpenStack runner %s", instance_name)
-
- server: Server | None = conn.get_server(name_or_id=instance_name)
-
- if server is not None:
- logger.info(
- "Pulling metrics and deleting server for OpenStack runner %s", instance_name
- )
- self._pull_metrics(conn=conn, instance_name=instance_name)
- self._remove_openstack_runner(conn, server, remove_token)
- else:
- logger.info(
- "Not found server for OpenStack runner %s marked for deletion", instance_name
- )
-
- if github_id is not None:
- try:
- self._github.delete_runner(self._config.path, github_id)
- except GithubClientError as exc:
- logger.warning("Failed to remove runner from Github %s, %s", instance_name, exc)
- # TODO: 2024-04-23: The broad except clause is for logging purposes.
- # Will be removed in future versions.
- except Exception: # pylint: disable=broad-exception-caught
- logger.critical(
- "Found unexpected exception, please contact the developers", exc_info=True
- )
-
- def _pull_metrics(self, conn: OpenstackConnection, instance_name: str) -> None:
- """Pull metrics from the runner into the respective storage for the runner.
-
- Args:
- conn: The Openstack connection instance.
- instance_name: The Openstack server name.
- """
- try:
- storage = metrics_storage.get(instance_name)
- except GetMetricsStorageError:
- logger.exception(
- "Failed to get shared metrics storage for runner %s, "
- "will not be able to issue all metrics.",
- instance_name,
- )
- return
-
- try:
- ssh_conn = self._get_ssh_connection(conn=conn, server_name=instance_name)
- except _SSHError as exc:
- logger.info("Failed to pull metrics for %s: %s", instance_name, exc)
- return
-
- try:
- self._pull_file(
- ssh_conn=ssh_conn,
- remote_path=str(METRICS_EXCHANGE_PATH / "pre-job-metrics.json"),
- local_path=str(storage.path / "pre-job-metrics.json"),
- max_size=MAX_METRICS_FILE_SIZE,
- )
- self._pull_file(
- ssh_conn=ssh_conn,
- remote_path=str(METRICS_EXCHANGE_PATH / "post-job-metrics.json"),
- local_path=str(storage.path / "post-job-metrics.json"),
- max_size=MAX_METRICS_FILE_SIZE,
- )
- return
- except _PullFileError as exc:
- logger.warning(
- "Failed to pull metrics for %s: %s . Will not be able to issue all metrics",
- instance_name,
- exc,
- )
- return
-
- def _pull_file(
- self, ssh_conn: SSHConnection, remote_path: str, local_path: str, max_size: int
- ) -> None:
- """Pull file from the runner instance.
-
- Args:
- ssh_conn: The SSH connection instance.
- remote_path: The file path on the runner instance.
- local_path: The local path to store the file.
- max_size: If the file is larger than this, it will not be pulled.
-
- Raises:
- _PullFileError: Unable to pull the file from the runner instance.
- _SSHError: Issue with SSH connection.
- """
- try:
- result = ssh_conn.run(f"stat -c %s {remote_path}", warn=True)
- except (NoValidConnectionsError, TimeoutError, paramiko.ssh_exception.SSHException) as exc:
- raise _SSHError(reason=f"Unable to SSH into {ssh_conn.host}") from exc
- if not result.ok:
- logger.warning(
- (
- "Unable to get file size of %s on instance %s, "
- "exit code: %s, stdout: %s, stderr: %s"
- ),
- remote_path,
- ssh_conn.host,
- result.return_code,
- result.stdout,
- result.stderr,
- )
- raise _PullFileError(reason=f"Unable to get file size of {remote_path}")
-
- stdout = result.stdout
- try:
- stdout.strip()
- size = int(stdout)
- if size > max_size:
- raise _PullFileError(
- reason=f"File size of {remote_path} too large {size} > {max_size}"
- )
- except ValueError as exc:
- raise _PullFileError(reason=f"Invalid file size for {remote_path}: {stdout}") from exc
-
- try:
- ssh_conn.get(remote=remote_path, local=local_path)
- except (NoValidConnectionsError, TimeoutError, paramiko.ssh_exception.SSHException) as exc:
- raise _SSHError(reason=f"Unable to SSH into {ssh_conn.host}") from exc
- except OSError as exc:
- raise _PullFileError(reason=f"Unable to retrieve file {remote_path}") from exc
-
- def _remove_openstack_runner(
- self,
- conn: OpenstackConnection,
- server: Server,
- remove_token: str | None = None,
- ) -> None:
- """Remove a OpenStack server hosting the GitHub runner application.
-
- Args:
- conn: The Openstack connection instance.
- server: The Openstack server.
- remove_token: The GitHub runner remove token.
- """
- try:
- self._run_github_removal_script(conn=conn, server=server, remove_token=remove_token)
- except (TimeoutError, invoke.exceptions.UnexpectedExit, GithubRunnerRemoveError):
- logger.warning(
- "Failed to run runner removal script for %s", server.name, exc_info=True
- )
- # TODO: 2024-04-23: The broad except clause is for logging purposes.
- # Will be removed in future versions.
- except Exception: # pylint: disable=broad-exception-caught
- logger.critical(
- "Found unexpected exception, please contact the developers", exc_info=True
- )
- try:
- if not conn.delete_server(name_or_id=server.name, wait=True, delete_ips=True):
- logger.warning("Server does not exist %s", server.name)
- except SDKException as exc:
- logger.error("Something wrong deleting the server %s, %s", server.name, exc)
- # TODO: 2024-04-23: The broad except clause is for logging purposes.
- # Will be removed in future versions.
- except Exception: # pylint: disable=broad-exception-caught
- logger.critical(
- "Found unexpected exception, please contact the developers", exc_info=True
- )
-
- def _run_github_removal_script(
- self, conn: OpenstackConnection, server: Server, remove_token: str | None
- ) -> None:
- """Run Github runner removal script.
-
- Args:
- conn: The Openstack connection instance.
- server: The Openstack server instance.
- remove_token: The GitHub instance removal token.
-
- Raises:
- GithubRunnerRemoveError: Unable to remove runner from GitHub.
- """
- if not remove_token:
- return
- try:
- ssh_conn = OpenstackRunnerManager._get_ssh_connection(
- conn=conn, server_name=server.name
- )
- except _SSHError as exc:
- logger.error(
- "Unable to run GitHub removal script, server: %s, reason: %s",
- server.name,
- str(exc),
- )
- raise GithubRunnerRemoveError(
- f"Failed to remove runner {server.name} from Github."
- ) from exc
-
- try:
- result: invoke.runners.Result = ssh_conn.run(
- f"{_CONFIG_SCRIPT_PATH} remove --token {remove_token}",
- warn=True,
- )
- if not result.ok:
- logger.warning(
- (
- "Unable to run removal script on instance %s, "
- "exit code: %s, stdout: %s, stderr: %s"
- ),
- server.name,
- result.return_code,
- result.stdout,
- result.stderr,
- )
- return
- # TODO: 2024-04-23: The broad except clause is for logging purposes.
- # Will be removed in future versions.
- except Exception: # pylint: disable=broad-exception-caught
- logger.critical(
- "Found unexpected exception, please contact the developers", exc_info=True
- )
-
- logger.warning("Failed to run GitHub runner removal script %s", server.name)
- raise GithubRunnerRemoveError(f"Failed to remove runner {server.name} from Github.")
-
- def _clean_up_keys_files(
- self, conn: OpenstackConnection, exclude_instances: Iterable[str]
- ) -> None:
- """Delete all SSH key files except the specified instances.
-
- Args:
- conn: The Openstack connection instance.
- exclude_instances: The keys of these instance will not be deleted.
- """
- logger.info("Cleaning up SSH key files")
- exclude_filename = set(
- OpenstackRunnerManager._get_key_path(instance) for instance in exclude_instances
- )
-
- total = 0
- deleted = 0
- for path in _SSH_KEY_PATH.iterdir():
- # Find key file from this application.
- if (
- path.is_file()
- and path.name.startswith(self.instance_name)
- and path.name.endswith(".key")
- ):
- total += 1
- if path.name in exclude_filename:
- continue
-
- keypair_name = path.name.split(".")[0]
- try:
- conn.delete_keypair(keypair_name)
- except openstack.exceptions.SDKException:
- logger.warning(
- "Unable to delete OpenStack keypair associated with deleted key file %s ",
- path.name,
- )
-
- path.unlink()
- deleted += 1
- logger.info("Found %s key files, clean up %s key files", total, deleted)
-
- def _clean_up_openstack_keypairs(
- self, conn: OpenstackConnection, exclude_instances: Iterable[str]
- ) -> None:
- """Delete all OpenStack keypairs except the specified instances.
-
- Args:
- conn: The Openstack connection instance.
- exclude_instances: The keys of these instance will not be deleted.
- """
- logger.info("Cleaning up openstack keypairs")
- keypairs = conn.list_keypairs()
- for key in keypairs:
- # The `name` attribute is of resource.Body type.
- if key.name and str(key.name).startswith(self.instance_name):
- if str(key.name) in exclude_instances:
- continue
-
- try:
- conn.delete_keypair(key.name)
- except openstack.exceptions.SDKException:
- logger.warning(
- "Unable to delete OpenStack keypair associated with deleted key file %s ",
- key.name,
- )
-
- def _clean_up_runners(
- self, conn: OpenstackConnection, runner_by_health: RunnerNameByHealth, remove_token: str
- ) -> None:
- """Clean up offline or unhealthy runners.
-
- Args:
- conn: The openstack connection instance.
- runner_by_health: The runner status grouped by health.
- remove_token: The GitHub runner remove token.
-
- """
- github_info = self.get_github_runner_info()
- online_runners = [runner.runner_name for runner in github_info if runner.online]
- offline_runners = [runner.runner_name for runner in github_info if not runner.online]
- busy_runners = [runner.runner_name for runner in github_info if runner.busy]
- logger.info(
- "Found %s online and %s offline openstack runners, %s of the runners are busy",
- len(online_runners),
- len(offline_runners),
- len(busy_runners),
- )
- logger.debug("Online runner: %s", online_runners)
- logger.debug("Offline runner: %s", offline_runners)
- logger.debug("Busy runner: %s", busy_runners)
-
- healthy_runners_set = set(runner_by_health.healthy)
- busy_runners_set = set(busy_runners)
- busy_unhealthy_runners = set(runner_by_health.unhealthy).intersection(busy_runners_set)
- if busy_unhealthy_runners:
- logger.warning("Found unhealthy busy runners %s", busy_unhealthy_runners)
-
- # Clean up offline (SHUTOFF) runners or unhealthy (no connection/cloud-init script)
- # runners.
- # Possible for a healthy runner to be appear as offline for sometime as GitHub can be
- # slow to update the status.
- # For busy runners let GitHub decide whether the runner should be removed.
- instance_to_remove = tuple(
- runner
- for runner in (*runner_by_health.unhealthy, *offline_runners)
- if runner not in healthy_runners_set and runner not in busy_runners_set
- )
- logger.debug("Removing following runners with issues %s", instance_to_remove)
- self._remove_runners(
- conn=conn, instance_names=instance_to_remove, remove_token=remove_token
- )
- # Clean up orphan keys, e.g., If openstack instance is removed externally the key
- # would not be deleted.
- self._clean_up_keys_files(conn, runner_by_health.healthy)
- self._clean_up_openstack_keypairs(conn, runner_by_health.healthy)
-
- def _scale(
- self,
- quantity: int,
- conn: OpenstackConnection,
- runner_by_health: RunnerNameByHealth,
- remove_token: str,
- ) -> int:
- """Scale the number of runners.
-
- Args:
- quantity: The number of intended runners.
- conn: The openstack connection instance.
- runner_by_health: The runner status grouped by health.
- remove_token: The GitHub runner remove token.
-
- Returns:
- The change in number of runners.
- """
- # Get the number of OpenStack servers.
- # This is not calculated due to there might be removal failures.
- servers = self._get_openstack_instances(conn)
- delta = quantity - len(servers)
- registration_token = self._github.get_runner_registration_token(path=self._config.path)
-
- # Spawn new runners
- if delta > 0:
- logger.info("Creating %s OpenStack runners", delta)
- args = [
- OpenstackRunnerManager._CreateRunnerArgs(
- app_name=self.app_name,
- config=self._config,
- cloud_config=self._cloud_config,
- registration_token=registration_token,
- unit_num=self.unit_num,
- )
- for _ in range(delta)
- ]
- with Pool(processes=min(delta, 10)) as pool:
- pool.map(
- func=OpenstackRunnerManager._create_runner,
- iterable=args,
- )
-
- elif delta < 0:
- logger.info("Removing %s OpenStack runners", delta)
- self._remove_runners(
- conn=conn,
- instance_names=runner_by_health.healthy,
- remove_token=remove_token,
- num_to_remove=abs(delta),
- )
- else:
- logger.info("No changes to number of runners needed")
-
- return delta
-
- def _issue_reconciliation_metrics(
- self,
- reconciliation_start_ts: float,
- reconciliation_end_ts: float,
- ) -> None:
- """Issue all reconciliation related metrics.
-
- This includes the metrics for the runners and the reconciliation metric itself.
-
- Args:
- reconciliation_start_ts: The timestamp of when reconciliation started.
- reconciliation_end_ts: The timestamp of when reconciliation ended.
- """
- with _create_connection(self._cloud_config) as conn:
- runner_states = self._get_openstack_runner_status(conn)
-
- metric_stats = self._issue_runner_metrics(conn)
- self._issue_reconciliation_metric(
- metric_stats=metric_stats,
- reconciliation_start_ts=reconciliation_start_ts,
- reconciliation_end_ts=reconciliation_end_ts,
- runner_states=runner_states,
- )
-
- def _issue_runner_metrics(self, conn: OpenstackConnection) -> IssuedMetricEventsStats:
- """Issue runner metrics.
-
- Args:
- conn: The connection object to access OpenStack cloud.
-
- Returns:
- The stats of issued metric events.
- """
- total_stats: IssuedMetricEventsStats = {}
-
- try:
- openstack_instances = self._get_openstack_instances(conn)
- except openstack.exceptions.SDKException:
- logger.exception(
- "Failed to get openstack instances to ignore when extracting metrics."
- " Will not issue runner metrics"
- )
- return total_stats
-
- logger.debug(
- "Found following openstack instances before extracting metrics: %s",
- openstack_instances,
- )
- # Don't extract metrics for instances which are still there, as it might be
- # the case that the metrics have not yet been pulled
- # (they get pulled right before server termination).
- instance_names = {instance.name for instance in openstack_instances}
-
- for extracted_metrics in runner_metrics.extract(
- metrics_storage_manager=metrics_storage,
- runners=instance_names,
- ):
- try:
- job_metrics = github_metrics.job(
- github_client=self._github,
- pre_job_metrics=extracted_metrics.pre_job,
- runner_name=extracted_metrics.runner_name,
- )
- except GithubMetricsError:
- logger.exception("Failed to calculate job metrics")
- job_metrics = None
-
- issued_events = runner_metrics.issue_events(
- runner_metrics=extracted_metrics,
- job_metrics=job_metrics,
- flavor=self.app_name,
- )
- for event_type in issued_events:
- total_stats[event_type] = total_stats.get(event_type, 0) + 1
- return total_stats
-
- def _issue_reconciliation_metric(
- self,
- metric_stats: IssuedMetricEventsStats,
- reconciliation_start_ts: float,
- reconciliation_end_ts: float,
- runner_states: RunnerNameByHealth,
- ) -> None:
- """Issue reconciliation metric.
-
- Args:
- metric_stats: The stats of issued metric events.
- reconciliation_start_ts: The timestamp of when reconciliation started.
- reconciliation_end_ts: The timestamp of when reconciliation ended.
- runner_states: The states of the runners.
- """
- try:
- github_info = self.get_github_runner_info()
- except GithubApiError:
- logger.exception(
- "Failed to retrieve github info for reconciliation metric. "
- "Will not issue reconciliation metric."
- )
- return
-
- online_runners = [runner for runner in github_info if runner.online]
- offline_runner_names = {runner.runner_name for runner in github_info if not runner.online}
- active_runner_names = {runner.runner_name for runner in online_runners if runner.busy}
- healthy_runners = set(runner_states.healthy)
-
- active_count = len(active_runner_names)
- idle_online_count = len(online_runners) - active_count
- idle_offline_count = len((offline_runner_names & healthy_runners) - active_runner_names)
-
- try:
- metric_events.issue_event(
- event=metric_events.Reconciliation(
- timestamp=time.time(),
- flavor=self.app_name,
- crashed_runners=metric_stats.get(metric_events.RunnerStart, 0)
- - metric_stats.get(metric_events.RunnerStop, 0),
- idle_runners=idle_online_count + idle_offline_count,
- duration=reconciliation_end_ts - reconciliation_start_ts,
- )
- )
- except IssueMetricEventError:
- logger.exception("Failed to issue Reconciliation metric")
-
- def flush(self, mode: FlushMode = FlushMode.FLUSH_IDLE) -> int:
- """Flush Openstack servers.
-
- 1. Kill the processes depending on flush mode.
- 2. Get unhealthy runners after process purging.
- 3. Delete unhealthy runners.
-
- Args:
- mode: The mode to determine which runner to flush.
-
- Returns:
- The number of runners flushed.
- """
- logger.info("Flushing OpenStack all runners")
- with _create_connection(self._cloud_config) as conn:
- self._kill_runner_processes(conn=conn, mode=mode)
- runner_by_health = self._get_openstack_runner_status(conn)
- remove_token = self._github.get_runner_remove_token(path=self._config.path)
- self._remove_runners(
- conn=conn,
- instance_names=runner_by_health.unhealthy,
- remove_token=remove_token,
- )
- return len(runner_by_health.unhealthy)
-
- def _kill_runner_processes(self, conn: OpenstackConnection, mode: FlushMode) -> None:
- """Kill runner application that are not running any jobs.
-
- Runners that have not picked up a job has
- 1. no Runner.Worker process
- 2. no pre-run.sh job process
-
- Args:
- conn: The connection object to access OpenStack cloud.
- mode: The flush mode to determine which runner processes to kill.
-
- Raises:
- NotImplementedError: If unsupported flush mode has been passed.
- """
- killer_command: str
- match mode:
- case FlushMode.FLUSH_IDLE:
- # only kill Runner.Listener if Runner.Worker does not exist.
- killer_command = (
- "! pgrep -x Runner.Worker && pgrep -x Runner.Listener && "
- "kill $(pgrep -x Runner.Listener)"
- )
- case FlushMode.FLUSH_BUSY:
- # kill both Runner.Listener and Runner.Worker processes.
- # This kills pre-job.sh, a child process of Runner.Worker.
- killer_command = (
- "pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener);"
- "pgrep -x Runner.Worker && kill $(pgrep -x Runner.Worker);"
- )
- case _:
- raise NotImplementedError(f"Unsupported flush mode {mode}")
-
- servers = self._get_openstack_instances(conn=conn)
- for server in servers:
- ssh_conn: SSHConnection = self._get_ssh_connection(conn=conn, server_name=server.name)
- result: invoke.runners.Result = ssh_conn.run(
- killer_command,
- warn=True,
- )
- if not result.ok:
- logger.warning("Failed to kill runner process. Instance: %s", server.name)
- continue
- logger.info("Successfully killed runner process. Instance: %s", server.name)
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index 3a9acd4a0..c87af1a7d 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -41,7 +41,6 @@
from metrics import runner as runner_metrics
from metrics import storage as metrics_storage
from openstack_cloud.openstack_cloud import OpenstackCloud, OpenstackInstance
-from openstack_cloud.openstack_manager import GithubRunnerRemoveError
from repo_policy_compliance_client import RepoPolicyComplianceClient
from utilities import retry
@@ -61,6 +60,10 @@
CREATE_SERVER_TIMEOUT = 5 * 60
+class _GithubRunnerRemoveError(Exception):
+ """Represents an error while SSH into a runner and running the remove script."""
+
+
class _PullFileError(Exception):
"""Represents an error while pulling a file from the runner instance."""
@@ -337,7 +340,7 @@ def _delete_runner(self, instance: OpenstackInstance, remove_token: str) -> None
OpenStackRunnerManager._run_runner_removal_script(
instance.server_name, ssh_conn, remove_token
)
- except GithubRunnerRemoveError:
+ except _GithubRunnerRemoveError:
logger.warning(
"Unable to run github runner removal script for %s",
instance.server_name,
@@ -784,7 +787,7 @@ def _run_runner_removal_script(
remove_token: The GitHub instance removal token.
Raises:
- GithubRunnerRemoveError: Unable to remove runner from GitHub.
+ _GithubRunnerRemoveError: Unable to remove runner from GitHub.
"""
try:
result = ssh_conn.run(
@@ -804,12 +807,12 @@ def _run_runner_removal_script(
result.stdout,
result.stderr,
)
- raise GithubRunnerRemoveError(f"Failed to remove runner {instance_name} from Github.")
+ raise _GithubRunnerRemoveError(f"Failed to remove runner {instance_name} from Github.")
except (
TimeoutError,
paramiko.ssh_exception.NoValidConnectionsError,
paramiko.ssh_exception.SSHException,
) as exc:
- raise GithubRunnerRemoveError(
+ raise _GithubRunnerRemoveError(
f"Failed to remove runner {instance_name} from Github."
) from exc
diff --git a/tests/unit/mock_runner_managers.py b/tests/unit/mock_runner_managers.py
index 81c334f37..443c84dfd 100644
--- a/tests/unit/mock_runner_managers.py
+++ b/tests/unit/mock_runner_managers.py
@@ -17,7 +17,6 @@
InstanceId,
)
from manager.github_runner_manager import GitHubRunnerState
-from metrics.events import RunnerStop
from metrics.runner import RunnerMetrics
from tests.unit.mock import MockGhapiClient
@@ -184,7 +183,6 @@ def flush_runners(self, remove_token: str, busy: bool = False) -> Iterator[Runne
Returns:
Any runner metrics produced during flushing.
"""
- num = len(self.state.runners)
if busy:
self.state.runners = {}
else:
diff --git a/tests/unit/test_openstack_manager.py b/tests/unit/test_openstack_manager.py
deleted file mode 100644
index 5329b1282..000000000
--- a/tests/unit/test_openstack_manager.py
+++ /dev/null
@@ -1,1200 +0,0 @@
-# Copyright 2024 Canonical Ltd.
-# See LICENSE file for licensing details.
-import random
-import secrets
-from pathlib import Path
-from typing import Optional
-from unittest.mock import MagicMock, call
-
-import jinja2
-import openstack.connection
-import openstack.exceptions
-import pytest
-from fabric.connection import Connection as SSHConnection
-from invoke import Result
-from openstack.compute.v2.keypair import Keypair
-from openstack.compute.v2.server import Server
-from pytest import LogCaptureFixture, MonkeyPatch
-
-import metrics.storage
-import reactive.runner_manager
-from charm_state import CharmState, ProxyConfig, ReactiveConfig, RepoPolicyComplianceConfig
-from errors import OpenStackError, RunnerStartError
-from github_type import GitHubRunnerStatus, RunnerApplication, SelfHostedRunner
-from metrics import events as metric_events
-from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
-from metrics.storage import MetricsStorage
-from openstack_cloud import openstack_manager
-from openstack_cloud.openstack_manager import MAX_METRICS_FILE_SIZE, METRICS_EXCHANGE_PATH
-from runner_manager_type import FlushMode
-from runner_type import RunnerGithubInfo, RunnerNameByHealth
-from tests.unit import factories
-
-FAKE_MONGODB_URI = "mongodb://example.com/db"
-CLOUD_NAME = "microstack"
-
-
-@pytest.fixture(autouse=True, name="openstack_connect_mock")
-def mock_openstack_connect_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
- """Mock openstack.connect."""
- mock_connect = MagicMock(spec=openstack_manager.openstack.connect)
- monkeypatch.setattr("openstack_cloud.openstack_manager.openstack.connect", mock_connect)
- return mock_connect
-
-
-@pytest.fixture(name="mock_server")
-def mock_server_fixture() -> MagicMock:
- """Mock OpenStack Server object."""
- mock_server = MagicMock(spec=Server)
- mock_server.key_name = "mock_key"
- mock_server.addresses.values = MagicMock(return_value=[[{"addr": "10.0.0.1"}]])
- return mock_server
-
-
-@pytest.fixture(name="patch_get_ssh_connection_health_check")
-def patch_get_ssh_connection_health_check_fixture(monkeypatch: pytest.MonkeyPatch):
- """Patch SSH connection to a MagicMock instance for get_ssh_connection health check."""
- mock_get_ssh_connection = MagicMock(
- spec=openstack_manager.OpenstackRunnerManager._get_ssh_connection
- )
- mock_ssh_connection = MagicMock(spec=SSHConnection)
- mock_ssh_connection.host = "test host IP"
- mock_result = MagicMock(spec=Result)
- mock_result.ok = True
- mock_result.stderr = ""
- mock_result.stdout = "hello world"
- mock_ssh_connection.run.return_value = mock_result
- mock_get_ssh_connection.return_value = [mock_ssh_connection]
-
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager,
- "_get_ssh_connection",
- mock_get_ssh_connection,
- )
-
-
-@pytest.fixture(name="ssh_connection_health_check")
-def ssh_connection_health_check_fixture(monkeypatch: pytest.MonkeyPatch):
- """SSH connection to a MagicMock instance for health check."""
- mock_get_ssh_connection = MagicMock(
- spec=openstack_manager.OpenstackRunnerManager._get_ssh_connection
- )
- mock_ssh_connection = MagicMock(spec=SSHConnection)
- mock_ssh_connection.host = "test host IP"
- mock_result = MagicMock(spec=Result)
- mock_result.ok = True
- mock_result.stderr = ""
- mock_result.stdout = "-- Test output: /bin/bash /home/ubuntu/actions-runner/run.sh --"
- mock_ssh_connection.run.return_value = mock_result
- mock_get_ssh_connection.return_value = mock_ssh_connection
-
- return mock_get_ssh_connection
-
-
-@pytest.fixture(name="patch_ssh_connection_error")
-def patch_ssh_connection_error_fixture(monkeypatch: pytest.MonkeyPatch):
- """Patch SSH connection to a MagicMock instance with error on run."""
- mock_get_ssh_connection = MagicMock(
- spec=openstack_manager.OpenstackRunnerManager._get_ssh_connection
- )
- mock_ssh_connection = MagicMock(spec=SSHConnection)
- mock_result = MagicMock(spec=Result)
- mock_result.ok = False
- mock_result.stdout = "Mock stdout"
- mock_result.stderr = "Mock stderr"
- mock_ssh_connection.run.return_value = mock_result
- mock_get_ssh_connection.return_value = mock_ssh_connection
-
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager,
- "_get_ssh_connection",
- mock_get_ssh_connection,
- )
-
-
-@pytest.fixture(name="mock_github_client")
-def mock_github_client_fixture() -> MagicMock:
- """Mocked github client that returns runner application."""
- mock_github_client = MagicMock(spec=openstack_manager.GithubClient)
- mock_github_client.get_runner_application.return_value = RunnerApplication(
- os="linux",
- architecture="x64",
- download_url="http://test_url",
- filename="test_filename",
- temp_download_token="test_token",
- )
- mock_github_client.get_runner_registration_token.return_value = "test_token"
- return mock_github_client
-
-
-@pytest.fixture(name="patch_execute_command")
-def patch_execute_command_fixture(monkeypatch: pytest.MonkeyPatch):
- """Patch execute command to a MagicMock instance."""
- monkeypatch.setattr(
- openstack_manager,
- "execute_command",
- MagicMock(spec=openstack_manager.execute_command),
- )
-
-
-@pytest.fixture(name="patched_create_connection_context")
-def patched_create_connection_context_fixture(monkeypatch: pytest.MonkeyPatch):
- """Return a mocked openstack connection context manager and patch create_connection."""
- mock_connection = MagicMock(spec=openstack_manager.openstack.connection.Connection)
- monkeypatch.setattr(
- openstack_manager,
- "_create_connection",
- MagicMock(spec=openstack_manager._create_connection, return_value=mock_connection),
- )
- return mock_connection.__enter__()
-
-
-@pytest.fixture(name="ssh_connection_mock")
-def ssh_connection_mock_fixture() -> MagicMock:
- """Return a mocked ssh connection."""
- test_file_content = secrets.token_hex(16)
- ssh_conn_mock = MagicMock(spec=openstack_manager.SSHConnection)
- ssh_conn_mock.get.side_effect = lambda remote, local: Path(local).write_text(test_file_content)
- ssh_conn_mock.run.side_effect = lambda cmd, **kwargs: (
- Result(stdout="1") if cmd.startswith("stat") else Result()
- )
- ssh_conn_mock.run.return_value = Result()
-
- return ssh_conn_mock
-
-
-@pytest.fixture(name="openstack_manager_for_reconcile")
-def openstack_manager_for_reconcile_fixture(
- monkeypatch: pytest.MonkeyPatch,
- mock_github_client: MagicMock,
- patched_create_connection_context: MagicMock,
- tmp_path: Path,
- ssh_connection_mock: MagicMock,
-):
- """Create a mocked openstack manager for the reconcile tests."""
- t_mock = MagicMock(return_value=12345)
- monkeypatch.setattr(openstack_manager.time, "time", t_mock)
-
- issue_event_mock = MagicMock(spec=metric_events.issue_event)
- monkeypatch.setattr(openstack_manager.metric_events, "issue_event", issue_event_mock)
-
- runner_metrics_mock = MagicMock(openstack_manager.runner_metrics)
- monkeypatch.setattr(openstack_manager, "runner_metrics", runner_metrics_mock)
-
- github_metrics_mock = MagicMock(openstack_manager.github_metrics)
- monkeypatch.setattr(openstack_manager, "github_metrics", github_metrics_mock)
-
- monkeypatch.setattr(
- openstack_manager, "GithubClient", MagicMock(return_value=mock_github_client)
- )
-
- runner_metrics_path = tmp_path / "runner_fs"
- ms = MetricsStorage(path=runner_metrics_path, runner_name="test_runner")
- monkeypatch.setattr(openstack_manager.metrics_storage, "create", MagicMock(return_value=ms))
- monkeypatch.setattr(openstack_manager.metrics_storage, "get", MagicMock(return_value=ms))
-
- pool_mock = MagicMock()
- pool_mock.__enter__.return_value = pool_mock
- pool_mock.map.side_effect = lambda func, iterable: func(*iterable)
- pool_cls_mock = MagicMock()
- pool_cls_mock.return_value = pool_mock
- monkeypatch.setattr(openstack_manager, "Pool", pool_cls_mock)
-
- app_name = secrets.token_hex(16)
- charm_state = MagicMock(spec=CharmState)
- charm_state.proxy_config = ProxyConfig()
- charm_state.ssh_debug_connections = MagicMock()
- charm_state.charm_config = MagicMock()
- charm_state.charm_config.repo_policy_compliance = None
- os_runner_manager_config = openstack_manager.OpenstackRunnerManagerConfig(
- charm_state=charm_state,
- path=MagicMock(),
- labels=[],
- token=secrets.token_hex(16),
- flavor=app_name,
- image="test-image-id",
- network=secrets.token_hex(16),
- dockerhub_mirror=None,
- )
- patched_create_connection_context.create_keypair.return_value = Keypair(private_key="test_key")
- server_mock = MagicMock()
- server_mock.status = openstack_manager._INSTANCE_STATUS_ACTIVE
- patched_create_connection_context.get_server.return_value = server_mock
-
- os_runner_manager = openstack_manager.OpenstackRunnerManager(
- app_name=app_name,
- unit_num=0,
- openstack_runner_manager_config=os_runner_manager_config,
- cloud_config={},
- )
- os_runner_manager._ssh_health_check = MagicMock(return_value=True)
- os_runner_manager._get_ssh_connection = MagicMock(return_value=ssh_connection_mock)
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager, "_wait_until_runner_process_running", MagicMock()
- )
-
- monkeypatch.setattr(openstack_manager, "_SSH_KEY_PATH", tmp_path)
- monkeypatch.setattr(openstack_manager.shutil, "chown", MagicMock())
-
- return os_runner_manager
-
-
-@pytest.fixture(name="reactive_reconcile_mock")
-def reactive_reconcile_fixture(monkeypatch: MonkeyPatch, tmp_path: Path) -> MagicMock:
- """Mock the job class."""
- reconcile_mock = MagicMock(spec=reactive.runner_manager.reconcile)
- monkeypatch.setattr(
- "openstack_cloud.openstack_manager.reactive_runner_manager.reconcile", reconcile_mock
- )
- reconcile_mock.side_effect = lambda quantity, **kwargs: quantity
- return reconcile_mock
-
-
-def test__create_connection_error(clouds_yaml: dict, openstack_connect_mock: MagicMock):
- """
- arrange: given a monkeypatched connection.authorize() function that raises an error.
- act: when _create_connection is called.
- assert: OpenStackUnauthorizedError is raised.
- """
- connection_mock = MagicMock()
- connection_context = MagicMock()
- connection_context.authorize.side_effect = openstack.exceptions.HttpException
- connection_mock.__enter__.return_value = connection_context
- openstack_connect_mock.return_value = connection_mock
-
- with pytest.raises(OpenStackError) as exc:
- with openstack_manager._create_connection(cloud_config=clouds_yaml):
- pass
-
- assert "Failed OpenStack API call" in str(exc)
-
-
-def test__create_connection(
- multi_clouds_yaml: dict, clouds_yaml: dict, cloud_name: str, openstack_connect_mock: MagicMock
-):
- """
- arrange: given a cloud config yaml dict with 1. multiple clouds 2. single cloud.
- act: when _create_connection is called.
- assert: connection with first cloud in the config is used.
- """
- # 1. multiple clouds
- with openstack_manager._create_connection(cloud_config=multi_clouds_yaml):
- openstack_connect_mock.assert_called_with(cloud=CLOUD_NAME)
-
- # 2. single cloud
- with openstack_manager._create_connection(cloud_config=clouds_yaml):
- openstack_connect_mock.assert_called_with(cloud=cloud_name)
-
-
-@pytest.mark.parametrize(
- "dockerhub_mirror, ssh_debug_connections, expected_env_contents",
- [
- pytest.param(
- None,
- None,
- """PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
-
-LANG=C.UTF-8
-ACTIONS_RUNNER_HOOK_JOB_STARTED=/home/ubuntu/actions-runner/pre-job.sh
-""",
- id="all values empty",
- ),
- pytest.param(
- "http://dockerhub_mirror.test",
- None,
- """PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
-
-DOCKERHUB_MIRROR=http://dockerhub_mirror.test
-CONTAINER_REGISTRY_URL=http://dockerhub_mirror.test
-
-LANG=C.UTF-8
-ACTIONS_RUNNER_HOOK_JOB_STARTED=/home/ubuntu/actions-runner/pre-job.sh
-""",
- id="dockerhub mirror set",
- ),
- pytest.param(
- None,
- [
- openstack_manager.SSHDebugConnection(
- host="127.0.0.1",
- port=10022,
- rsa_fingerprint="SHA256:testrsa",
- ed25519_fingerprint="SHA256:tested25519",
- )
- ],
- """PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
-
-LANG=C.UTF-8
-ACTIONS_RUNNER_HOOK_JOB_STARTED=/home/ubuntu/actions-runner/pre-job.sh
-
-TMATE_SERVER_HOST=127.0.0.1
-TMATE_SERVER_PORT=10022
-TMATE_SERVER_RSA_FINGERPRINT=SHA256:testrsa
-TMATE_SERVER_ED25519_FINGERPRINT=SHA256:tested25519
-""",
- id="ssh debug connection set",
- ),
- pytest.param(
- "http://dockerhub_mirror.test",
- [
- openstack_manager.SSHDebugConnection(
- host="127.0.0.1",
- port=10022,
- rsa_fingerprint="SHA256:testrsa",
- ed25519_fingerprint="SHA256:tested25519",
- )
- ],
- """PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
-
-DOCKERHUB_MIRROR=http://dockerhub_mirror.test
-CONTAINER_REGISTRY_URL=http://dockerhub_mirror.test
-
-LANG=C.UTF-8
-ACTIONS_RUNNER_HOOK_JOB_STARTED=/home/ubuntu/actions-runner/pre-job.sh
-
-TMATE_SERVER_HOST=127.0.0.1
-TMATE_SERVER_PORT=10022
-TMATE_SERVER_RSA_FINGERPRINT=SHA256:testrsa
-TMATE_SERVER_ED25519_FINGERPRINT=SHA256:tested25519
-""",
- id="all values set",
- ),
- ],
-)
-def test__generate_runner_env(
- dockerhub_mirror: Optional[str],
- ssh_debug_connections: Optional[list[openstack_manager.SSHDebugConnection]],
- expected_env_contents: str,
-):
- """
- arrange: given configuration values for runner environment.
- act: when _generate_runner_env is called.
- assert: expected .env contents are generated.
- """
- environment = jinja2.Environment(loader=jinja2.FileSystemLoader("templates"), autoescape=True)
- assert (
- openstack_manager._generate_runner_env(
- templates_env=environment,
- dockerhub_mirror=dockerhub_mirror,
- ssh_debug_connections=ssh_debug_connections,
- )
- == expected_env_contents
- )
-
-
-def test_reconcile_issues_runner_installed_event(
- openstack_manager_for_reconcile: openstack_manager.OpenstackRunnerManager,
-):
- """
- arrange: Mock openstack manager for reconcile.
- act: Reconcile to create a runner.
- assert: The expected event is issued.
- """
- openstack_manager_for_reconcile.reconcile(quantity=1)
-
- openstack_manager.metric_events.issue_event.assert_has_calls(
- [
- call(
- event=metric_events.RunnerInstalled(
- timestamp=openstack_manager.time.time(),
- flavor=openstack_manager_for_reconcile.app_name,
- duration=0,
- )
- )
- ]
- )
-
-
-def test_reconcile_places_timestamp_in_metrics_storage(
- openstack_manager_for_reconcile: openstack_manager.OpenstackRunnerManager,
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
-):
- """
- arrange: Mock timestamps and create the directory for the metrics storage.
- act: Reconcile to create a runner.
- assert: The expected timestamp is placed in the shared filesystem.
- """
- runner_metrics_path = tmp_path / "runner_fs"
- runner_metrics_path.mkdir()
- ms = MetricsStorage(path=runner_metrics_path, runner_name="test_runner")
- monkeypatch.setattr(openstack_manager.metrics_storage, "create", MagicMock(return_value=ms))
-
- openstack_manager_for_reconcile.reconcile(quantity=1)
-
- assert (ms.path / RUNNER_INSTALLED_TS_FILE_NAME).exists()
- assert (ms.path / RUNNER_INSTALLED_TS_FILE_NAME).read_text() == str(
- openstack_manager.time.time()
- )
-
-
-def test_reconcile_error_on_placing_timestamp_is_ignored(
- openstack_manager_for_reconcile: openstack_manager.OpenstackRunnerManager,
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
-):
- """
- arrange: Do not create the directory for the metrics storage\
- in order to let a FileNotFoundError to be raised inside the OpenstackRunnerManager.
- act: Reconcile to create a runner.
- assert: No exception is raised.
- """
- runner_metrics_path = tmp_path / "runner_fs"
-
- ms = MetricsStorage(path=runner_metrics_path, runner_name="test_runner")
- monkeypatch.setattr(openstack_manager.metrics_storage, "create", MagicMock(return_value=ms))
-
- openstack_manager_for_reconcile.reconcile(quantity=1)
-
- assert not (ms.path / RUNNER_INSTALLED_TS_FILE_NAME).exists()
-
-
-def test_reconcile_pulls_metric_files(
- openstack_manager_for_reconcile: openstack_manager.OpenstackRunnerManager,
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
- ssh_connection_mock: MagicMock,
-):
- """
- arrange: Mock the metrics storage and the ssh connection.
- act: Reconcile to create a runner.
- assert: The expected metric files are pulled from the shared filesystem.
- """
- runner_metrics_path = tmp_path / "runner_fs"
- runner_metrics_path.mkdir()
- ms = MetricsStorage(path=runner_metrics_path, runner_name="test_runner")
- monkeypatch.setattr(openstack_manager.metrics_storage, "create", MagicMock(return_value=ms))
- monkeypatch.setattr(openstack_manager.metrics_storage, "get", MagicMock(return_value=ms))
- openstack_manager_for_reconcile._get_openstack_runner_status = MagicMock(
- return_value=RunnerNameByHealth(healthy=(), unhealthy=("test_runner",))
- )
- ssh_connection_mock.get.side_effect = MagicMock()
- openstack_manager_for_reconcile.reconcile(quantity=0)
-
- ssh_connection_mock.get.assert_any_call(
- remote=str(METRICS_EXCHANGE_PATH / "pre-job-metrics.json"),
- local=str(ms.path / "pre-job-metrics.json"),
- )
- ssh_connection_mock.get.assert_any_call(
- remote=str(METRICS_EXCHANGE_PATH / "post-job-metrics.json"),
- local=str(ms.path / "post-job-metrics.json"),
- )
-
-
-def test_reconcile_does_not_pull_too_large_files(
- openstack_manager_for_reconcile: openstack_manager.OpenstackRunnerManager,
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
- ssh_connection_mock: MagicMock,
-):
- """
- arrange: Mock the metrics storage and the ssh connection to return a file that is too large.
- act: Reconcile to create a runner.
- assert: The expected metric files are not pulled from the shared filesystem.
- """
- runner_metrics_path = tmp_path / "runner_fs"
- runner_metrics_path.mkdir()
- ms = MetricsStorage(path=runner_metrics_path, runner_name="test_runner")
- monkeypatch.setattr(openstack_manager.metrics_storage, "create", MagicMock(return_value=ms))
- monkeypatch.setattr(openstack_manager.metrics_storage, "get", MagicMock(return_value=ms))
- ssh_connection_mock.run.side_effect = lambda cmd, **kwargs: (
- Result(stdout=f"{MAX_METRICS_FILE_SIZE + 1}") if cmd.startswith("stat") else Result()
- )
- openstack_manager_for_reconcile._get_openstack_runner_status = MagicMock(
- return_value=RunnerNameByHealth(healthy=("test_runner",), unhealthy=())
- )
-
- openstack_manager_for_reconcile.reconcile(quantity=0)
-
- assert not (ms.path / "pre-job-metrics.json").exists()
- assert not (ms.path / "post-job-metrics.json").exists()
-
-
-def test_reconcile_issue_reconciliation_metrics(
- openstack_manager_for_reconcile: openstack_manager.OpenstackRunnerManager,
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
-):
- """
- arrange: Mock the metrics storage and the ssh connection.
- act: Reconcile.
- assert: The expected reconciliation metrics are issued.
- """
- runner_metrics_path = tmp_path / "runner_fs"
- runner_metrics_path.mkdir()
- ms = MetricsStorage(path=runner_metrics_path, runner_name="test_runner")
- monkeypatch.setattr(openstack_manager.metrics_storage, "create", MagicMock(return_value=ms))
- monkeypatch.setattr(openstack_manager.metrics_storage, "get", MagicMock(return_value=ms))
- openstack_manager_for_reconcile._get_openstack_runner_status = MagicMock(
- return_value=RunnerNameByHealth(healthy=("test_runner",), unhealthy=())
- )
-
- openstack_manager.runner_metrics.extract.return_value = (MagicMock() for _ in range(2))
- openstack_manager.runner_metrics.issue_events.side_effect = [
- {metric_events.RunnerStart, metric_events.RunnerStop},
- {metric_events.RunnerStart},
- ]
-
- openstack_manager_for_reconcile._github.get_runner_github_info.return_value = [
- SelfHostedRunner(
- busy=False,
- id=1,
- labels=[],
- os="linux",
- name=f"{openstack_manager_for_reconcile.instance_name}-test_runner",
- status=GitHubRunnerStatus.ONLINE,
- )
- ]
- openstack_manager_for_reconcile.reconcile(quantity=0)
-
- openstack_manager.metric_events.issue_event.assert_has_calls(
- [
- call(
- event=metric_events.Reconciliation(
- timestamp=12345,
- flavor=openstack_manager_for_reconcile.app_name,
- crashed_runners=1,
- idle_runners=1,
- duration=0,
- )
- )
- ]
- )
-
-
-def test_reconcile_ignores_metrics_for_openstack_online_runners(
- openstack_manager_for_reconcile,
- monkeypatch,
- tmp_path,
- patched_create_connection_context: MagicMock,
-):
- """
- arrange: Combination of runner status/github status and openstack status.
- act: Call reconcile.
- assert: All runners which have an instance on Openstack are ignored for metrics extraction.
- """
- runner_metrics_path = tmp_path / "runner_fs"
- runner_metrics_path.mkdir()
- ms = MetricsStorage(path=runner_metrics_path, runner_name="test_runner")
- monkeypatch.setattr(openstack_manager.metrics_storage, "create", MagicMock(return_value=ms))
- monkeypatch.setattr(openstack_manager.metrics_storage, "get", MagicMock(return_value=ms))
- instance_name = openstack_manager_for_reconcile.instance_name
- runner_names = {
- k: f"{instance_name}-{k}"
- for k in [
- "healthy_online",
- "healthy_offline",
- "unhealthy_online",
- "unhealthy_offline",
- "openstack_online_no_github_status",
- "github_online_no_openstack_status",
- ]
- }
- openstack_manager_for_reconcile._get_openstack_runner_status = MagicMock(
- return_value=RunnerNameByHealth(
- healthy=(runner_names["healthy_online"], runner_names["healthy_offline"]),
- unhealthy=(
- runner_names["unhealthy_online"],
- runner_names["unhealthy_offline"],
- runner_names["github_online_no_openstack_status"],
- ),
- )
- )
- openstack_manager_for_reconcile.get_github_runner_info = MagicMock(
- return_value=(
- RunnerGithubInfo(
- runner_name=runner_names["healthy_online"], runner_id=0, online=True, busy=True
- ),
- RunnerGithubInfo(
- runner_name=runner_names["unhealthy_online"], runner_id=1, online=True, busy=False
- ),
- RunnerGithubInfo(
- runner_name=runner_names["healthy_offline"], runner_id=2, online=False, busy=False
- ),
- RunnerGithubInfo(
- runner_name=runner_names["unhealthy_offline"],
- runner_id=3,
- online=False,
- busy=False,
- ),
- RunnerGithubInfo(
- runner_name=runner_names["github_online_no_openstack_status"],
- runner_id=4,
- online=True,
- busy=False,
- ),
- )
- )
-
- openstack_online_runner_names = [
- runner
- for (name, runner) in runner_names.items()
- if name != "github_online_no_openstack_status"
- ]
- openstack_instances = [
- openstack_manager.openstack.compute.v2.server.Server(
- name=runner_name, status=random.choice(("ACTIVE", "BUILD", "STOPPED"))
- )
- for runner_name in openstack_online_runner_names
- ]
- patched_create_connection_context.list_servers.return_value = openstack_instances
-
- openstack_manager.runner_metrics.extract.return_value = (MagicMock() for _ in range(1))
- openstack_manager.runner_metrics.issue_events.side_effect = [
- {metric_events.RunnerStart, metric_events.RunnerStop},
- ]
-
- openstack_manager_for_reconcile.reconcile(quantity=0)
-
- openstack_manager.runner_metrics.extract.assert_called_once_with(
- metrics_storage_manager=metrics.storage,
- runners=set(openstack_online_runner_names),
- )
-
-
-def test_reconcile_reactive_mode(
- openstack_manager_for_reconcile: openstack_manager.OpenstackRunnerManager,
- reactive_reconcile_mock: MagicMock,
- caplog: LogCaptureFixture,
-):
- """
- arrange: Enable reactive mode and mock the job class to return a job.
- act: Call reconcile with a random quantity n.
- assert: The mocked job is picked up n times and the expected log message is present.
- """
- count = random.randint(0, 5)
- openstack_manager_for_reconcile._config.reactive_config = ReactiveConfig(
- mq_uri=FAKE_MONGODB_URI
- )
- actual_count = openstack_manager_for_reconcile.reconcile(quantity=count)
-
- assert actual_count == count
- reactive_reconcile_mock.assert_called_with(
- quantity=count,
- mq_uri=FAKE_MONGODB_URI,
- queue_name=openstack_manager_for_reconcile.app_name,
- )
-
-
-def test_repo_policy_config(
- openstack_manager_for_reconcile: openstack_manager.OpenstackRunnerManager,
- monkeypatch: pytest.MonkeyPatch,
- patched_create_connection_context: MagicMock,
-):
- """
- arrange: Mock the repo policy compliance config.
- act: Reconcile to create a runner.
- assert: The expected url and one-time-token is present in the pre-job script in \
- the cloud-init data.
- """
- test_url = "http://test.url"
- token = secrets.token_hex(16)
- one_time_token = secrets.token_hex(16)
- openstack_manager_for_reconcile._config.charm_state.charm_config.repo_policy_compliance = (
- RepoPolicyComplianceConfig(url=test_url, token=token)
- )
- repo_policy_compliance_client_mock = MagicMock(
- spec=openstack_manager.RepoPolicyComplianceClient
- )
- repo_policy_compliance_client_mock.base_url = test_url
- repo_policy_compliance_client_mock.get_one_time_token.return_value = one_time_token
- repo_policy_compliance_cls_mock = MagicMock(return_value=repo_policy_compliance_client_mock)
- monkeypatch.setattr(
- openstack_manager, "RepoPolicyComplianceClient", repo_policy_compliance_cls_mock
- )
-
- openstack_manager_for_reconcile.reconcile(quantity=1)
-
- cloud_init_data_str = patched_create_connection_context.create_server.call_args[1]["userdata"]
- repo_policy_compliance_client_mock.get_one_time_token.assert_called_once()
- assert one_time_token in cloud_init_data_str
- assert test_url in cloud_init_data_str
-
-
-def test__ensure_security_group_with_existing_rules():
- """
- arrange: Mock OpenStack connection with the security rules created.
- act: Run `_ensure_security_group`.
- assert: The security rules are not created again.
- """
- connection_mock = MagicMock(spec=openstack.connection.Connection)
- connection_mock.list_security_groups.return_value = [
- {
- "security_group_rules": [
- {"protocol": "icmp"},
- {"protocol": "tcp", "port_range_min": 22, "port_range_max": 22},
- {"protocol": "tcp", "port_range_min": 10022, "port_range_max": 10022},
- ],
- "id": "TEST_ID",
- }
- ]
-
- openstack_manager.OpenstackRunnerManager._ensure_security_group(connection_mock)
- connection_mock.create_security_group_rule.assert_not_called()
-
-
-def test__get_ssh_connection(
- monkeypatch,
- patch_get_ssh_connection_health_check,
- mock_server: MagicMock,
-):
- """
- arrange: A server with SSH setup correctly.
- act: Get the SSH connections.
- assert: The SSH connections contains at least one connection.
- """
- # Patching the `_get_key_path` to get around the keyfile checks.
- mock__get_key_path = MagicMock(spec=openstack_manager.OpenstackRunnerManager._get_key_path)
- mock_key_path = MagicMock(spec=Path)
- mock_key_path.exists.return_value = True
- mock__get_key_path.return_value = mock_key_path
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager, "_get_key_path", mock__get_key_path
- )
- mock_connection = MagicMock(spec=openstack.connection.Connection)
- mock_connection.get_server.return_value = mock_server
-
- conn = openstack_manager.OpenstackRunnerManager._get_ssh_connection(
- mock_connection, mock_server.name
- )
- assert conn is not None
-
-
-@pytest.mark.usefixtures("skip_retry")
-def test__ssh_health_check_success(monkeypatch: pytest.MonkeyPatch, mock_server: MagicMock):
- """
- arrange: A server with SSH correctly setup.
- act: Run health check on the server.
- assert: The health check passes.
- """
- ssh_connection_mock = MagicMock()
- result_mock = MagicMock()
- result_mock.stdout = "/home/ubuntu/actions-runner/run.sh\nRunner.Worker"
- ssh_connection_mock.run.return_value = result_mock
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager,
- "_get_ssh_connection",
- MagicMock(return_value=ssh_connection_mock),
- )
- mock_connection = MagicMock(spec=openstack.connection.Connection)
- mock_connection.get_server.return_value = mock_server
-
- assert openstack_manager.OpenstackRunnerManager._ssh_health_check(
- mock_connection, mock_server.name, False
- )
-
-
-@pytest.mark.usefixtures("skip_retry")
-def test__ssh_health_check_no_key(mock_server: MagicMock):
- """
- arrange: A server with no key available.
- act: Run health check on the server.
- assert: The health check fails.
- """
- # Remove the mock SSH key.
- mock_server.key_name = None
-
- mock_connection = MagicMock(spec=openstack.connection.Connection)
- mock_connection.get_server.return_value = mock_server
-
- with pytest.raises(openstack_manager._SSHError) as exc:
- openstack_manager.OpenstackRunnerManager._ssh_health_check(
- mock_connection, mock_server.name, False
- )
-
- assert "no valid keypair found" in str(exc)
-
-
-@pytest.mark.usefixtures("skip_retry")
-def test__ssh_health_check_error(monkeypatch: pytest.MonkeyPatch, mock_server: MagicMock):
- """
- arrange: A server with error on SSH run.
- act: Run health check on the server.
- assert: The health check fails.
- """
- monkeypatch.setattr(openstack_manager.OpenstackRunnerManager, "_get_key_path", MagicMock())
- mock_connection = MagicMock(spec=openstack.connection.Connection)
- mock_connection.get_server.return_value = mock_server
- mock_ssh_connection = MagicMock()
- mock_ssh_connection.run = MagicMock(side_effect=TimeoutError)
- monkeypatch.setattr(
- openstack_manager, "SSHConnection", MagicMock(return_value=mock_ssh_connection)
- )
-
- with pytest.raises(openstack_manager._SSHError) as exc:
- openstack_manager.OpenstackRunnerManager._ssh_health_check(
- mock_connection, mock_server.name, False
- )
-
- assert "No connectable SSH addresses found" in str(exc)
-
-
-def test__wait_until_runner_process_running_no_server():
- """
- arrange: No server existing on the OpenStack connection.
- act: Check if runner process is running.
- assert: RunnerStartError thrown.
- """
- mock_connection = MagicMock(spec=openstack.connection.Connection)
- mock_connection.get_server.return_value = None
-
- with pytest.raises(RunnerStartError):
- openstack_manager.OpenstackRunnerManager._wait_until_runner_process_running(
- mock_connection, "Non-existing-server"
- )
-
-
-@pytest.mark.parametrize(
- "server",
- [
- pytest.param(None, id="no server"),
- pytest.param(factories.MockOpenstackServer(status="SHUTOFF"), id="shutoff"),
- pytest.param(factories.MockOpenstackServer(status="REBUILD"), id="not active/building"),
- ],
-)
-def test__health_check(server: factories.MockOpenstackServer | None):
- """
- arrange: given a mock openstack.get_server response.
- act: when _health_check is called.
- assert: False is returned, meaning unhealthy runner.
- """
- mock_get_server = MagicMock(return_value=server)
- mock_connection = MagicMock()
- mock_connection.get_server = mock_get_server
-
- assert not openstack_manager.OpenstackRunnerManager._health_check(
- conn=mock_connection, server_name="test"
- )
-
-
-# The SSH health check will temporarily return True on failure for debugging purposes.
-@pytest.mark.xfail
-def test__ssh_health_check_connection_error(monkeypatch: pytest.MonkeyPatch):
- """
- arrange: given a monkeypatched _get_ssh_connection function that raises _SSHError.
- act: when _ssh_health_check is called.
- assert: False is returned, meaning unhealthy runner.
- """
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager,
- "_get_ssh_connection",
- MagicMock(side_effect=openstack_manager._SSHError),
- )
-
- assert not openstack_manager.OpenstackRunnerManager._ssh_health_check(
- server=MagicMock(), startup=False
- )
-
-
-@pytest.mark.parametrize(
- "result",
- [
- pytest.param(factories.MockSSHRunResult(exited=1), id="ssh result not ok"),
- pytest.param(
- factories.MockSSHRunResult(exited=0, stdout=""),
- id="runner process not found in stdout",
- ),
- # This health check should fail but temporarily marking as passing for passive runner
- # deletion until we have more data.
- pytest.param(
- factories.MockSSHRunResult(exited=0, stdout="/home/ubuntu/actions-runner/run.sh"),
- id="startup process exists but no listener or worker process",
- ),
- ],
-)
-@pytest.mark.xfail
-def test__ssh_health_check_unhealthy(
- monkeypatch: pytest.MonkeyPatch, result: factories.MockSSHRunResult
-):
- """
- arrange: given unhealthy ssh responses.
- act: when _ssh_health_check is called.
- assert: False is returned, meaning unhealthy runner.
- """
- mock_ssh_connection = MagicMock()
- mock_ssh_connection.run = MagicMock(return_value=result)
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager,
- "_get_ssh_connection",
- MagicMock(return_value=mock_ssh_connection),
- )
-
- assert not openstack_manager.OpenstackRunnerManager._ssh_health_check(
- server=MagicMock(), startup=False
- )
-
-
-@pytest.mark.parametrize(
- "result, startup",
- [
- pytest.param(
- factories.MockSSHRunResult(
- exited=0, stdout="/home/ubuntu/actions-runner/run.sh\nRunner.Worker"
- ),
- False,
- id="runner process & workper process found",
- ),
- pytest.param(
- factories.MockSSHRunResult(
- exited=0, stdout="/home/ubuntu/actions-runner/run.sh\nRunner.Listener"
- ),
- False,
- id="runner process & listener process found",
- ),
- pytest.param(
- factories.MockSSHRunResult(exited=0, stdout="/home/ubuntu/actions-runner/run.sh"),
- True,
- id="runner process found for startup",
- ),
- ],
-)
-def test__ssh_health_check_healthy(
- monkeypatch: pytest.MonkeyPatch, result: factories.MockSSHRunResult, startup: bool
-):
- """
- arrange: given healthy ssh response.
- act: when _ssh_health_check is called.
- assert: True is returned, meaning healthy runner.
- """
- mock_ssh_connection = MagicMock()
- mock_ssh_connection.run = MagicMock(return_value=result)
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager,
- "_get_ssh_connection",
- MagicMock(return_value=mock_ssh_connection),
- )
-
- assert openstack_manager.OpenstackRunnerManager._ssh_health_check(
- conn=MagicMock(), server_name=MagicMock(), startup=startup
- )
-
-
-@pytest.mark.usefixtures("skip_retry")
-def test__get_ssh_connection_server_gone():
- """
- arrange: given a mock Openstack get_server function that returns None.
- act: when _get_ssh_connection is called.
- assert: _SSHError is raised.
- """
- mock_connection = MagicMock()
- mock_connection.get_server.return_value = None
-
- with pytest.raises(openstack_manager._SSHError) as exc:
- openstack_manager.OpenstackRunnerManager._get_ssh_connection(
- conn=mock_connection, server_name="test"
- )
-
- assert "Server gone while trying to get SSH connection" in str(exc.getrepr())
-
-
-@pytest.mark.usefixtures("skip_retry")
-def test__get_ssh_connection_no_server_key():
- """
- arrange: given a mock server instance with no key attached.
- act: when _get_ssh_connection is called.
- assert: _SSHError is raised.
- """
- mock_server = MagicMock()
- mock_server.key_name = None
- mock_connection = MagicMock()
- mock_connection.get_server.return_value = mock_server
-
- with pytest.raises(openstack_manager._SSHError) as exc:
- openstack_manager.OpenstackRunnerManager._get_ssh_connection(
- conn=mock_connection, server_name="test"
- )
-
- assert "Unable to create SSH connection, no valid keypair found" in str(exc.getrepr())
-
-
-@pytest.mark.usefixtures("skip_retry")
-def test__get_ssh_connection_key_not_exists(monkeypatch: pytest.MonkeyPatch):
- """
- arrange: given a monkeypatched _get_key_path function that returns a non-existent path.
- act: when _get_ssh_connection is called.
- assert: _SSHError is raised.
- """
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager,
- "_get_key_path",
- MagicMock(return_value=Path("does-not-exist")),
- )
- mock_connection = MagicMock()
-
- with pytest.raises(openstack_manager._SSHError) as exc:
- openstack_manager.OpenstackRunnerManager._get_ssh_connection(
- conn=mock_connection, server_name="test"
- )
-
- assert "Missing keyfile for server" in str(exc.getrepr())
-
-
-@pytest.mark.usefixtures("skip_retry")
-def test__get_ssh_connection_server_no_addresses(monkeypatch: pytest.MonkeyPatch):
- """
- arrange: given a mock server instance with no server addresses.
- act: when _get_ssh_connection is called.
- assert: _SSHError is raised.
- """
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager,
- "_get_key_path",
- MagicMock(return_value=Path(".")),
- )
- mock_server = MagicMock()
- mock_server.addresses = {}
- mock_connection = MagicMock()
- mock_connection.get_server.return_value = mock_server
-
- with pytest.raises(openstack_manager._SSHError) as exc:
- openstack_manager.OpenstackRunnerManager._get_ssh_connection(
- conn=mock_connection, server_name="test"
- )
-
- assert "No addresses found for OpenStack server" in str(exc.getrepr())
-
-
-@pytest.mark.usefixtures("skip_retry")
-@pytest.mark.parametrize(
- "run",
- [
- pytest.param(MagicMock(side_effect=TimeoutError), id="timeout error"),
- pytest.param(
- MagicMock(return_value=factories.MockSSHRunResult(exited=1)), id="result not ok"
- ),
- pytest.param(
- MagicMock(return_value=factories.MockSSHRunResult(exited=0, stdout="")),
- id="empty response",
- ),
- ],
-)
-def test__get_ssh_connection_server_no_valid_connections(
- monkeypatch: pytest.MonkeyPatch, run: MagicMock
-):
- """
- arrange: given a monkeypatched Connection instance that returns invalid connections.
- act: when _get_ssh_connection is called.
- assert: _SSHError is raised.
- """
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager,
- "_get_key_path",
- MagicMock(return_value=Path(".")),
- )
- mock_server = MagicMock()
- mock_server.addresses = {"test": [{"addr": "test-address"}]}
- mock_connection = MagicMock()
- mock_connection.get_server.return_value = mock_server
- mock_ssh_connection = MagicMock()
- mock_ssh_connection.run = run
- monkeypatch.setattr(
- openstack_manager, "SSHConnection", MagicMock(return_value=mock_ssh_connection)
- )
-
- with pytest.raises(openstack_manager._SSHError) as exc:
- openstack_manager.OpenstackRunnerManager._get_ssh_connection(
- conn=mock_connection, server_name="test"
- )
-
- assert "No connectable SSH addresses found" in str(exc.getrepr())
-
-
-@pytest.mark.usefixtures("skip_retry")
-def test__get_ssh_connection_server(monkeypatch: pytest.MonkeyPatch):
- """
- arrange: given monkeypatched SSH connection instance.
- act: when _get_ssh_connection is called.
- assert: the SSH connection instance is returned.
- """
- monkeypatch.setattr(
- openstack_manager.OpenstackRunnerManager,
- "_get_key_path",
- MagicMock(return_value=Path(".")),
- )
- mock_server = MagicMock()
- mock_server.addresses = {"test": [{"addr": "test-address"}]}
- mock_connection = MagicMock()
- mock_connection.get_server.return_value = mock_server
- mock_ssh_connection = MagicMock()
- mock_ssh_connection.run = MagicMock(
- return_value=factories.MockSSHRunResult(exited=0, stdout="hello world")
- )
- monkeypatch.setattr(
- openstack_manager, "SSHConnection", MagicMock(return_value=mock_ssh_connection)
- )
-
- assert (
- openstack_manager.OpenstackRunnerManager._get_ssh_connection(
- conn=mock_connection, server_name="test"
- )
- == mock_ssh_connection
- )
-
-
-def test_flush(monkeypatch: pytest.MonkeyPatch):
- """
- arrange: given monkeypatched sub functions of flush.
- act: when flush is called.
- assert: sub functions are called.
- """
- monkeypatch.setattr(openstack_manager, "_create_connection", MagicMock())
- monkeypatch.setattr(openstack_manager, "set_env_var", MagicMock())
- runner_manager = openstack_manager.OpenstackRunnerManager(
- app_name=MagicMock(),
- unit_num=MagicMock(),
- openstack_runner_manager_config=MagicMock(),
- cloud_config=MagicMock(),
- )
- runner_manager._kill_runner_processes = MagicMock()
- runner_manager._get_openstack_runner_status = MagicMock()
- runner_manager._github = MagicMock()
- runner_manager._remove_runners = MagicMock()
-
- runner_manager.flush(mode=MagicMock())
-
- runner_manager._kill_runner_processes.assert_called()
- runner_manager._get_openstack_runner_status.assert_called()
- runner_manager._github.get_runner_remove_token.assert_called()
- runner_manager._remove_runners.assert_called()
-
-
-@pytest.mark.parametrize(
- "flush_mode, expected_command",
- [
- pytest.param(
- FlushMode.FLUSH_BUSY,
- "pgrep -x Runner.Listener && kill $(pgrep -x Runner.Listener);"
- "pgrep -x Runner.Worker && kill $(pgrep -x Runner.Worker);",
- id="Flush Busy",
- ),
- pytest.param(
- FlushMode.FLUSH_IDLE,
- "! pgrep -x Runner.Worker && pgrep -x Runner.Listener && "
- "kill $(pgrep -x Runner.Listener)",
- id="Flush Idle",
- ),
- ],
-)
-def test__kill_runner_processes(
- monkeypatch: pytest.MonkeyPatch, flush_mode: FlushMode, expected_command: str
-):
- """
- arrange: given supported flush modes.
- act: when _kill_runner_processes is called.
- assert: expected kill commands are issued.
- """
- monkeypatch.setattr(openstack_manager, "_create_connection", MagicMock())
- monkeypatch.setattr(openstack_manager, "set_env_var", MagicMock())
- runner_manager = openstack_manager.OpenstackRunnerManager(
- app_name=MagicMock(),
- unit_num=MagicMock(),
- openstack_runner_manager_config=MagicMock(),
- cloud_config=MagicMock(),
- )
- runner_manager._get_openstack_instances = MagicMock(return_value=[MagicMock(), MagicMock()])
- mock_connection = MagicMock()
- runner_manager._get_ssh_connection = MagicMock(return_value=mock_connection)
-
- runner_manager._kill_runner_processes(conn=MagicMock(), mode=flush_mode)
-
- mock_connection.run.assert_called_with(expected_command, warn=True)
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index 7312a69e4..3bf4dfea0 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -61,12 +61,8 @@ def runner_manager_fixture(
"manager.runner_manager.RunnerManager._spawn_runners", mock_runner_manager_spawn_runners
)
# Patch out the metrics, as metrics has their own tests.
- monkeypatch.setattr(
- "manager.runner_manager.github_metrics.job", MagicMock()
- )
- monkeypatch.setattr(
- "manager.runner_manager.runner_metrics.issue_events", MagicMock()
- )
+ monkeypatch.setattr("manager.runner_manager.github_metrics.job", MagicMock())
+ monkeypatch.setattr("manager.runner_manager.runner_metrics.issue_events", MagicMock())
config = RunnerManagerConfig("mock_token", github_path)
runner_manager = RunnerManager("mock_runners", mock_cloud, config)
From 19b1a385bf88806b03887097d2f8c747ab3d82e6 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 28 Aug 2024 10:34:30 +0800
Subject: [PATCH 244/278] Fix test construction of runner manager.
---
tests/integration/test_runner_manager_openstack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index 089888b94..dfce31a8e 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -141,7 +141,7 @@ async def runner_manager_fixture(
Import of log_dir_base_path to monkeypatch the runner logs path with tmp_path.
"""
config = RunnerManagerConfig(token, github_path)
- return RunnerManager(openstack_runner_manager, config)
+ return RunnerManager("test_runner", openstack_runner_manager, config)
@pytest_asyncio.fixture(scope="function", name="runner_manager_with_one_runner")
From dab56116d94e1fe639e399f9ffca75b7b5b2faa0 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:48:16 +0800
Subject: [PATCH 245/278] Fix flavor naming
---
.../openstack_cloud.openstack_runner_manager.md | 14 ++++++++------
src/charm.py | 1 +
src/openstack_cloud/openstack_runner_manager.py | 5 ++++-
tests/integration/test_runner_manager_openstack.py | 2 +-
4 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md
index 81a35247b..7f28b8689 100644
--- a/src-docs/openstack_cloud.openstack_runner_manager.md
+++ b/src-docs/openstack_cloud.openstack_runner_manager.md
@@ -95,6 +95,7 @@ Manage self-hosted runner on OpenStack cloud.
```python
__init__(
+ manager_name: str,
prefix: str,
cloud_config: OpenStackCloudConfig,
server_config: OpenStackServerConfig | None,
@@ -109,6 +110,7 @@ Construct the object.
**Args:**
+ - `manager_name`: A name to identify this manager.
- `prefix`: The prefix to runner name.
- `cloud_config`: The configuration for OpenStack authorisation.
- `server_config`: The configuration for creating OpenStack server. Unable to create runner if None.
@@ -131,7 +133,7 @@ The prefix of runner names.
---
-
+
### method `cleanup`
@@ -154,7 +156,7 @@ Cleanup runner and resource on the cloud.
---
-
+
### method `create_runner`
@@ -184,7 +186,7 @@ Create a self-hosted runner.
---
-
+
### method `delete_runner`
@@ -208,7 +210,7 @@ Delete self-hosted runners.
---
-
+
### method `flush_runners`
@@ -231,7 +233,7 @@ Remove idle and/or busy runners.
---
-
+
### method `get_runner`
@@ -254,7 +256,7 @@ Get a self-hosted runner by instance id.
---
-
+
### method `get_runners`
diff --git a/src/charm.py b/src/charm.py
index 5adcd8b01..714b4485e 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -1255,6 +1255,7 @@ def _get_runner_scaler(
)
# The prefix is set to f"{application_name}-{unit number}"
openstack_runner_manager = OpenStackRunnerManager(
+ manager_name=self.app.name,
prefix=self.unit.name.replace("/", "-"),
cloud_config=cloud_config,
server_config=server_config,
diff --git a/src/openstack_cloud/openstack_runner_manager.py b/src/openstack_cloud/openstack_runner_manager.py
index c87af1a7d..aa03b0ec3 100644
--- a/src/openstack_cloud/openstack_runner_manager.py
+++ b/src/openstack_cloud/openstack_runner_manager.py
@@ -119,6 +119,7 @@ class OpenStackRunnerManager(CloudRunnerManager):
# Ignore "Too many arguments", as the class requires a lot of configurations.
def __init__( # pylint: disable=R0913
self,
+ manager_name: str,
prefix: str,
cloud_config: OpenStackCloudConfig,
server_config: OpenStackServerConfig | None,
@@ -128,6 +129,7 @@ def __init__( # pylint: disable=R0913
"""Construct the object.
Args:
+ manager_name: A name to identify this manager.
prefix: The prefix to runner name.
cloud_config: The configuration for OpenStack authorisation.
server_config: The configuration for creating OpenStack server. Unable to create
@@ -135,6 +137,7 @@ def __init__( # pylint: disable=R0913
runner_config: The configuration for the runner.
service_config: The configuration of supporting services of the runners.
"""
+ self._manager_name = manager_name
self._prefix = prefix
self._cloud_config = cloud_config
self._server_config = server_config
@@ -194,7 +197,7 @@ def create_runner(self, registration_token: str) -> InstanceId:
end_timestamp = time.time()
OpenStackRunnerManager._issue_runner_installed_metric(
name=instance_name,
- flavor=self.name_prefix,
+ flavor=self._manager_name,
install_start_timestamp=start_timestamp,
install_end_timestamp=end_timestamp,
)
diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py
index dfce31a8e..63b7204b3 100644
--- a/tests/integration/test_runner_manager_openstack.py
+++ b/tests/integration/test_runner_manager_openstack.py
@@ -125,7 +125,7 @@ async def openstack_runner_manager_fixture(
repo_policy_compliance=None,
)
return OpenStackRunnerManager(
- app_name, cloud_config, server_config, runner_config, service_config
+ app_name, f"{app_name}-0", cloud_config, server_config, runner_config, service_config
)
From 921ce2a0b3022007fe18caaa73ae0f4824303577 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:29:46 +0800
Subject: [PATCH 246/278] Fix flush action result output.
---
pyproject.toml | 2 +-
src-docs/runner_manager.md | 2 +-
src-docs/runner_manager_type.md | 2 +-
src/charm.py | 30 +++++++++++++++---------------
src/runner_manager.py | 18 +++++++++---------
src/runner_manager_type.py | 2 +-
6 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index a60427837..9b69abe91 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,7 @@ omit = [
]
[tool.coverage.report]
-fail_under = 83
+fail_under = 85
show_missing = true
[tool.pytest.ini_options]
diff --git a/src-docs/runner_manager.md b/src-docs/runner_manager.md
index 2cf622469..f3ea1a1e7 100644
--- a/src-docs/runner_manager.md
+++ b/src-docs/runner_manager.md
@@ -94,7 +94,7 @@ Check if runner binary exists.
### method `flush`
```python
-flush(mode: FlushMode = ) → int
+flush(mode: LXDFlushMode = ) → int
```
Remove existing runners.
diff --git a/src-docs/runner_manager_type.md b/src-docs/runner_manager_type.md
index f6b58a83c..cd7eaf5d2 100644
--- a/src-docs/runner_manager_type.md
+++ b/src-docs/runner_manager_type.md
@@ -11,7 +11,7 @@ Types used by RunnerManager class.
-## class `FlushMode`
+## class `LXDFlushMode`
Strategy for flushing runners.
During pre-job (repo-check), the runners are marked as idle and if the pre-job fails, the runner falls back to being idle again. Hence wait_repo_check is required.
diff --git a/src/charm.py b/src/charm.py
index 714b4485e..122b01efb 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -9,7 +9,7 @@
"""Charm for creating and managing GitHub self-hosted runner instances."""
from manager.cloud_runner_manager import GitHubRunnerConfig, SupportServiceConfig
-from manager.runner_manager import RunnerManager, RunnerManagerConfig
+from manager.runner_manager import FlushMode, RunnerManager, RunnerManagerConfig
from manager.runner_scaler import RunnerScaler
from utilities import bytes_with_unit_to_kib, execute_command, remove_residual_venv_dirs, retry
@@ -88,7 +88,7 @@
)
from runner import LXD_PROFILE_YAML
from runner_manager import LXDRunnerManager, LXDRunnerManagerConfig
-from runner_manager_type import FlushMode
+from runner_manager_type import LXDFlushMode
RECONCILE_RUNNERS_EVENT = "reconcile-runners"
@@ -515,7 +515,7 @@ def _on_start(self, _: StartEvent) -> None:
self.unit.status = MaintenanceStatus("Starting runners")
try:
- runner_manager.flush(FlushMode.FLUSH_IDLE)
+ runner_manager.flush(LXDFlushMode.FLUSH_IDLE)
self._reconcile_runners(
runner_manager,
state.runner_config.virtual_machines,
@@ -581,7 +581,7 @@ def _on_upgrade_charm(self, _: UpgradeCharmEvent) -> None:
runner_manager = self._get_runner_manager(state)
logger.info("Flushing the runners...")
- runner_manager.flush(FlushMode.FLUSH_BUSY_WAIT_REPO_CHECK)
+ runner_manager.flush(LXDFlushMode.FLUSH_BUSY_WAIT_REPO_CHECK)
self._reconcile_runners(
runner_manager,
state.runner_config.virtual_machines,
@@ -617,7 +617,7 @@ def _on_config_changed(self, _: ConfigChangedEvent) -> None: # noqa: C901
if prev_runner_manager:
self.unit.status = MaintenanceStatus("Removing runners due to config change")
# Flush runner in case the prev token has expired.
- prev_runner_manager.flush(FlushMode.FORCE_FLUSH_WAIT_REPO_CHECK)
+ prev_runner_manager.flush(LXDFlushMode.FORCE_FLUSH_WAIT_REPO_CHECK)
state = self._setup_state()
@@ -636,7 +636,7 @@ def _on_config_changed(self, _: ConfigChangedEvent) -> None: # noqa: C901
runner_manager = self._get_runner_manager(state)
if state.charm_config.token != self._stored.token:
- runner_manager.flush(FlushMode.FORCE_FLUSH_WAIT_REPO_CHECK)
+ runner_manager.flush(LXDFlushMode.FORCE_FLUSH_WAIT_REPO_CHECK)
self._stored.token = state.charm_config.token
self._reconcile_runners(
runner_manager,
@@ -697,7 +697,7 @@ def _check_and_update_local_lxd_dependencies(
runner_bin_updated,
)
self.unit.status = MaintenanceStatus("Flushing runners due to updated deps")
- runner_manager.flush(FlushMode.FLUSH_IDLE_WAIT_REPO_CHECK)
+ runner_manager.flush(LXDFlushMode.FLUSH_IDLE_WAIT_REPO_CHECK)
self._start_services(token, proxy_config)
self.unit.status = ActiveStatus()
@@ -850,21 +850,21 @@ def _on_flush_runners_action(self, event: ActionEvent) -> None:
if state.instance_type == InstanceType.OPENSTACK:
# Flushing mode not implemented for OpenStack yet.
runner_scaler = self._get_runner_scaler(state)
- flushed = runner_scaler.flush()
- event.set_results({"delta": {"virtual-machines": flushed}})
+ flushed = runner_scaler.flush(flush_mode=FlushMode.FLUSH_BUSY)
+ logger.info("Flushed %s runners", flushed)
+ delta = runner_scaler.reconcile(state.runner_config.virtual_machines)
+ event.set_results({"delta": {"virtual-machines": delta}})
return
runner_manager = self._get_runner_manager(state)
- runner_manager.flush(FlushMode.FLUSH_BUSY_WAIT_REPO_CHECK)
+ runner_manager.flush(LXDFlushMode.FLUSH_BUSY_WAIT_REPO_CHECK)
delta = self._reconcile_runners(
runner_manager,
state.runner_config.virtual_machines,
state.runner_config.virtual_machine_resources,
)
-
- self._on_check_runners_action(event)
- event.set_results(delta)
+ event.set_results({"delta": {"virtual-machines": delta}})
@catch_action_errors
def _on_update_dependencies_action(self, event: ActionEvent) -> None:
@@ -902,7 +902,7 @@ def _on_stop(self, _: StopEvent) -> None:
return
runner_manager = self._get_runner_manager(state)
- runner_manager.flush(FlushMode.FLUSH_BUSY)
+ runner_manager.flush(LXDFlushMode.FLUSH_BUSY)
def _reconcile_runners(
self, runner_manager: LXDRunnerManager, num: int, resources: VirtualMachineResources
@@ -1158,7 +1158,7 @@ def _on_debug_ssh_relation_changed(self, _: ops.RelationChangedEvent) -> None:
self._refresh_firewall(state)
runner_manager = self._get_runner_manager(state)
- runner_manager.flush(FlushMode.FLUSH_IDLE)
+ runner_manager.flush(LXDFlushMode.FLUSH_IDLE)
self._reconcile_runners(
runner_manager,
state.runner_config.virtual_machines,
diff --git a/src/runner_manager.py b/src/runner_manager.py
index e79d9f7a6..914bfdb0f 100644
--- a/src/runner_manager.py
+++ b/src/runner_manager.py
@@ -41,7 +41,7 @@
from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
from repo_policy_compliance_client import RepoPolicyComplianceClient
from runner import LXD_PROFILE_YAML, CreateRunnerConfig, Runner, RunnerConfig, RunnerStatus
-from runner_manager_type import FlushMode, LXDRunnerManagerConfig, RunnerInfo, RunnerManagerClients
+from runner_manager_type import LXDFlushMode, LXDRunnerManagerConfig, RunnerInfo, RunnerManagerClients
from runner_type import ProxySetting as RunnerProxySetting
from runner_type import RunnerNameByHealth
from utilities import execute_command, retry, set_env_var
@@ -619,7 +619,7 @@ def _runners_in_pre_job(self) -> bool:
return False
return True
- def flush(self, mode: FlushMode = FlushMode.FLUSH_IDLE) -> int:
+ def flush(self, mode: LXDFlushMode = LXDFlushMode.FLUSH_IDLE) -> int:
"""Remove existing runners.
Args:
@@ -636,7 +636,7 @@ def flush(self, mode: FlushMode = FlushMode.FLUSH_IDLE) -> int:
remove_token = self._clients.github.get_runner_remove_token(self.config.path)
except GithubClientError:
logger.exception("Failed to get remove-token to unregister runners from GitHub.")
- if mode != FlushMode.FORCE_FLUSH_WAIT_REPO_CHECK:
+ if mode != LXDFlushMode.FORCE_FLUSH_WAIT_REPO_CHECK:
raise
logger.info("Proceeding with flush without remove-token.")
remove_token = None
@@ -656,9 +656,9 @@ def flush(self, mode: FlushMode = FlushMode.FLUSH_IDLE) -> int:
logger.info(REMOVED_RUNNER_LOG_STR, runner.config.name)
if mode in (
- FlushMode.FLUSH_IDLE_WAIT_REPO_CHECK,
- FlushMode.FLUSH_BUSY_WAIT_REPO_CHECK,
- FlushMode.FORCE_FLUSH_WAIT_REPO_CHECK,
+ LXDFlushMode.FLUSH_IDLE_WAIT_REPO_CHECK,
+ LXDFlushMode.FLUSH_BUSY_WAIT_REPO_CHECK,
+ LXDFlushMode.FORCE_FLUSH_WAIT_REPO_CHECK,
):
for _ in range(5):
if not self._runners_in_pre_job():
@@ -673,9 +673,9 @@ def flush(self, mode: FlushMode = FlushMode.FLUSH_IDLE) -> int:
)
if mode in (
- FlushMode.FLUSH_BUSY_WAIT_REPO_CHECK,
- FlushMode.FLUSH_BUSY,
- FlushMode.FORCE_FLUSH_WAIT_REPO_CHECK,
+ LXDFlushMode.FLUSH_BUSY_WAIT_REPO_CHECK,
+ LXDFlushMode.FLUSH_BUSY,
+ LXDFlushMode.FORCE_FLUSH_WAIT_REPO_CHECK,
):
busy_runners = [runner for runner in self._get_runners() if runner.status.exist]
diff --git a/src/runner_manager_type.py b/src/runner_manager_type.py
index e37e0b290..95f8edcc3 100644
--- a/src/runner_manager_type.py
+++ b/src/runner_manager_type.py
@@ -17,7 +17,7 @@
from repo_policy_compliance_client import RepoPolicyComplianceClient
-class FlushMode(Enum):
+class LXDFlushMode(Enum):
"""Strategy for flushing runners.
During pre-job (repo-check), the runners are marked as idle and if the pre-job fails, the
From 4a3866e995d377b3093cf06df295491ce64fc296 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 28 Aug 2024 18:22:10 +0800
Subject: [PATCH 247/278] Fix flavor of metric
---
src/manager/runner_scaler.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index 5216ec4f3..b2727aabe 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -161,7 +161,7 @@ def reconcile(self, num_of_runner: int) -> int:
metric_events.issue_event(
metric_events.Reconciliation(
timestamp=time.time(),
- flavor=self._manager.name_prefix,
+ flavor=self._manager.manager_name,
crashed_runners=metric_stats.get(metric_events.RunnerStart, 0)
- metric_stats.get(metric_events.RunnerStop, 0),
idle_runners=len(available_runners),
From c11028ed3f2eb97dcad6c8544a2fb3e8a510d3e6 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 28 Aug 2024 18:23:39 +0800
Subject: [PATCH 248/278] Testing out an integration test fix
---
tests/integration/test_charm_metrics_failure.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_charm_metrics_failure.py b/tests/integration/test_charm_metrics_failure.py
index e3de1600d..a6058c8cb 100644
--- a/tests/integration/test_charm_metrics_failure.py
+++ b/tests/integration/test_charm_metrics_failure.py
@@ -55,7 +55,7 @@ async def app_fixture(
{
VIRTUAL_MACHINES_CONFIG_NAME: "0",
"repo-policy-compliance-token": "",
- "repo-policy-compliance-url": "",
+ "repo-policy-compliance-url": "http://fake_site.com",
}
)
await reconcile(app=app_for_metric, model=model)
From 58a6782c2092c6958d069cdbb93d6cc4b8e8c66d Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 29 Aug 2024 09:21:13 +0800
Subject: [PATCH 249/278] change flush runner to flush idle.
---
src/charm.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/charm.py b/src/charm.py
index 122b01efb..8cf81b910 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -850,7 +850,7 @@ def _on_flush_runners_action(self, event: ActionEvent) -> None:
if state.instance_type == InstanceType.OPENSTACK:
# Flushing mode not implemented for OpenStack yet.
runner_scaler = self._get_runner_scaler(state)
- flushed = runner_scaler.flush(flush_mode=FlushMode.FLUSH_BUSY)
+ flushed = runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE)
logger.info("Flushed %s runners", flushed)
delta = runner_scaler.reconcile(state.runner_config.virtual_machines)
event.set_results({"delta": {"virtual-machines": delta}})
From 9b57745c7c2e394b8e2f4aa1b0f0248da3efa0b2 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 29 Aug 2024 09:42:54 +0800
Subject: [PATCH 250/278] Add debug in integration test
---
tests/integration/test_charm_metrics_failure.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/integration/test_charm_metrics_failure.py b/tests/integration/test_charm_metrics_failure.py
index a6058c8cb..f62246158 100644
--- a/tests/integration/test_charm_metrics_failure.py
+++ b/tests/integration/test_charm_metrics_failure.py
@@ -55,7 +55,7 @@ async def app_fixture(
{
VIRTUAL_MACHINES_CONFIG_NAME: "0",
"repo-policy-compliance-token": "",
- "repo-policy-compliance-url": "http://fake_site.com",
+ "repo-policy-compliance-url": "",
}
)
await reconcile(app=app_for_metric, model=model)
@@ -84,6 +84,7 @@ async def test_charm_issues_metrics_for_failed_repo_policy(
await app.set_config({PATH_CONFIG_NAME: forked_github_repository.full_name})
if isinstance(instance_helper, OpenStackInstanceHelper):
+ print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
await setup_repo_policy(
app=app,
openstack_connection=instance_helper.openstack_connection,
From c51bb7be8260e81a51be786a1bed9ab12c17aed1 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 29 Aug 2024 13:11:55 +0800
Subject: [PATCH 251/278] Manual test mode
---
.github/workflows/e2e_test.yaml | 4 +++-
.github/workflows/integration_test.yaml | 4 +++-
.github/workflows/manual_test_env.yaml | 18 +++++++++++++++
src-docs/runner_manager.md | 22 +++++++++----------
src/runner_manager.py | 7 +++++-
.../integration/test_charm_metrics_failure.py | 5 ++++-
6 files changed, 45 insertions(+), 15 deletions(-)
create mode 100644 .github/workflows/manual_test_env.yaml
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index 7d0383c12..47224c63a 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,7 +1,9 @@
name: End-to-End tests
on:
- pull_request:
+ # TODO: debug
+ workflow_dispatch:
+ # pull_request:
jobs:
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 8e0bc700a..91137ea43 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,7 +1,9 @@
name: integration-tests
on:
- pull_request:
+ # TODO: debug
+ workflow_dispatch:
+ # pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
new file mode 100644
index 000000000..6b65cbfce
--- /dev/null
+++ b/.github/workflows/manual_test_env.yaml
@@ -0,0 +1,18 @@
+name: Manual test env
+
+on:
+ pull_request:
+
+jobs:
+ manual-test-env:
+ name: manual-test-env
+ runs-on: ["self-hosted", "stg-private-endpoint"]
+ steps:
+ - run: sudo apt update -yq
+ - run: sudo apt install pipx -yq
+ - run: pipx ensurepath
+ - run: pipx install tox
+ - uses: actions/checkout@v4
+ - name: Tmate debugging session (self-hosted)
+ uses: canonical/action-tmate@main
+ timeout-minutes: 300
diff --git a/src-docs/runner_manager.md b/src-docs/runner_manager.md
index f3ea1a1e7..f52829efa 100644
--- a/src-docs/runner_manager.md
+++ b/src-docs/runner_manager.md
@@ -13,7 +13,7 @@ Runner Manager manages the runners on LXD and GitHub.
---
-
+
## class `LXDRunnerManager`
Manage a group of runners according to configuration.
@@ -25,7 +25,7 @@ Manage a group of runners according to configuration.
- `runner_bin_path`: The github runner app scripts path.
- `cron_path`: The path to runner build image cron job.
-
+
### method `__init__`
@@ -52,7 +52,7 @@ Construct RunnerManager object for creating and managing runners.
---
-
+
### method `build_runner_image`
@@ -72,7 +72,7 @@ Build container image in test mode, else virtual machine image.
---
-
+
### method `check_runner_bin`
@@ -89,7 +89,7 @@ Check if runner binary exists.
---
-
+
### method `flush`
@@ -118,7 +118,7 @@ Remove existing runners.
---
-
+
### method `get_github_info`
@@ -135,7 +135,7 @@ Get information on the runners from GitHub.
---
-
+
### method `get_latest_runner_bin_url`
@@ -166,7 +166,7 @@ The runner binary URL changes when a new version is available.
---
-
+
### method `has_runner_image`
@@ -183,7 +183,7 @@ Check if the runner image exists.
---
-
+
### method `reconcile`
@@ -207,7 +207,7 @@ Bring runners in line with target.
---
-
+
### method `schedule_build_runner_image`
@@ -219,7 +219,7 @@ Install cron job for building runner image.
---
-
+
### method `update_runner_bin`
diff --git a/src/runner_manager.py b/src/runner_manager.py
index 914bfdb0f..31c30ef85 100644
--- a/src/runner_manager.py
+++ b/src/runner_manager.py
@@ -41,7 +41,12 @@
from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME
from repo_policy_compliance_client import RepoPolicyComplianceClient
from runner import LXD_PROFILE_YAML, CreateRunnerConfig, Runner, RunnerConfig, RunnerStatus
-from runner_manager_type import LXDFlushMode, LXDRunnerManagerConfig, RunnerInfo, RunnerManagerClients
+from runner_manager_type import (
+ LXDFlushMode,
+ LXDRunnerManagerConfig,
+ RunnerInfo,
+ RunnerManagerClients,
+)
from runner_type import ProxySetting as RunnerProxySetting
from runner_type import RunnerNameByHealth
from utilities import execute_command, retry, set_env_var
diff --git a/tests/integration/test_charm_metrics_failure.py b/tests/integration/test_charm_metrics_failure.py
index f62246158..e4f9c7965 100644
--- a/tests/integration/test_charm_metrics_failure.py
+++ b/tests/integration/test_charm_metrics_failure.py
@@ -83,8 +83,11 @@ async def test_charm_issues_metrics_for_failed_repo_policy(
"""
await app.set_config({PATH_CONFIG_NAME: forked_github_repository.full_name})
+ # TODO: debug
+ print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
+ print(instance_helper)
+ print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
if isinstance(instance_helper, OpenStackInstanceHelper):
- print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
await setup_repo_policy(
app=app,
openstack_connection=instance_helper.openstack_connection,
From 9e9eb33da2c37d0ebb5838bb8f57790df54f7171 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 29 Aug 2024 15:37:53 +0800
Subject: [PATCH 252/278] Start new manual test env
---
.github/workflows/manual_test_env.yaml | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index 6b65cbfce..85d9ac7e0 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -13,6 +13,10 @@ jobs:
- run: pipx ensurepath
- run: pipx install tox
- uses: actions/checkout@v4
+ - uses: charmed-kubernetes/actions-operator@main
+ - run: sudo snap install charmcraft --classic
+ - run: lxd init --auto
+ - run: charmcraft pack
- name: Tmate debugging session (self-hosted)
uses: canonical/action-tmate@main
timeout-minutes: 300
From 73f96422bf487813c8dff7d4878a09822b0b763c Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 29 Aug 2024 16:32:06 +0800
Subject: [PATCH 253/278] Spawn x64 manual test env.
---
.github/workflows/manual_test_env.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
index 85d9ac7e0..cab00f90e 100644
--- a/.github/workflows/manual_test_env.yaml
+++ b/.github/workflows/manual_test_env.yaml
@@ -6,7 +6,7 @@ on:
jobs:
manual-test-env:
name: manual-test-env
- runs-on: ["self-hosted", "stg-private-endpoint"]
+ runs-on: ["self-hosted", "stg-private-endpoint", "x64"]
steps:
- run: sudo apt update -yq
- run: sudo apt install pipx -yq
From 9785bcf633b219a60daae43fa7d6fe6839aebf4b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 29 Aug 2024 16:56:18 +0800
Subject: [PATCH 254/278] Improve logging during reconcile
---
src/manager/runner_scaler.py | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index b2727aabe..0fc4f5170 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -141,6 +141,9 @@ def reconcile(self, num_of_runner: int) -> int:
}
runner_list = self._manager.get_runners()
+ busy_runners = [
+ runner for runner in runner_list if runner.github_state == GitHubRunnerState.BUSY
+ ]
idle_runners = [
runner for runner in runner_list if runner.github_state == GitHubRunnerState.IDLE
]
@@ -150,6 +153,15 @@ def reconcile(self, num_of_runner: int) -> int:
if runner.github_state == GitHubRunnerState.OFFLINE
and runner.health == HealthState.HEALTHY
]
+ unhealthy_runners = [
+ runner
+ for runner in runner_list
+ if runner.health == HealthState.HEALTHY
+ ]
+ logger.info("Found %s busy runners: %s", len(busy_runners), busy_runners)
+ logger.info("Found %s idle runners: %s", len(idle_runners), idle_runners)
+ logger.info("Found %s offline runners that are healthy: %s", len(offline_healthy_runners), offline_healthy_runners)
+ logger.info("Found %s unhealthy runners: %s", len(unhealthy_runners), unhealthy_runners)
try:
available_runners = set(runner.name for runner in idle_runners) | set(
From 7e5859b535e3401761587cb02f4fe73b3f23bb11 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 29 Aug 2024 17:31:00 +0800
Subject: [PATCH 255/278] Fix crashed metric collection
---
src/manager/runner_scaler.py | 2 +-
tests/integration/test_charm_metrics_failure.py | 4 ----
2 files changed, 1 insertion(+), 5 deletions(-)
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index 0fc4f5170..b4877a583 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -136,7 +136,7 @@ def reconcile(self, num_of_runner: int) -> int:
# Merge the two metric stats.
if delete_metric_stats is not None:
metric_stats = {
- delete_metric_stats.get(event_name, 0) + metric_stats.get(event_name, 0)
+ event_name: delete_metric_stats.get(event_name, 0) + metric_stats.get(event_name, 0)
for event_name in set(delete_metric_stats) | set(metric_stats)
}
diff --git a/tests/integration/test_charm_metrics_failure.py b/tests/integration/test_charm_metrics_failure.py
index e4f9c7965..e3de1600d 100644
--- a/tests/integration/test_charm_metrics_failure.py
+++ b/tests/integration/test_charm_metrics_failure.py
@@ -83,10 +83,6 @@ async def test_charm_issues_metrics_for_failed_repo_policy(
"""
await app.set_config({PATH_CONFIG_NAME: forked_github_repository.full_name})
- # TODO: debug
- print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
- print(instance_helper)
- print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
if isinstance(instance_helper, OpenStackInstanceHelper):
await setup_repo_policy(
app=app,
From 3fa5c1782a748901674696adc1acbed6fd006293 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 29 Aug 2024 17:31:37 +0800
Subject: [PATCH 256/278] Remove debug workflow
---
.github/workflows/manual_test_env.yaml | 22 ----------------------
1 file changed, 22 deletions(-)
delete mode 100644 .github/workflows/manual_test_env.yaml
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
deleted file mode 100644
index cab00f90e..000000000
--- a/.github/workflows/manual_test_env.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: Manual test env
-
-on:
- pull_request:
-
-jobs:
- manual-test-env:
- name: manual-test-env
- runs-on: ["self-hosted", "stg-private-endpoint", "x64"]
- steps:
- - run: sudo apt update -yq
- - run: sudo apt install pipx -yq
- - run: pipx ensurepath
- - run: pipx install tox
- - uses: actions/checkout@v4
- - uses: charmed-kubernetes/actions-operator@main
- - run: sudo snap install charmcraft --classic
- - run: lxd init --auto
- - run: charmcraft pack
- - name: Tmate debugging session (self-hosted)
- uses: canonical/action-tmate@main
- timeout-minutes: 300
From 08477ee949660bda5bcb303f4c2c6f40c0195a3b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 29 Aug 2024 17:32:28 +0800
Subject: [PATCH 257/278] Format
---
src/manager/runner_scaler.py | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index b4877a583..06e2ec43a 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -136,7 +136,8 @@ def reconcile(self, num_of_runner: int) -> int:
# Merge the two metric stats.
if delete_metric_stats is not None:
metric_stats = {
- event_name: delete_metric_stats.get(event_name, 0) + metric_stats.get(event_name, 0)
+ event_name: delete_metric_stats.get(event_name, 0)
+ + metric_stats.get(event_name, 0)
for event_name in set(delete_metric_stats) | set(metric_stats)
}
@@ -154,13 +155,15 @@ def reconcile(self, num_of_runner: int) -> int:
and runner.health == HealthState.HEALTHY
]
unhealthy_runners = [
- runner
- for runner in runner_list
- if runner.health == HealthState.HEALTHY
+ runner for runner in runner_list if runner.health == HealthState.HEALTHY
]
logger.info("Found %s busy runners: %s", len(busy_runners), busy_runners)
logger.info("Found %s idle runners: %s", len(idle_runners), idle_runners)
- logger.info("Found %s offline runners that are healthy: %s", len(offline_healthy_runners), offline_healthy_runners)
+ logger.info(
+ "Found %s offline runners that are healthy: %s",
+ len(offline_healthy_runners),
+ offline_healthy_runners,
+ )
logger.info("Found %s unhealthy runners: %s", len(unhealthy_runners), unhealthy_runners)
try:
From f45b0045f9a53a37a3997fc6baa7829f5cd8a39b Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Thu, 29 Aug 2024 18:50:54 +0800
Subject: [PATCH 258/278] Test
---
.github/workflows/integration_test.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 91137ea43..7d371d046 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -3,7 +3,7 @@ name: integration-tests
on:
# TODO: debug
workflow_dispatch:
- # pull_request:
+ pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
From febc6526bb0843a4011a75664730d6bafc72d9f8 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 30 Aug 2024 11:57:11 +0800
Subject: [PATCH 259/278] Add reactive back in
---
src/charm.py | 2 +-
src/manager/runner_scaler.py | 40 +++++++++++++++++++++++++++-----
tests/unit/test_runner_scaler.py | 4 ++--
3 files changed, 37 insertions(+), 9 deletions(-)
diff --git a/src/charm.py b/src/charm.py
index c702f3659..73f81a74f 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -1288,7 +1288,7 @@ def _get_runner_scaler(
cloud_runner_manager=openstack_runner_manager,
config=runner_manager_config,
)
- return RunnerScaler(runner_manager=runner_manager)
+ return RunnerScaler(runner_manager=runner_manager, reactive_config=state.reactive_config)
if __name__ == "__main__":
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index 06e2ec43a..34707fe39 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -7,6 +7,10 @@
import time
from dataclasses import dataclass
+from pydantic import MongoDsn
+
+import reactive.runner_manager as reactive_runner_manager
+from charm_state import ReactiveConfig
from errors import IssueMetricEventError, MissingServerConfigError
from manager.cloud_runner_manager import HealthState
from manager.github_runner_manager import GitHubRunnerState
@@ -40,13 +44,15 @@ class RunnerInfo:
class RunnerScaler:
"""Manage the reconcile of runners."""
- def __init__(self, runner_manager: RunnerManager):
+ def __init__(self, runner_manager: RunnerManager, reactive_config: ReactiveConfig | None):
"""Construct the object.
Args:
runner_manager: The RunnerManager to perform runner reconcile.
+ reactive_config: Reactive runner configuration.
"""
self._manager = runner_manager
+ self._reactive_config = reactive_config
def get_runner_info(self) -> RunnerInfo:
"""Get information on the runners.
@@ -102,24 +108,28 @@ def flush(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> int:
}
return metric_stats.get(metric_events.RunnerStop, 0)
- def reconcile(self, num_of_runner: int) -> int:
+ def reconcile(self, quantity: int) -> int:
"""Reconcile the quantity of runners.
Args:
- num_of_runner: The number of intended runners.
+ quantity: The number of intended runners.
Returns:
The Change in number of runners.
"""
- logger.info("Start reconcile to %s runner", num_of_runner)
+ logger.info("Start reconcile to %s runner", quantity)
+
+ if self._reactive_config is not None:
+ logger.info("Reactive configuration detected, going into experimental reactive mode.")
+ return self._reconcile_reactive(quantity, self._reactive_config.mq_uri)
start_timestamp = time.time()
delete_metric_stats = None
metric_stats = self._manager.cleanup()
runners = self._manager.get_runners()
current_num = len(runners)
- logger.info("Reconcile runners from %s to %s", current_num, num_of_runner)
- runner_diff = num_of_runner - current_num
+ logger.info("Reconcile runners from %s to %s", current_num, quantity)
+ runner_diff = quantity - current_num
if runner_diff > 0:
try:
self._manager.create_runners(runner_diff)
@@ -187,3 +197,21 @@ def reconcile(self, num_of_runner: int) -> int:
logger.exception("Failed to issue Reconciliation metric")
return runner_diff
+
+ def _reconcile_reactive(self, quantity: int, mq_uri: MongoDsn) -> int:
+ """Reconcile runners reactively.
+
+ Args:
+ quantity: Number of intended runners.
+ mq_uri: The URI of the MQ to use to spawn runners reactively.
+
+ Returns:
+ The difference between intended runners and actual runners. In reactive mode
+ this number is never negative as additional processes should terminate after a timeout.
+ """
+ logger.info("Reactive mode is experimental and not yet fully implemented.")
+ return reactive_runner_manager.reconcile(
+ quantity=quantity,
+ mq_uri=mq_uri,
+ queue_name=self._manager.manager_name,
+ )
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index 3bf4dfea0..4b66dff57 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -72,7 +72,7 @@ def runner_manager_fixture(
@pytest.fixture(scope="function", name="runner_scaler")
def runner_scaler_fixture(runner_manager: RunnerManager) -> RunnerScaler:
- return RunnerScaler(runner_manager)
+ return RunnerScaler(runner_manager, None)
@pytest.fixture(scope="function", name="runner_scaler_one_runner")
@@ -162,7 +162,7 @@ def test_reconcile_runner_create_one(runner_scaler: RunnerScaler):
Act: Reconcile to no runners.
Assert: No changes. Runner info should contain no runners.
"""
- diff = runner_scaler.reconcile(num_of_runner=0)
+ diff = runner_scaler.reconcile(quantity=0)
assert diff == 0
assert_runner_info(runner_scaler, online=0)
From 7b430e5be1a1e2bd47de1dce2f1f8af60324da43 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 30 Aug 2024 13:01:36 +0800
Subject: [PATCH 260/278] Fix flushing of runners
---
src/charm.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/charm.py b/src/charm.py
index 73f81a74f..d2616ba1b 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -624,9 +624,9 @@ def _on_config_changed(self, _: ConfigChangedEvent) -> None: # noqa: C901
if state.instance_type == InstanceType.OPENSTACK:
if not self._get_set_image_ready_status():
return
- if state.charm_config.token != self._stored.token:
+ if should_flush_runners:
runner_scaler = self._get_runner_scaler(state)
- runner_scaler.flush()
+ runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE)
runner_scaler.reconcile(state.runner_config.virtual_machines)
# TODO: 2024-04-12: Flush on token changes.
self.unit.status = ActiveStatus()
From 85f7079e916fc9e4f44a686d48440c9b5409b114 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 30 Aug 2024 13:05:03 +0800
Subject: [PATCH 261/278] Debug workflow
---
.github/workflows/integration_test.yaml | 2 +-
tests/integration/helpers/charm_metrics.py | 8 ++++++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 7d371d046..91137ea43 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -3,7 +3,7 @@ name: integration-tests
on:
# TODO: debug
workflow_dispatch:
- pull_request:
+ # pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
diff --git a/tests/integration/helpers/charm_metrics.py b/tests/integration/helpers/charm_metrics.py
index b6c2f05bc..d42761c50 100644
--- a/tests/integration/helpers/charm_metrics.py
+++ b/tests/integration/helpers/charm_metrics.py
@@ -182,6 +182,10 @@ async def assert_events_after_reconciliation(
metrics_log = await get_metrics_log(unit=unit)
log_lines = list(map(lambda line: json.loads(line), metrics_log.splitlines()))
events = set(map(lambda line: line.get("event"), log_lines))
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
+
assert {
"runner_start",
"runner_stop",
@@ -209,6 +213,10 @@ async def assert_events_after_reconciliation(
JobConclusion.CANCELLED,
]
elif post_job_status == PostJobStatus.REPO_POLICY_CHECK_FAILURE:
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
+
assert metric_log.get("status_info", {}).get("code", 0) == 403
assert metric_log.get("job_conclusion") == JobConclusion.FAILURE
else:
From cd7d81b7a784e40bee3597c5d08a53f116bf9517 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 30 Aug 2024 14:25:30 +0800
Subject: [PATCH 262/278] Add debug
---
tests/integration/helpers/charm_metrics.py | 4 ----
tests/integration/helpers/openstack.py | 14 ++++++++++++++
2 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/tests/integration/helpers/charm_metrics.py b/tests/integration/helpers/charm_metrics.py
index d42761c50..5c882ac3d 100644
--- a/tests/integration/helpers/charm_metrics.py
+++ b/tests/integration/helpers/charm_metrics.py
@@ -213,10 +213,6 @@ async def assert_events_after_reconciliation(
JobConclusion.CANCELLED,
]
elif post_job_status == PostJobStatus.REPO_POLICY_CHECK_FAILURE:
- # TODO: debug
- import pytest
- pytest.set_trace()
-
assert metric_log.get("status_info", {}).get("code", 0) == 403
assert metric_log.get("job_conclusion") == JobConclusion.FAILURE
else:
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index b2d7624a6..a539a8d60 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -166,6 +166,11 @@ def _get_runner(self, unit: Unit) -> Server | None:
The runner server.
"""
servers: list[Server] = self.openstack_connection.list_servers()
+
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
+
runner = None
unit_name_without_slash = unit.name.replace("/", "-")
for server in servers:
@@ -212,8 +217,17 @@ async def setup_repo_policy(
}
)
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
await instance_helper.ensure_charm_has_runner(app=app)
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
await instance_helper.expose_to_instance(unit, 8080)
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
async def _install_repo_policy(
From 1534311c4fa7e5491aea35640e9f7e8ba8733d59 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 30 Aug 2024 15:44:42 +0800
Subject: [PATCH 263/278] Fix logging of health state
---
src/manager/runner_scaler.py | 2 +-
tests/integration/helpers/openstack.py | 17 ++++++++---------
2 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index 34707fe39..331d13728 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -165,7 +165,7 @@ def reconcile(self, quantity: int) -> int:
and runner.health == HealthState.HEALTHY
]
unhealthy_runners = [
- runner for runner in runner_list if runner.health == HealthState.HEALTHY
+ runner for runner in runner_list if runner.health == HealthState.UNHEALTHY or runner.health == HealthState.UNKNOWN
]
logger.info("Found %s busy runners: %s", len(busy_runners), busy_runners)
logger.info("Found %s idle runners: %s", len(idle_runners), idle_runners)
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index a539a8d60..f0879dd47 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -55,10 +55,18 @@ async def expose_to_instance(
break
assert ip, f"Failed to get IP address for OpenStack server {runner.name}"
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
+
ssh_cmd = f'ssh -fNT -R {port}:localhost:{port} -i /home/ubuntu/.ssh/runner-{runner.name}.key -o "StrictHostKeyChecking no" -o "ControlPersist yes" ubuntu@{ip} &'
exit_code, _, stderr = await run_in_unit(unit, ssh_cmd)
assert exit_code == 0, f"Error in SSH remote forwarding of port {port}: {stderr}"
+ # TODO: debug
+ import pytest
+ pytest.set_trace()
+
async def run_in_instance(
self,
unit: Unit,
@@ -217,17 +225,8 @@ async def setup_repo_policy(
}
)
- # TODO: debug
- import pytest
- pytest.set_trace()
await instance_helper.ensure_charm_has_runner(app=app)
- # TODO: debug
- import pytest
- pytest.set_trace()
await instance_helper.expose_to_instance(unit, 8080)
- # TODO: debug
- import pytest
- pytest.set_trace()
async def _install_repo_policy(
From 8fa1fd5813cce02765b2787c5f0366f5f396280d Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Fri, 30 Aug 2024 19:01:36 +0800
Subject: [PATCH 264/278] Remove debug
---
.github/workflows/e2e_test.yaml | 4 +---
.github/workflows/integration_test.yaml | 4 +---
tests/integration/helpers/charm_metrics.py | 4 ----
tests/integration/helpers/openstack.py | 15 +--------------
4 files changed, 3 insertions(+), 24 deletions(-)
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index 47224c63a..7d0383c12 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,9 +1,7 @@
name: End-to-End tests
on:
- # TODO: debug
- workflow_dispatch:
- # pull_request:
+ pull_request:
jobs:
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index 91137ea43..8e0bc700a 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -1,9 +1,7 @@
name: integration-tests
on:
- # TODO: debug
- workflow_dispatch:
- # pull_request:
+ pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
diff --git a/tests/integration/helpers/charm_metrics.py b/tests/integration/helpers/charm_metrics.py
index 5c882ac3d..b6c2f05bc 100644
--- a/tests/integration/helpers/charm_metrics.py
+++ b/tests/integration/helpers/charm_metrics.py
@@ -182,10 +182,6 @@ async def assert_events_after_reconciliation(
metrics_log = await get_metrics_log(unit=unit)
log_lines = list(map(lambda line: json.loads(line), metrics_log.splitlines()))
events = set(map(lambda line: line.get("event"), log_lines))
- # TODO: debug
- import pytest
- pytest.set_trace()
-
assert {
"runner_start",
"runner_stop",
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index f0879dd47..933b47208 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -55,18 +55,10 @@ async def expose_to_instance(
break
assert ip, f"Failed to get IP address for OpenStack server {runner.name}"
- # TODO: debug
- import pytest
- pytest.set_trace()
-
ssh_cmd = f'ssh -fNT -R {port}:localhost:{port} -i /home/ubuntu/.ssh/runner-{runner.name}.key -o "StrictHostKeyChecking no" -o "ControlPersist yes" ubuntu@{ip} &'
exit_code, _, stderr = await run_in_unit(unit, ssh_cmd)
assert exit_code == 0, f"Error in SSH remote forwarding of port {port}: {stderr}"
- # TODO: debug
- import pytest
- pytest.set_trace()
-
async def run_in_instance(
self,
unit: Unit,
@@ -174,11 +166,6 @@ def _get_runner(self, unit: Unit) -> Server | None:
The runner server.
"""
servers: list[Server] = self.openstack_connection.list_servers()
-
- # TODO: debug
- import pytest
- pytest.set_trace()
-
runner = None
unit_name_without_slash = unit.name.replace("/", "-")
for server in servers:
@@ -260,7 +247,7 @@ async def _install_repo_policy(
)
await run_in_unit(
unit,
- f'sudo -u ubuntu HTTPS_PROXY={https_proxy if https_proxy else ""} pip install --proxy http://squid.internal:3128 -r /home/ubuntu/repo_policy_compliance/requirements.txt',
+ f'sudo -u ubuntu HTTPS_PROXY={https_proxy if https_proxy else ""} pip install {f"--proxy {https_proxy}" if https_proxy else ""} -r /home/ubuntu/repo_policy_compliance/requirements.txt',
assert_on_failure=True,
assert_msg="Failed to install repo-policy-compliance requirements",
)
From d531297c3c97f9ccb5f1ca5201eb9f6f33ca4715 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sun, 1 Sep 2024 09:48:48 +0800
Subject: [PATCH 265/278] Debug
---
.github/workflows/e2e_test.yaml | 4 +++-
.github/workflows/manual_test_env.yaml | 22 ++++++++++++++++++++++
src/manager/runner_scaler.py | 5 ++---
tests/integration/helpers/openstack.py | 5 +++++
4 files changed, 32 insertions(+), 4 deletions(-)
create mode 100644 .github/workflows/manual_test_env.yaml
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index 7d0383c12..47224c63a 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,7 +1,9 @@
name: End-to-End tests
on:
- pull_request:
+ # TODO: debug
+ workflow_dispatch:
+ # pull_request:
jobs:
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
new file mode 100644
index 000000000..cab00f90e
--- /dev/null
+++ b/.github/workflows/manual_test_env.yaml
@@ -0,0 +1,22 @@
+name: Manual test env
+
+on:
+ pull_request:
+
+jobs:
+ manual-test-env:
+ name: manual-test-env
+ runs-on: ["self-hosted", "stg-private-endpoint", "x64"]
+ steps:
+ - run: sudo apt update -yq
+ - run: sudo apt install pipx -yq
+ - run: pipx ensurepath
+ - run: pipx install tox
+ - uses: actions/checkout@v4
+ - uses: charmed-kubernetes/actions-operator@main
+ - run: sudo snap install charmcraft --classic
+ - run: lxd init --auto
+ - run: charmcraft pack
+ - name: Tmate debugging session (self-hosted)
+ uses: canonical/action-tmate@main
+ timeout-minutes: 300
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index 331d13728..e8670e981 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -164,9 +164,8 @@ def reconcile(self, quantity: int) -> int:
if runner.github_state == GitHubRunnerState.OFFLINE
and runner.health == HealthState.HEALTHY
]
- unhealthy_runners = [
- runner for runner in runner_list if runner.health == HealthState.UNHEALTHY or runner.health == HealthState.UNKNOWN
- ]
+ unhealthy_states = set(HealthState.UNHEALTHY, HealthState.UNKNOWN)
+ unhealthy_runners = [runner for runner in runner_list if runner.health in unhealthy_states]
logger.info("Found %s busy runners: %s", len(busy_runners), busy_runners)
logger.info("Found %s idle runners: %s", len(idle_runners), idle_runners)
logger.info(
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index 933b47208..eefdb02ca 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -55,6 +55,11 @@ async def expose_to_instance(
break
assert ip, f"Failed to get IP address for OpenStack server {runner.name}"
+ # TODO: debug
+ import pytest
+
+ pytest.set_trace()
+
ssh_cmd = f'ssh -fNT -R {port}:localhost:{port} -i /home/ubuntu/.ssh/runner-{runner.name}.key -o "StrictHostKeyChecking no" -o "ControlPersist yes" ubuntu@{ip} &'
exit_code, _, stderr = await run_in_unit(unit, ssh_cmd)
assert exit_code == 0, f"Error in SSH remote forwarding of port {port}: {stderr}"
From 7fd2774384d747d3e2675f6881dcefbc74839c7f Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sun, 1 Sep 2024 11:03:59 +0800
Subject: [PATCH 266/278] Fix set construction
---
src/manager/runner_scaler.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index e8670e981..2cee3f13d 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -127,9 +127,8 @@ def reconcile(self, quantity: int) -> int:
delete_metric_stats = None
metric_stats = self._manager.cleanup()
runners = self._manager.get_runners()
- current_num = len(runners)
- logger.info("Reconcile runners from %s to %s", current_num, quantity)
- runner_diff = quantity - current_num
+ logger.info("Reconcile runners from %s to %s", len(runners), quantity)
+ runner_diff = quantity - len(runners)
if runner_diff > 0:
try:
self._manager.create_runners(runner_diff)
@@ -164,7 +163,7 @@ def reconcile(self, quantity: int) -> int:
if runner.github_state == GitHubRunnerState.OFFLINE
and runner.health == HealthState.HEALTHY
]
- unhealthy_states = set(HealthState.UNHEALTHY, HealthState.UNKNOWN)
+ unhealthy_states = set((HealthState.UNHEALTHY, HealthState.UNKNOWN))
unhealthy_runners = [runner for runner in runner_list if runner.health in unhealthy_states]
logger.info("Found %s busy runners: %s", len(busy_runners), busy_runners)
logger.info("Found %s idle runners: %s", len(idle_runners), idle_runners)
From 436325dca8442a73f00b24e7ea82fb536e893527 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Sun, 1 Sep 2024 14:02:56 +0800
Subject: [PATCH 267/278] Fix SSH key path in integration test setup
---
.github/workflows/e2e_test.yaml | 4 +---
.github/workflows/manual_test_env.yaml | 22 ----------------------
tests/integration/helpers/openstack.py | 14 +++++++-------
3 files changed, 8 insertions(+), 32 deletions(-)
delete mode 100644 .github/workflows/manual_test_env.yaml
diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index 47224c63a..7d0383c12 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -1,9 +1,7 @@
name: End-to-End tests
on:
- # TODO: debug
- workflow_dispatch:
- # pull_request:
+ pull_request:
jobs:
diff --git a/.github/workflows/manual_test_env.yaml b/.github/workflows/manual_test_env.yaml
deleted file mode 100644
index cab00f90e..000000000
--- a/.github/workflows/manual_test_env.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: Manual test env
-
-on:
- pull_request:
-
-jobs:
- manual-test-env:
- name: manual-test-env
- runs-on: ["self-hosted", "stg-private-endpoint", "x64"]
- steps:
- - run: sudo apt update -yq
- - run: sudo apt install pipx -yq
- - run: pipx ensurepath
- - run: pipx install tox
- - uses: actions/checkout@v4
- - uses: charmed-kubernetes/actions-operator@main
- - run: sudo snap install charmcraft --classic
- - run: lxd init --auto
- - run: charmcraft pack
- - name: Tmate debugging session (self-hosted)
- uses: canonical/action-tmate@main
- timeout-minutes: 300
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index eefdb02ca..5d562e748 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -10,6 +10,7 @@
from openstack.compute.v2.server import Server
from charm_state import VIRTUAL_MACHINES_CONFIG_NAME
+from openstack_cloud.openstack_cloud import OpenstackCloud
from tests.integration.helpers.common import InstanceHelper, reconcile, run_in_unit, wait_for
logger = logging.getLogger(__name__)
@@ -55,12 +56,9 @@ async def expose_to_instance(
break
assert ip, f"Failed to get IP address for OpenStack server {runner.name}"
- # TODO: debug
- import pytest
-
- pytest.set_trace()
-
- ssh_cmd = f'ssh -fNT -R {port}:localhost:{port} -i /home/ubuntu/.ssh/runner-{runner.name}.key -o "StrictHostKeyChecking no" -o "ControlPersist yes" ubuntu@{ip} &'
+ key_path = OpenstackCloud._get_key_path(runner.name)
+ assert key_path.exists(), f"SSH key for runner {runner.name} not found in the juju unit"
+ ssh_cmd = f'ssh -fNT -R {port}:localhost:{port} -i /home/ubuntu/.ssh/{runner.name}.key -o "StrictHostKeyChecking no" -o "ControlPersist yes" ubuntu@{ip} &'
exit_code, _, stderr = await run_in_unit(unit, ssh_cmd)
assert exit_code == 0, f"Error in SSH remote forwarding of port {port}: {stderr}"
@@ -99,7 +97,9 @@ async def run_in_instance(
break
assert ip, f"Failed to get IP address for OpenStack server {runner.name}"
- ssh_cmd = f'ssh -i /home/ubuntu/.ssh/runner-{runner.name}.key -o "StrictHostKeyChecking no" ubuntu@{ip} {command}'
+ key_path = OpenstackCloud._get_key_path(runner.name)
+ assert key_path.exists(), f"SSH key for runner {runner.name} not found in the juju unit"
+ ssh_cmd = f'ssh -i {key_path} -o "StrictHostKeyChecking no" ubuntu@{ip} {command}'
ssh_cmd_as_ubuntu_user = f"su - ubuntu -c '{ssh_cmd}'"
logging.warning("ssh_cmd: %s", ssh_cmd_as_ubuntu_user)
exit_code, stdout, stderr = await run_in_unit(unit, ssh_cmd, timeout)
From f1b0d68d4a7f77006b9283374bb2ac5088b37ed4 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 2 Sep 2024 09:56:30 +0800
Subject: [PATCH 268/278] Add more checks to repo-policy-compliance setup in
tests
---
src/manager/runner_scaler.py | 2 +-
tests/integration/helpers/openstack.py | 35 ++++++++++++++++----------
2 files changed, 23 insertions(+), 14 deletions(-)
diff --git a/src/manager/runner_scaler.py b/src/manager/runner_scaler.py
index 2cee3f13d..271b92e51 100644
--- a/src/manager/runner_scaler.py
+++ b/src/manager/runner_scaler.py
@@ -128,7 +128,7 @@ def reconcile(self, quantity: int) -> int:
metric_stats = self._manager.cleanup()
runners = self._manager.get_runners()
logger.info("Reconcile runners from %s to %s", len(runners), quantity)
- runner_diff = quantity - len(runners)
+ runner_diff = quantity - len(runners)
if runner_diff > 0:
try:
self._manager.create_runners(runner_diff)
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index 5d562e748..ff3da0f68 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -41,7 +41,7 @@ async def expose_to_instance(
unit: The juju unit of the github-runner charm.
port: The port on the juju machine to expose to the runner.
"""
- runner = self._get_runner(unit=unit)
+ runner = self._get_single_runner(unit=unit)
assert runner, f"Runner not found for unit {unit.name}"
network_address_list = runner.addresses.values()
logger.warning(network_address_list)
@@ -60,7 +60,9 @@ async def expose_to_instance(
assert key_path.exists(), f"SSH key for runner {runner.name} not found in the juju unit"
ssh_cmd = f'ssh -fNT -R {port}:localhost:{port} -i /home/ubuntu/.ssh/{runner.name}.key -o "StrictHostKeyChecking no" -o "ControlPersist yes" ubuntu@{ip} &'
exit_code, _, stderr = await run_in_unit(unit, ssh_cmd)
- assert exit_code == 0, f"Error in SSH remote forwarding of port {port}: {stderr}"
+ assert (
+ exit_code == 0
+ ), f"Error in starting background process of SSH remote forwarding of port {port}: {stderr}"
async def run_in_instance(
self,
@@ -82,7 +84,7 @@ async def run_in_instance(
Returns:
Tuple of return code, stdout and stderr.
"""
- runner = self._get_runner(unit=unit)
+ runner = self._get_single_runner(unit=unit)
assert runner, f"Runner not found for unit {unit.name}"
network_address_list = runner.addresses.values()
logger.warning(network_address_list)
@@ -157,12 +159,14 @@ async def _get_runner_names(self, unit: Unit) -> tuple[str, ...]:
Returns:
Tuple of runner names.
"""
- runner = self._get_runner(unit)
+ runner = self._get_single_runner(unit)
assert runner, "Failed to find runner server"
return (cast(str, runner.name),)
- def _get_runner(self, unit: Unit) -> Server | None:
- """Get the runner server.
+ def _get_single_runner(self, unit: Unit) -> Server | None:
+ """Get the only runner for the unit.
+
+ This method asserts for exactly one runner for the unit.
Args:
unit: The unit to get the runner for.
@@ -171,14 +175,12 @@ def _get_runner(self, unit: Unit) -> Server | None:
The runner server.
"""
servers: list[Server] = self.openstack_connection.list_servers()
- runner = None
unit_name_without_slash = unit.name.replace("/", "-")
- for server in servers:
- if server.name.startswith(unit_name_without_slash):
- runner = server
- break
-
- return runner
+ runners = [server for server in servers if server.name.startswith(unit_name_without_slash)]
+ assert (
+ len(runners) == 1
+ ), f"In {unit.name} found more than one runners or no runners: {runners}"
+ return runners[0]
async def setup_repo_policy(
@@ -219,6 +221,13 @@ async def setup_repo_policy(
await instance_helper.ensure_charm_has_runner(app=app)
await instance_helper.expose_to_instance(unit, 8080)
+ # This tests the connection to the repo policy compliance, not a health check of service.
+ await instance_helper.run_in_instance(
+ unit=unit,
+ command="curl http://localhost/8080",
+ assert_on_failure=True,
+ assert_msg="Unable to reach the repo policy compliance server setup",
+ )
async def _install_repo_policy(
From 7603bc1def9525a1879a1838065ddeacf3a35b62 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 2 Sep 2024 12:58:34 +0800
Subject: [PATCH 269/278] Fix key path check
---
tests/integration/helpers/openstack.py | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index ff3da0f68..c43301d75 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -57,8 +57,9 @@ async def expose_to_instance(
assert ip, f"Failed to get IP address for OpenStack server {runner.name}"
key_path = OpenstackCloud._get_key_path(runner.name)
- assert key_path.exists(), f"SSH key for runner {runner.name} not found in the juju unit"
- ssh_cmd = f'ssh -fNT -R {port}:localhost:{port} -i /home/ubuntu/.ssh/{runner.name}.key -o "StrictHostKeyChecking no" -o "ControlPersist yes" ubuntu@{ip} &'
+ exit_code, _, _ = await run_in_unit(unit, "ls {key_path}")
+ assert exit_code == 0, f"Unable to find key file {key_path}"
+ ssh_cmd = f'ssh -fNT -R {port}:localhost:{port} -i {key_path} -o "StrictHostKeyChecking no" -o "ControlPersist yes" ubuntu@{ip} &'
exit_code, _, stderr = await run_in_unit(unit, ssh_cmd)
assert (
exit_code == 0
@@ -100,7 +101,8 @@ async def run_in_instance(
assert ip, f"Failed to get IP address for OpenStack server {runner.name}"
key_path = OpenstackCloud._get_key_path(runner.name)
- assert key_path.exists(), f"SSH key for runner {runner.name} not found in the juju unit"
+ exit_code, _, _ = await run_in_unit(unit, "ls {key_path}")
+ assert exit_code == 0, f"Unable to find key file {key_path}"
ssh_cmd = f'ssh -i {key_path} -o "StrictHostKeyChecking no" ubuntu@{ip} {command}'
ssh_cmd_as_ubuntu_user = f"su - ubuntu -c '{ssh_cmd}'"
logging.warning("ssh_cmd: %s", ssh_cmd_as_ubuntu_user)
From 8bcea0023eea47d6d0d7e99c7109b435ce2c2a11 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 2 Sep 2024 16:04:19 +0800
Subject: [PATCH 270/278] Fix format string issue
---
tests/integration/helpers/openstack.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index c43301d75..f94a61f25 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -57,7 +57,7 @@ async def expose_to_instance(
assert ip, f"Failed to get IP address for OpenStack server {runner.name}"
key_path = OpenstackCloud._get_key_path(runner.name)
- exit_code, _, _ = await run_in_unit(unit, "ls {key_path}")
+ exit_code, _, _ = await run_in_unit(unit, "ls f{key_path}")
assert exit_code == 0, f"Unable to find key file {key_path}"
ssh_cmd = f'ssh -fNT -R {port}:localhost:{port} -i {key_path} -o "StrictHostKeyChecking no" -o "ControlPersist yes" ubuntu@{ip} &'
exit_code, _, stderr = await run_in_unit(unit, ssh_cmd)
@@ -101,7 +101,7 @@ async def run_in_instance(
assert ip, f"Failed to get IP address for OpenStack server {runner.name}"
key_path = OpenstackCloud._get_key_path(runner.name)
- exit_code, _, _ = await run_in_unit(unit, "ls {key_path}")
+ exit_code, _, _ = await run_in_unit(unit, f"ls {key_path}")
assert exit_code == 0, f"Unable to find key file {key_path}"
ssh_cmd = f'ssh -i {key_path} -o "StrictHostKeyChecking no" ubuntu@{ip} {command}'
ssh_cmd_as_ubuntu_user = f"su - ubuntu -c '{ssh_cmd}'"
From 031c113d6ad2e20cf331729838bee2b0822e28dc Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Mon, 2 Sep 2024 16:06:36 +0800
Subject: [PATCH 271/278] Fix format string typo
---
tests/integration/helpers/openstack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index f94a61f25..79973555a 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -57,7 +57,7 @@ async def expose_to_instance(
assert ip, f"Failed to get IP address for OpenStack server {runner.name}"
key_path = OpenstackCloud._get_key_path(runner.name)
- exit_code, _, _ = await run_in_unit(unit, "ls f{key_path}")
+ exit_code, _, _ = await run_in_unit(unit, f"ls {key_path}")
assert exit_code == 0, f"Unable to find key file {key_path}"
ssh_cmd = f'ssh -fNT -R {port}:localhost:{port} -i {key_path} -o "StrictHostKeyChecking no" -o "ControlPersist yes" ubuntu@{ip} &'
exit_code, _, stderr = await run_in_unit(unit, ssh_cmd)
From 49184a67c4d183651411812013ebb88b11001d82 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 3 Sep 2024 09:00:35 +0800
Subject: [PATCH 272/278] Add some logging of test setup
---
tests/integration/helpers/openstack.py | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index 79973555a..46d1f403a 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -2,6 +2,7 @@
# See LICENSE file for licensing details.
import logging
import secrets
+from asyncio import sleep
from typing import Optional, TypedDict, cast
import openstack.connection
@@ -43,6 +44,7 @@ async def expose_to_instance(
"""
runner = self._get_single_runner(unit=unit)
assert runner, f"Runner not found for unit {unit.name}"
+ logger.info("[TEST SETUP] Exposing port %s on %s", port, runner.name)
network_address_list = runner.addresses.values()
logger.warning(network_address_list)
assert (
@@ -65,6 +67,14 @@ async def expose_to_instance(
exit_code == 0
), f"Error in starting background process of SSH remote forwarding of port {port}: {stderr}"
+ await sleep(1)
+ for _ in range(6):
+ exit_code, _, _ = self.run_in_instance(unit=unit, command=f"nc -z localhost {port}")
+ if exit_code == 0:
+ return
+ await sleep(10)
+ assert False, f"Exposing the port {port} failed"
+
async def run_in_instance(
self,
unit: Unit,
@@ -87,6 +97,7 @@ async def run_in_instance(
"""
runner = self._get_single_runner(unit=unit)
assert runner, f"Runner not found for unit {unit.name}"
+ logger.info("[TEST SETUP] Run command %s on %s", command, runner.name)
network_address_list = runner.addresses.values()
logger.warning(network_address_list)
assert (
From 2eafedc732de6e06019cb6c111569e7799b68cec Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 3 Sep 2024 10:06:49 +0800
Subject: [PATCH 273/278] Fix missing await
---
src/charm.py | 3 ---
tests/integration/helpers/openstack.py | 4 +++-
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/charm.py b/src/charm.py
index d2616ba1b..173e0750a 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -1222,9 +1222,6 @@ def _get_runner_scaler(
) -> RunnerScaler:
"""Get runner scaler instance for scaling runners.
- TODO: 2024-07-09 Combine this with `_get_runner_manager` during the runner manager \
- interface refactor.
-
Args:
state: Charm state.
token: GitHub personal access token to manage the runners with. If None the token in
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index 46d1f403a..5c4d00103 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -69,7 +69,9 @@ async def expose_to_instance(
await sleep(1)
for _ in range(6):
- exit_code, _, _ = self.run_in_instance(unit=unit, command=f"nc -z localhost {port}")
+ exit_code, _, _ = await self.run_in_instance(
+ unit=unit, command=f"nc -z localhost {port}"
+ )
if exit_code == 0:
return
await sleep(10)
From 5b7830fe0317324360bdbb30f4ccf95e79d22867 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 3 Sep 2024 10:42:03 +0800
Subject: [PATCH 274/278] Revert config-change flushing
---
src/charm.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/charm.py b/src/charm.py
index 173e0750a..669bb1764 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -624,7 +624,7 @@ def _on_config_changed(self, _: ConfigChangedEvent) -> None: # noqa: C901
if state.instance_type == InstanceType.OPENSTACK:
if not self._get_set_image_ready_status():
return
- if should_flush_runners:
+ if state.charm_config.token != self._stored.token:
runner_scaler = self._get_runner_scaler(state)
runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE)
runner_scaler.reconcile(state.runner_config.virtual_machines)
From 4e0594e03206c3b7fec68c101091f8e5e03e86ba Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 3 Sep 2024 10:56:39 +0800
Subject: [PATCH 275/278] Add maintenance status for image relation change
---
src/charm.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/charm.py b/src/charm.py
index 669bb1764..f7d34d316 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -500,6 +500,7 @@ def _on_start(self, _: StartEvent) -> None:
state = self._setup_state()
if state.instance_type == InstanceType.OPENSTACK:
+ self.unit.status = MaintenanceStatus("Starting runners")
if not self._get_set_image_ready_status():
return
runner_scaler = self._get_runner_scaler(state)
@@ -1186,6 +1187,7 @@ def _on_image_relation_joined(self, _: ops.RelationJoinedEvent) -> None:
def _on_image_relation_changed(self, _: ops.RelationChangedEvent) -> None:
"""Handle image relation changed event."""
state = self._setup_state()
+ self.unit.status = MaintenanceStatus("Update image for runners")
if state.instance_type != InstanceType.OPENSTACK:
self.unit.status = BlockedStatus(
@@ -1196,8 +1198,7 @@ def _on_image_relation_changed(self, _: ops.RelationChangedEvent) -> None:
return
runner_scaler = self._get_runner_scaler(state)
- # TODO: 2024-04-12: Should be flush idle.
- runner_scaler.flush()
+ runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE)
runner_scaler.reconcile(state.runner_config.virtual_machines)
self.unit.status = ActiveStatus()
return
From 80e75528b62f362ca56bd20651407cbf4d700d86 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Tue, 3 Sep 2024 12:22:09 +0800
Subject: [PATCH 276/278] Fix HTTP format
---
tests/integration/helpers/openstack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py
index 5c4d00103..c15afd5a5 100644
--- a/tests/integration/helpers/openstack.py
+++ b/tests/integration/helpers/openstack.py
@@ -239,7 +239,7 @@ async def setup_repo_policy(
# This tests the connection to the repo policy compliance, not a health check of service.
await instance_helper.run_in_instance(
unit=unit,
- command="curl http://localhost/8080",
+ command="curl http://localhost:8080",
assert_on_failure=True,
assert_msg="Unable to reach the repo policy compliance server setup",
)
From f56e291a7c60acd141054a920bfac9749e178624 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 4 Sep 2024 10:46:18 +0800
Subject: [PATCH 277/278] Update coverage ignore of github_runner_manager
---
pyproject.toml | 2 --
src/manager/github_runner_manager.py | 3 ++-
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 9b69abe91..d16bac3a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,8 +13,6 @@ omit = [
# These are covered by `tests/integration/test_runner_manager_openstack.py`.
"src/openstack_cloud/openstack_cloud.py",
"src/openstack_cloud/openstack_runner_manager.py",
- # Thin wrapper around GitHub API. Not a lot of value in unit tests.
- "src/manager/github_runner_manager.py",
# Contains interface for calling LXD. Tested in integration tests and end to end tests.
"src/lxd.py",
# Contains interface for calling repo policy compliance service. Tested in integration test
diff --git a/src/manager/github_runner_manager.py b/src/manager/github_runner_manager.py
index 686976d84..949a1df38 100644
--- a/src/manager/github_runner_manager.py
+++ b/src/manager/github_runner_manager.py
@@ -45,7 +45,8 @@ def from_runner(runner: SelfHostedRunner) -> "GitHubRunnerState":
return state
-class GitHubRunnerManager:
+# Thin wrapper around the GitHub Client. Not much value in unit testing.
+class GitHubRunnerManager: # pragma: no cover
"""Manage self-hosted runner on GitHub side."""
def __init__(self, prefix: str, token: str, path: GitHubPath):
From 711b6eb512ea3ec9de46949271301d92f761a521 Mon Sep 17 00:00:00 2001
From: yhaliaw <43424755+yhaliaw@users.noreply.github.com>
Date: Wed, 4 Sep 2024 17:07:04 +0800
Subject: [PATCH 278/278] Minor fix in test comments
---
tests/unit/test_runner_scaler.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index 4b66dff57..845c8da49 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -178,7 +178,8 @@ def test_one_runner(runner_scaler: RunnerScaler):
Assert:
1. Runner info has one runner.
2. No changes to number of runner.
- 3. Runner info has one runner.
+ 3. No runners.
+ 4. Runner info has one runner.
"""
# 1.
diff = runner_scaler.reconcile(1)
@@ -194,7 +195,7 @@ def test_one_runner(runner_scaler: RunnerScaler):
runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE)
assert_runner_info(runner_scaler, online=0)
- # 3.
+ # 4.
diff = runner_scaler.reconcile(1)
assert diff == 1
assert_runner_info(runner_scaler, online=1)