diff --git a/src-docs/manager.runner_scaler.md b/src-docs/manager.runner_scaler.md
index ade13dd..e5abac0 100644
--- a/src-docs/manager.runner_scaler.md
+++ b/src-docs/manager.runner_scaler.md
@@ -9,7 +9,7 @@ Module for scaling the runners amount.
---
-
+
## class `RunnerInfo`
Information on the runners.
@@ -50,12 +50,12 @@ __init__(
---
-
+
## class `RunnerScaler`
Manage the reconcile of runners.
-
+
### method `__init__`
@@ -77,7 +77,7 @@ Construct the object.
---
-
+
### method `flush`
@@ -100,7 +100,7 @@ Flush the runners.
---
-
+
### method `get_runner_info`
@@ -117,7 +117,7 @@ Get information on the runners.
---
-
+
### method `reconcile`
diff --git a/src/github_runner_manager/manager/runner_scaler.py b/src/github_runner_manager/manager/runner_scaler.py
index 3ddcf03..9b9234b 100644
--- a/src/github_runner_manager/manager/runner_scaler.py
+++ b/src/github_runner_manager/manager/runner_scaler.py
@@ -13,7 +13,12 @@
from github_runner_manager.errors import IssueMetricEventError, MissingServerConfigError
from github_runner_manager.manager.cloud_runner_manager import HealthState
from github_runner_manager.manager.github_runner_manager import GitHubRunnerState
-from github_runner_manager.manager.runner_manager import FlushMode, RunnerManager
+from github_runner_manager.manager.runner_manager import (
+ FlushMode,
+ IssuedMetricEventsStats,
+ RunnerInstance,
+ RunnerManager,
+)
from github_runner_manager.metrics import events as metric_events
from github_runner_manager.types_ import ReactiveConfig
@@ -123,34 +128,59 @@ def reconcile(self, quantity: int) -> int:
logger.info("Reactive configuration detected, going into experimental reactive mode.")
return self._reconcile_reactive(quantity, self._reactive_config.mq_uri)
+ metric_stats = {}
start_timestamp = time.time()
- delete_metric_stats = None
- metric_stats = self._manager.cleanup()
- runners = self._manager.get_runners()
- logger.info("Reconcile runners from %s to %s", len(runners), quantity)
- runner_diff = quantity - len(runners)
- if runner_diff > 0:
- try:
- self._manager.create_runners(runner_diff)
- except MissingServerConfigError:
- logging.exception(
- "Unable to spawn runner due to missing server configuration, such as, image."
- )
- elif runner_diff < 0:
- delete_metric_stats = self._manager.delete_runners(-runner_diff)
- else:
- logger.info("No changes to the number of runners.")
- end_timestamp = time.time()
-
- # Merge the two metric stats.
- if delete_metric_stats is not None:
- metric_stats = {
- event_name: delete_metric_stats.get(event_name, 0)
- + metric_stats.get(event_name, 0)
- for event_name in set(delete_metric_stats) | set(metric_stats)
- }
- runner_list = self._manager.get_runners()
+ try:
+ delete_metric_stats = None
+ metric_stats = self._manager.cleanup()
+ runners = self._manager.get_runners()
+ logger.info("Reconcile runners from %s to %s", len(runners), quantity)
+ runner_diff = quantity - len(runners)
+ if runner_diff > 0:
+ try:
+ self._manager.create_runners(runner_diff)
+ except MissingServerConfigError:
+ logging.exception(
+ "Unable to spawn runner due to missing server configuration, "
+ "such as, image."
+ )
+ elif runner_diff < 0:
+ delete_metric_stats = self._manager.delete_runners(-runner_diff)
+ else:
+ logger.info("No changes to the number of runners.")
+
+ # Merge the two metric stats.
+ if delete_metric_stats is not None:
+ metric_stats = {
+ event_name: delete_metric_stats.get(event_name, 0)
+ + metric_stats.get(event_name, 0)
+ for event_name in set(delete_metric_stats) | set(metric_stats)
+ }
+ finally:
+ runner_list = self._manager.get_runners()
+ self._log_runners(runner_list)
+ end_timestamp = time.time()
+ self._issue_reconciliation_metric(
+ start_timestamp, end_timestamp, metric_stats, runner_list
+ )
+
+ return runner_diff
+
+ @staticmethod
+ def _log_runners(runner_list: tuple[RunnerInstance]) -> None:
+ """Log information about the runners found.
+
+ Args:
+ runner_list: The list of runners.
+ """
+ for runner in runner_list:
+ logger.info(
+ "Runner %s: state=%s, health=%s",
+ runner.name,
+ runner.github_state,
+ runner.health,
+ )
busy_runners = [
runner for runner in runner_list if runner.github_state == GitHubRunnerState.BUSY
]
@@ -163,7 +193,7 @@ def reconcile(self, quantity: int) -> int:
if runner.github_state == GitHubRunnerState.OFFLINE
and runner.health == HealthState.HEALTHY
]
- unhealthy_states = set((HealthState.UNHEALTHY, HealthState.UNKNOWN))
+ unhealthy_states = {HealthState.UNHEALTHY, HealthState.UNKNOWN}
unhealthy_runners = [runner for runner in runner_list if runner.health in unhealthy_states]
logger.info("Found %s busy runners: %s", len(busy_runners), busy_runners)
logger.info("Found %s idle runners: %s", len(idle_runners), idle_runners)
@@ -174,6 +204,31 @@ def reconcile(self, quantity: int) -> int:
)
logger.info("Found %s unhealthy runners: %s", len(unhealthy_runners), unhealthy_runners)
+ def _issue_reconciliation_metric(
+ self,
+ start_timestamp: float,
+ end_timestamp: float,
+ metric_stats: IssuedMetricEventsStats,
+ runner_list: tuple[RunnerInstance],
+ ) -> None:
+ """Issue the reconciliation metric.
+
+ Args:
+ start_timestamp: The start timestamp of the reconciliation.
+ end_timestamp: The end timestamp of the reconciliation.
+ metric_stats: The metric stats.
+ runner_list: The list of runners
+ """
+ idle_runners = [
+ runner for runner in runner_list if runner.github_state == GitHubRunnerState.IDLE
+ ]
+ offline_healthy_runners = [
+ runner
+ for runner in runner_list
+ if runner.github_state == GitHubRunnerState.OFFLINE
+ and runner.health == HealthState.HEALTHY
+ ]
+
try:
available_runners = set(runner.name for runner in idle_runners) | set(
runner.name for runner in offline_healthy_runners
@@ -194,8 +249,6 @@ def reconcile(self, quantity: int) -> int:
except IssueMetricEventError:
logger.exception("Failed to issue Reconciliation metric")
- return runner_diff
-
def _reconcile_reactive(self, quantity: int, mq_uri: MongoDsn) -> int:
"""Reconcile runners reactively.
diff --git a/tests/unit/test_runner_scaler.py b/tests/unit/test_runner_scaler.py
index 9806a50..1ebaffc 100644
--- a/tests/unit/test_runner_scaler.py
+++ b/tests/unit/test_runner_scaler.py
@@ -15,6 +15,7 @@
RunnerManagerConfig,
)
from github_runner_manager.manager.runner_scaler import RunnerScaler
+from github_runner_manager.metrics.events import Reconciliation
from github_runner_manager.types_.github import GitHubPath, GitHubRepo
from tests.unit.mock_runner_managers import (
MockCloudRunnerManager,
@@ -56,9 +57,18 @@ def mock_runner_managers_fixture(
return (mock_cloud, mock_github)
+@pytest.fixture(scope="function", name="issue_events_mock")
+def issue_events_mock_fixture(monkeypatch: pytest.MonkeyPatch):
+ issue_events_mock = MagicMock()
+ monkeypatch.setattr(
+ "github_runner_manager.manager.runner_scaler.metric_events.issue_event", issue_events_mock
+ )
+ return issue_events_mock
+
+
@pytest.fixture(scope="function", name="runner_manager")
def runner_manager_fixture(
- monkeypatch, mock_runner_managers, github_path: GitHubPath
+ monkeypatch, mock_runner_managers, github_path: GitHubPath, issue_events_mock
) -> RunnerManager:
mock_cloud, mock_github = mock_runner_managers
monkeypatch.setattr(
@@ -176,6 +186,24 @@ def test_reconcile_runner_create_one(runner_scaler: RunnerScaler):
assert_runner_info(runner_scaler, online=0)
+def test_reconcile_error_still_issue_metrics(
+ runner_scaler: RunnerScaler, monkeypatch: pytest.MonkeyPatch, issue_events_mock: MagicMock
+):
+ """
+ Arrange: A RunnerScaler with no runners which raises an error on reconcile.
+ Act: Reconcile to one runner.
+ Assert: ReconciliationEvent should be issued.
+ """
+ monkeypatch.setattr(
+ runner_scaler._manager, "cleanup", MagicMock(side_effect=Exception("Mock error"))
+ )
+ with pytest.raises(Exception):
+ runner_scaler.reconcile(1)
+ issue_events_mock.assert_called_once()
+ issued_event = issue_events_mock.call_args[0][0]
+ assert isinstance(issued_event, Reconciliation)
+
+
def test_one_runner(runner_scaler: RunnerScaler):
"""
Arrange: A RunnerScaler with no runners.