diff --git a/src-docs/errors.py.md b/src-docs/errors.py.md index 4abec3e04..ed16bb84d 100644 --- a/src-docs/errors.py.md +++ b/src-docs/errors.py.md @@ -200,6 +200,15 @@ Error for loading file on runner. +--- + +## class `RunnerLogsError` +Base class for all runner logs errors. + + + + + --- ## class `RunnerMetricsError` diff --git a/src-docs/runner_logs.py.md b/src-docs/runner_logs.py.md index 824d9c78d..b6d660d26 100644 --- a/src-docs/runner_logs.py.md +++ b/src-docs/runner_logs.py.md @@ -11,7 +11,7 @@ Functions to pull and remove the logs of the crashed runners. --- - + ## function `get_crashed` @@ -30,9 +30,15 @@ Expects the runner to have an instance. - `runner`: The runner. + +**Raises:** + + - `RunnerLogsError`: If the runner logs could not be pulled. + + --- - + ## function `remove_outdated_crashed` diff --git a/src-docs/runner_manager.py.md b/src-docs/runner_manager.py.md index ab503799d..631916e88 100644 --- a/src-docs/runner_manager.py.md +++ b/src-docs/runner_manager.py.md @@ -46,7 +46,7 @@ Construct RunnerManager object for creating and managing runners. --- - + ### function `build_runner_image` @@ -83,7 +83,7 @@ Check if runner binary exists. --- - + ### function `flush` @@ -172,7 +172,7 @@ Bring runners in line with target. --- - + ### function `schedule_build_runner_image` diff --git a/src/errors.py b/src/errors.py index f670fa12b..b38364e9a 100644 --- a/src/errors.py +++ b/src/errors.py @@ -148,3 +148,7 @@ class GithubClientError(Exception): class JobNotFoundError(GithubClientError): """Represents an error when the job could not be found on GitHub.""" + + +class RunnerLogsError(Exception): + """Base class for all runner logs errors.""" diff --git a/src/runner_logs.py b/src/runner_logs.py index 94c1678c6..de8ad7b8e 100644 --- a/src/runner_logs.py +++ b/src/runner_logs.py @@ -9,6 +9,7 @@ from datetime import datetime from pathlib import Path +from errors import LxdError, RunnerLogsError from runner import Runner CRASHED_RUNNER_LOGS_DIR_PATH = Path("/var/log/github-runner-crashed") @@ -27,19 +28,24 @@ def get_crashed(runner: Runner) -> None: Args: runner: The runner. + + Raises: + RunnerLogsError: If the runner logs could not be pulled. """ logger.info("Pulling the logs of the crashed runner %s.", runner.config.name) if runner.instance is None: - logger.error( - "Cannot pull the logs for %s as runner has no running instance.", runner.config.name + raise RunnerLogsError( + f"Cannot pull the logs for {runner.config.name} as runner has no running instance." ) - return target_log_path = CRASHED_RUNNER_LOGS_DIR_PATH / runner.config.name target_log_path.mkdir(parents=True, exist_ok=True) - runner.instance.files.pull_file(str(DIAG_DIR_PATH), str(target_log_path), is_dir=True) - runner.instance.files.pull_file(str(SYSLOG_PATH), str(target_log_path)) + try: + runner.instance.files.pull_file(str(DIAG_DIR_PATH), str(target_log_path), is_dir=True) + runner.instance.files.pull_file(str(SYSLOG_PATH), str(target_log_path)) + except LxdError as exc: + raise RunnerLogsError(f"Cannot pull the logs for {runner.config.name}.") from exc def remove_outdated_crashed() -> None: diff --git a/src/runner_manager.py b/src/runner_manager.py index 96c40ffa3..1147c69a9 100644 --- a/src/runner_manager.py +++ b/src/runner_manager.py @@ -485,7 +485,12 @@ def reconcile(self, quantity: int, resources: VirtualMachineResources) -> int: for runner in unhealthy_runners: if self.config.are_metrics_enabled: - runner_logs.get_crashed(runner) + try: + runner_logs.get_crashed(runner) + except errors.RunnerLogsError: + logger.exception( + "Failed to get logs of crashed runner %s", runner.config.name + ) runner.remove(remove_token) logger.info(REMOVED_RUNNER_LOG_STR, runner.config.name) diff --git a/tests/unit/test_runner_logs.py b/tests/unit/test_runner_logs.py index dcd209536..ce87d0de4 100644 --- a/tests/unit/test_runner_logs.py +++ b/tests/unit/test_runner_logs.py @@ -6,6 +6,7 @@ import pytest import runner_logs +from errors import LxdError, RunnerLogsError from runner_logs import get_crashed @@ -42,6 +43,41 @@ def test_get_crashed(log_dir_base_path: Path): ) +def test_get_crashed_no_instance(log_dir_base_path: Path): + """ + arrange: Mock the Runner instance to be None + act: Get the logs of the crashed runner + assert: A RunnerLogsError is raised + """ + runner = MagicMock() + runner.config.name = "test-runner" + runner.instance = None + + with pytest.raises(RunnerLogsError) as exc_info: + get_crashed(runner) + + assert "Cannot pull the logs for test-runner as runner has no running instance." in str( + exc_info.value + ) + + +def test_get_crashed_lxd_error(log_dir_base_path: Path): + """ + arrange: Mock the Runner instance to raise an LxdError + act: Get the logs of the crashed runner + assert: A RunnerLogsError is raised + """ + runner = MagicMock() + runner.config.name = "test-runner" + runner.instance.files.pull_file = MagicMock(side_effect=LxdError("Cannot pull file")) + + with pytest.raises(RunnerLogsError) as exc_info: + get_crashed(runner) + + assert "Cannot pull the logs for test-runner." in str(exc_info.value) + assert "Cannot pull file" in str(exc_info.value.__cause__) + + def test_remove_outdated_crashed(log_dir_base_path: Path, monkeypatch: pytest.MonkeyPatch): """ arrange: Mock the base log directory path