diff --git a/src-docs/errors.py.md b/src-docs/errors.py.md
index 4abec3e04..ed16bb84d 100644
--- a/src-docs/errors.py.md
+++ b/src-docs/errors.py.md
@@ -200,6 +200,15 @@ Error for loading file on runner.
+---
+
+## class `RunnerLogsError`
+Base class for all runner logs errors.
+
+
+
+
+
---
## class `RunnerMetricsError`
diff --git a/src-docs/runner_logs.py.md b/src-docs/runner_logs.py.md
index 824d9c78d..b6d660d26 100644
--- a/src-docs/runner_logs.py.md
+++ b/src-docs/runner_logs.py.md
@@ -11,7 +11,7 @@ Functions to pull and remove the logs of the crashed runners.
---
-
+
## function `get_crashed`
@@ -30,9 +30,15 @@ Expects the runner to have an instance.
- `runner`: The runner.
+
+**Raises:**
+
+ - `RunnerLogsError`: If the runner logs could not be pulled.
+
+
---
-
+
## function `remove_outdated_crashed`
diff --git a/src-docs/runner_manager.py.md b/src-docs/runner_manager.py.md
index ab503799d..631916e88 100644
--- a/src-docs/runner_manager.py.md
+++ b/src-docs/runner_manager.py.md
@@ -46,7 +46,7 @@ Construct RunnerManager object for creating and managing runners.
---
-
+
### function `build_runner_image`
@@ -83,7 +83,7 @@ Check if runner binary exists.
---
-
+
### function `flush`
@@ -172,7 +172,7 @@ Bring runners in line with target.
---
-
+
### function `schedule_build_runner_image`
diff --git a/src/errors.py b/src/errors.py
index f670fa12b..b38364e9a 100644
--- a/src/errors.py
+++ b/src/errors.py
@@ -148,3 +148,7 @@ class GithubClientError(Exception):
class JobNotFoundError(GithubClientError):
"""Represents an error when the job could not be found on GitHub."""
+
+
+class RunnerLogsError(Exception):
+ """Base class for all runner logs errors."""
diff --git a/src/runner_logs.py b/src/runner_logs.py
index 94c1678c6..de8ad7b8e 100644
--- a/src/runner_logs.py
+++ b/src/runner_logs.py
@@ -9,6 +9,7 @@
from datetime import datetime
from pathlib import Path
+from errors import LxdError, RunnerLogsError
from runner import Runner
CRASHED_RUNNER_LOGS_DIR_PATH = Path("/var/log/github-runner-crashed")
@@ -27,19 +28,24 @@ def get_crashed(runner: Runner) -> None:
Args:
runner: The runner.
+
+ Raises:
+ RunnerLogsError: If the runner logs could not be pulled.
"""
logger.info("Pulling the logs of the crashed runner %s.", runner.config.name)
if runner.instance is None:
- logger.error(
- "Cannot pull the logs for %s as runner has no running instance.", runner.config.name
+ raise RunnerLogsError(
+ f"Cannot pull the logs for {runner.config.name} as runner has no running instance."
)
- return
target_log_path = CRASHED_RUNNER_LOGS_DIR_PATH / runner.config.name
target_log_path.mkdir(parents=True, exist_ok=True)
- runner.instance.files.pull_file(str(DIAG_DIR_PATH), str(target_log_path), is_dir=True)
- runner.instance.files.pull_file(str(SYSLOG_PATH), str(target_log_path))
+ try:
+ runner.instance.files.pull_file(str(DIAG_DIR_PATH), str(target_log_path), is_dir=True)
+ runner.instance.files.pull_file(str(SYSLOG_PATH), str(target_log_path))
+ except LxdError as exc:
+ raise RunnerLogsError(f"Cannot pull the logs for {runner.config.name}.") from exc
def remove_outdated_crashed() -> None:
diff --git a/src/runner_manager.py b/src/runner_manager.py
index 96c40ffa3..1147c69a9 100644
--- a/src/runner_manager.py
+++ b/src/runner_manager.py
@@ -485,7 +485,12 @@ def reconcile(self, quantity: int, resources: VirtualMachineResources) -> int:
for runner in unhealthy_runners:
if self.config.are_metrics_enabled:
- runner_logs.get_crashed(runner)
+ try:
+ runner_logs.get_crashed(runner)
+ except errors.RunnerLogsError:
+ logger.exception(
+ "Failed to get logs of crashed runner %s", runner.config.name
+ )
runner.remove(remove_token)
logger.info(REMOVED_RUNNER_LOG_STR, runner.config.name)
diff --git a/tests/unit/test_runner_logs.py b/tests/unit/test_runner_logs.py
index dcd209536..ce87d0de4 100644
--- a/tests/unit/test_runner_logs.py
+++ b/tests/unit/test_runner_logs.py
@@ -6,6 +6,7 @@
import pytest
import runner_logs
+from errors import LxdError, RunnerLogsError
from runner_logs import get_crashed
@@ -42,6 +43,41 @@ def test_get_crashed(log_dir_base_path: Path):
)
+def test_get_crashed_no_instance(log_dir_base_path: Path):
+ """
+ arrange: Mock the Runner instance to be None
+ act: Get the logs of the crashed runner
+ assert: A RunnerLogsError is raised
+ """
+ runner = MagicMock()
+ runner.config.name = "test-runner"
+ runner.instance = None
+
+ with pytest.raises(RunnerLogsError) as exc_info:
+ get_crashed(runner)
+
+ assert "Cannot pull the logs for test-runner as runner has no running instance." in str(
+ exc_info.value
+ )
+
+
+def test_get_crashed_lxd_error(log_dir_base_path: Path):
+ """
+ arrange: Mock the Runner instance to raise an LxdError
+ act: Get the logs of the crashed runner
+ assert: A RunnerLogsError is raised
+ """
+ runner = MagicMock()
+ runner.config.name = "test-runner"
+ runner.instance.files.pull_file = MagicMock(side_effect=LxdError("Cannot pull file"))
+
+ with pytest.raises(RunnerLogsError) as exc_info:
+ get_crashed(runner)
+
+ assert "Cannot pull the logs for test-runner." in str(exc_info.value)
+ assert "Cannot pull file" in str(exc_info.value.__cause__)
+
+
def test_remove_outdated_crashed(log_dir_base_path: Path, monkeypatch: pytest.MonkeyPatch):
"""
arrange: Mock the base log directory path