Skip to content

Commit

Permalink
Merge branch 'main' into remove-shared-fs-if-not-mounted
Browse files Browse the repository at this point in the history
  • Loading branch information
cbartz authored Jan 12, 2024
2 parents 947a7dc + 41fa738 commit cf3370d
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 11 deletions.
9 changes: 9 additions & 0 deletions src-docs/errors.py.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,15 @@ Error for loading file on runner.



---

## <kbd>class</kbd> `RunnerLogsError`
Base class for all runner logs errors.





---

## <kbd>class</kbd> `RunnerMetricsError`
Expand Down
10 changes: 8 additions & 2 deletions src-docs/runner_logs.py.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Functions to pull and remove the logs of the crashed runners.

---

<a href="../src/runner_logs.py#L23"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_logs.py#L24"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

## <kbd>function</kbd> `get_crashed`

Expand All @@ -30,9 +30,15 @@ Expects the runner to have an instance.
- <b>`runner`</b>: The runner.



**Raises:**

- <b>`RunnerLogsError`</b>: If the runner logs could not be pulled.


---

<a href="../src/runner_logs.py#L45"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_logs.py#L51"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

## <kbd>function</kbd> `remove_outdated_crashed`

Expand Down
6 changes: 3 additions & 3 deletions src-docs/runner_manager.py.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Construct RunnerManager object for creating and managing runners.

---

<a href="../src/runner_manager.py#L643"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_manager.py#L648"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `build_runner_image`

Expand Down Expand Up @@ -83,7 +83,7 @@ Check if runner binary exists.

---

<a href="../src/runner_manager.py#L510"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_manager.py#L515"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `flush`

Expand Down Expand Up @@ -172,7 +172,7 @@ Bring runners in line with target.

---

<a href="../src/runner_manager.py#L653"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_manager.py#L658"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `schedule_build_runner_image`

Expand Down
4 changes: 4 additions & 0 deletions src/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,7 @@ class GithubClientError(Exception):

class JobNotFoundError(GithubClientError):
"""Represents an error when the job could not be found on GitHub."""


class RunnerLogsError(Exception):
"""Base class for all runner logs errors."""
16 changes: 11 additions & 5 deletions src/runner_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from datetime import datetime
from pathlib import Path

from errors import LxdError, RunnerLogsError
from runner import Runner

CRASHED_RUNNER_LOGS_DIR_PATH = Path("/var/log/github-runner-crashed")
Expand All @@ -27,19 +28,24 @@ def get_crashed(runner: Runner) -> None:
Args:
runner: The runner.
Raises:
RunnerLogsError: If the runner logs could not be pulled.
"""
logger.info("Pulling the logs of the crashed runner %s.", runner.config.name)
if runner.instance is None:
logger.error(
"Cannot pull the logs for %s as runner has no running instance.", runner.config.name
raise RunnerLogsError(
f"Cannot pull the logs for {runner.config.name} as runner has no running instance."
)
return

target_log_path = CRASHED_RUNNER_LOGS_DIR_PATH / runner.config.name
target_log_path.mkdir(parents=True, exist_ok=True)

runner.instance.files.pull_file(str(DIAG_DIR_PATH), str(target_log_path), is_dir=True)
runner.instance.files.pull_file(str(SYSLOG_PATH), str(target_log_path))
try:
runner.instance.files.pull_file(str(DIAG_DIR_PATH), str(target_log_path), is_dir=True)
runner.instance.files.pull_file(str(SYSLOG_PATH), str(target_log_path))
except LxdError as exc:
raise RunnerLogsError(f"Cannot pull the logs for {runner.config.name}.") from exc


def remove_outdated_crashed() -> None:
Expand Down
7 changes: 6 additions & 1 deletion src/runner_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,12 @@ def reconcile(self, quantity: int, resources: VirtualMachineResources) -> int:

for runner in unhealthy_runners:
if self.config.are_metrics_enabled:
runner_logs.get_crashed(runner)
try:
runner_logs.get_crashed(runner)
except errors.RunnerLogsError:
logger.exception(
"Failed to get logs of crashed runner %s", runner.config.name
)
runner.remove(remove_token)
logger.info(REMOVED_RUNNER_LOG_STR, runner.config.name)

Expand Down
36 changes: 36 additions & 0 deletions tests/unit/test_runner_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytest

import runner_logs
from errors import LxdError, RunnerLogsError
from runner_logs import get_crashed


Expand Down Expand Up @@ -42,6 +43,41 @@ def test_get_crashed(log_dir_base_path: Path):
)


def test_get_crashed_no_instance(log_dir_base_path: Path):
"""
arrange: Mock the Runner instance to be None
act: Get the logs of the crashed runner
assert: A RunnerLogsError is raised
"""
runner = MagicMock()
runner.config.name = "test-runner"
runner.instance = None

with pytest.raises(RunnerLogsError) as exc_info:
get_crashed(runner)

assert "Cannot pull the logs for test-runner as runner has no running instance." in str(
exc_info.value
)


def test_get_crashed_lxd_error(log_dir_base_path: Path):
"""
arrange: Mock the Runner instance to raise an LxdError
act: Get the logs of the crashed runner
assert: A RunnerLogsError is raised
"""
runner = MagicMock()
runner.config.name = "test-runner"
runner.instance.files.pull_file = MagicMock(side_effect=LxdError("Cannot pull file"))

with pytest.raises(RunnerLogsError) as exc_info:
get_crashed(runner)

assert "Cannot pull the logs for test-runner." in str(exc_info.value)
assert "Cannot pull file" in str(exc_info.value.__cause__)


def test_remove_outdated_crashed(log_dir_base_path: Path, monkeypatch: pytest.MonkeyPatch):
"""
arrange: Mock the base log directory path
Expand Down

0 comments on commit cf3370d

Please sign in to comment.