Skip to content

Commit

Permalink
Remove race condition between restart repo-policy-compliance and requ…
Browse files Browse the repository at this point in the history
…est of the service (#166)
  • Loading branch information
yhaliaw committed Feb 1, 2024
1 parent 5cdc03e commit 7da7cde
Show file tree
Hide file tree
Showing 13 changed files with 272 additions and 48 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/integration_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
provider: lxd
test-tox-env: integration-juju2.9
modules: '["test_charm_fork_repo", "test_charm_no_runner", "test_charm_scheduled_events", "test_charm_one_runner", "test_charm_metrics_success", "test_charm_metrics_failure", "test_self_hosted_runner", "test_charm_with_proxy", "test_charm_with_juju_storage", "test_debug_ssh"]'
integration-tests-juju3:
integration-tests:
name: Integration test with juju 3.1
uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
secrets: inherit
Expand Down
28 changes: 28 additions & 0 deletions .github/workflows/workflow_dispatch_wait_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
name: Workflow Dispatch Wait Tests

on:
# Manually dispatched workflow action
workflow_dispatch:
inputs:
runner:
description: 'Self hosted gh runner'
required: true
minutes:
description: 'Number of minutes to wait'
# Number type not supported in workflow dispatch: https://github.com/orgs/community/discussions/67182
# Seems to be by design: https://github.blog/changelog/2021-11-10-github-actions-input-types-for-manual-workflows/
default: '2'

jobs:
workflow-dispatch-tests:
runs-on: [self-hosted, linux, x64, "${{ inputs.runner }}"]
steps:
- name: Echo input variable and message
run: |
echo "Hello, runner: ${{ inputs.runner }}"
- name: Wait
run: |
sleep ${{ inputs.minutes }}m
- name: Always echo a message
if: always()
run: echo "Should not echo if pre-job script failed"
6 changes: 3 additions & 3 deletions src-docs/charm.py.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Charm for creating and managing GitHub self-hosted runner instances.

---

<a href="../src/charm.py#L64"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/charm.py#L65"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

## <kbd>function</kbd> `catch_charm_errors`

Expand All @@ -37,7 +37,7 @@ Catch common errors in charm.

---

<a href="../src/charm.py#L95"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/charm.py#L96"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

## <kbd>function</kbd> `catch_action_errors`

Expand Down Expand Up @@ -66,7 +66,7 @@ Catch common errors in actions.
## <kbd>class</kbd> `GithubRunnerCharm`
Charm for managing GitHub self-hosted runners.

<a href="../src/charm.py#L138"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/charm.py#L139"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `__init__`

Expand Down
6 changes: 3 additions & 3 deletions src-docs/runner.py.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ The configuration values for creating a single runner instance.
## <kbd>class</kbd> `Runner`
Single instance of GitHub self-hosted runner.

<a href="../src/runner.py#L102"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner.py#L103"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `__init__`

Expand Down Expand Up @@ -67,7 +67,7 @@ Construct the runner instance.

---

<a href="../src/runner.py#L132"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner.py#L133"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `create`

Expand All @@ -91,7 +91,7 @@ Create the runner instance on LXD and register it on GitHub.

---

<a href="../src/runner.py#L168"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner.py#L169"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `remove`

Expand Down
22 changes: 11 additions & 11 deletions src-docs/runner_manager.py.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Runner Manager manages the runners on LXD and GitHub.
## <kbd>class</kbd> `RunnerManager`
Manage a group of runners according to configuration.

<a href="../src/runner_manager.py#L55"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_manager.py#L56"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `__init__`

Expand Down Expand Up @@ -46,7 +46,7 @@ Construct RunnerManager object for creating and managing runners.

---

<a href="../src/runner_manager.py#L660"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_manager.py#L714"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `build_runner_image`

Expand All @@ -66,7 +66,7 @@ Build container image in test mode, else virtual machine image.

---

<a href="../src/runner_manager.py#L116"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_manager.py#L117"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `check_runner_bin`

Expand All @@ -83,12 +83,12 @@ Check if runner binary exists.

---

<a href="../src/runner_manager.py#L526"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_manager.py#L555"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `flush`

```python
flush(flush_busy: bool = True) → int
flush(mode: FlushMode = <FlushMode.FLUSH_IDLE: 1>) → int
```

Remove existing runners.
Expand All @@ -97,7 +97,7 @@ Remove existing runners.

**Args:**

- <b>`flush_busy`</b>: Whether to flush busy runners as well.
- <b>`mode`</b>: Strategy for flushing runners.



Expand All @@ -106,7 +106,7 @@ Remove existing runners.

---

<a href="../src/runner_manager.py#L217"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_manager.py#L218"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `get_github_info`

Expand All @@ -123,7 +123,7 @@ Get information on the runners from GitHub.

---

<a href="../src/utilities.py#L124"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/utilities.py#L125"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `get_latest_runner_bin_url`

Expand All @@ -148,7 +148,7 @@ The runner binary URL changes when a new version is available.

---

<a href="../src/runner_manager.py#L449"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_manager.py#L450"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `reconcile`

Expand All @@ -172,7 +172,7 @@ Bring runners in line with target.

---

<a href="../src/runner_manager.py#L670"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/runner_manager.py#L724"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `schedule_build_runner_image`

Expand All @@ -184,7 +184,7 @@ Install cron job for building runner image.

---

<a href="../src/utilities.py#L150"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
<a href="../src/utilities.py#L151"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

### <kbd>function</kbd> `update_runner_bin`

Expand Down
18 changes: 18 additions & 0 deletions src-docs/runner_manager_type.py.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,24 @@ Types used by RunnerManager class.



---

## <kbd>class</kbd> `FlushMode`
Strategy for flushing runners.



**Attributes:**

- <b>`FLUSH_IDLE`</b>: Flush only idle runners.
- <b>`FLUSH_IDLE_WAIT_REPO_CHECK`</b>: Flush only idle runners, then wait until repo-policy-check is completed for the busy runners.
- <b>`FORCE_FLUSH_BUSY`</b>: Force flush busy runners.
- <b>`FORCE_FLUSH_BUSY_WAIT_REPO_CHECK`</b>: Wait until the repo-policy-check is completed before force flush of busy runners.





---

## <kbd>class</kbd> `RunnerInfo`
Expand Down
17 changes: 9 additions & 8 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from github_type import GitHubRunnerStatus
from runner import LXD_PROFILE_YAML
from runner_manager import RunnerManager, RunnerManagerConfig
from runner_manager_type import FlushMode
from runner_type import GithubOrg, GithubRepo, ProxySetting, VirtualMachineResources
from utilities import bytes_with_unit_to_kib, execute_command, retry

Expand Down Expand Up @@ -411,7 +412,7 @@ def _on_start(self, _event: StartEvent) -> None:

self.unit.status = MaintenanceStatus("Starting runners")
try:
runner_manager.flush(flush_busy=False)
runner_manager.flush(FlushMode.FLUSH_IDLE)
self._reconcile_runners(runner_manager)
except RunnerError as err:
logger.exception("Failed to start runners")
Expand Down Expand Up @@ -467,7 +468,7 @@ def _on_upgrade_charm(self, _event: UpgradeCharmEvent) -> None:
if not runner_manager:
return

runner_manager.flush()
runner_manager.flush(FlushMode.FORCE_FLUSH_BUSY_WAIT_REPO_CHECK)
self._reconcile_runners(runner_manager)

@catch_charm_errors
Expand Down Expand Up @@ -501,7 +502,7 @@ def _on_config_changed(self, _event: ConfigChangedEvent) -> None:
) # Casting for mypy checks.
if prev_runner_manager:
self.unit.status = MaintenanceStatus("Removing runners from old org/repo")
prev_runner_manager.flush(flush_busy=False)
prev_runner_manager.flush(FlushMode.FORCE_FLUSH_BUSY_WAIT_REPO_CHECK)
self._stored.path = self.config["path"]

runner_manager = self._get_runner_manager()
Expand All @@ -512,7 +513,7 @@ def _on_config_changed(self, _event: ConfigChangedEvent) -> None:
self.unit.status = BlockedStatus("Missing token or org/repo path config")

if self.config["token"] != self._stored.token:
runner_manager.flush(flush_busy=False)
runner_manager.flush(FlushMode.FORCE_FLUSH_BUSY_WAIT_REPO_CHECK)
self._stored.token = self.config["token"]

def _check_and_update_dependencies(self) -> bool:
Expand Down Expand Up @@ -565,8 +566,8 @@ def _check_and_update_dependencies(self) -> bool:

self.unit.status = MaintenanceStatus("Flushing runners due to updated deps")

runner_manager.flush(FlushMode.FLUSH_IDLE_WAIT_REPO_CHECK)
self._start_services()
runner_manager.flush(flush_busy=False)

self.unit.status = ActiveStatus()
return service_updated or runner_bin_updated
Expand Down Expand Up @@ -655,7 +656,7 @@ def _on_flush_runners_action(self, event: ActionEvent) -> None:
"""
runner_manager = self._get_runner_manager()

runner_manager.flush()
runner_manager.flush(FlushMode.FORCE_FLUSH_BUSY_WAIT_REPO_CHECK)
delta = self._reconcile_runners(runner_manager)

self._on_check_runners_action(event)
Expand Down Expand Up @@ -686,7 +687,7 @@ def _on_stop(self, _: StopEvent) -> None:
self.unit.status = BlockedStatus(f"Failed to stop charm event timer: {ex}")

runner_manager = self._get_runner_manager()
runner_manager.flush()
runner_manager.flush(FlushMode.FORCE_FLUSH_BUSY)

def _reconcile_runners(self, runner_manager: RunnerManager) -> Dict[str, Any]:
"""Reconcile the current runners state and intended runner state.
Expand Down Expand Up @@ -917,7 +918,7 @@ def _on_debug_ssh_relation_changed(self, _: ops.RelationChangedEvent) -> None:
"""Handle debug ssh relation changed event."""
self._refresh_firewall()
runner_manager = self._get_runner_manager()
runner_manager.flush(flush_busy=False)
runner_manager.flush(FlushMode.FLUSH_IDLE)
self._reconcile_runners(runner_manager)


Expand Down
8 changes: 7 additions & 1 deletion src/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, NamedTuple, Optional, Sequence
from urllib.error import HTTPError

import yaml

Expand Down Expand Up @@ -229,7 +230,12 @@ def remove(self, remove_token: str) -> None:
self.status.runner_id,
self.config.path.path(),
)
self._clients.github.delete_runner(self.config.path, self.status.runner_id)
try:
self._clients.github.delete_runner(self.config.path, self.status.runner_id)
except HTTPError:
logger.exception("Unable the remove runner on GitHub: %s", self.config.name)
# This can occur when attempting to remove a busy runner.
# The caller should retry later, after GitHub mark the runner as offline.

def _add_shared_filesystem(self, path: Path) -> None:
"""Add the shared filesystem to the runner instance.
Expand Down
Loading

0 comments on commit 7da7cde

Please sign in to comment.