canonical · yhaliaw · Feb 1, 2024 · Dec 11, 2023 · Dec 11, 2023 · Dec 11, 2023
@@ -14,7 +14,7 @@ jobs:
       provider: lxd
       test-tox-env: integration-juju2.9
       modules: '["test_charm_fork_repo", "test_charm_no_runner", "test_charm_scheduled_events", "test_charm_one_runner", "test_charm_metrics", "test_self_hosted_runner", "test_charm_with_proxy", "test_charm_with_juju_storage"]'
-  integration-tests-juju3:
+  integration-tests:
     name: Integration test with juju 3.1
     uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
     secrets: inherit

@@ -0,0 +1,29 @@
+name: Workflow Dispatch Wait Tests
+
+on:
+  # Manually dispatched workflow action
+  workflow_dispatch:
+    inputs:
+      runner:
+        description: 'Self hosted gh runner'
+        required: true
+      minutes:
+        description: 'Number of minutes to wait'
+        # Number type not supported in workflow dispatch: https://github.com/orgs/community/discussions/67182
+        # Seems to be by design: https://github.blog/changelog/2021-11-10-github-actions-input-types-for-manual-workflows/
+        default: '2'
+
+
+jobs:
+  # Occupies the selected self-hosted runner for the requested number of minutes.
+  workflow-dispatch-tests:
+    runs-on: [self-hosted, linux, x64, "${{ inputs.runner }}"]
+    steps:
+      - name: Echo input variable and message
+        # Inputs are handed to the script through the environment rather than
+        # being expanded into the script text, so a crafted input value cannot
+        # inject shell commands.
+        env:
+          TARGET_RUNNER: "${{ inputs.runner }}"
+        run: |
+          echo "Hello, runner: ${TARGET_RUNNER}"
+      - name: Wait
+        env:
+          WAIT_MINUTES: "${{ inputs.minutes }}"
+        run: |
+          sleep "${WAIT_MINUTES}m"
+      - name: Always echo a message
+        if: always()
+        run: echo "Should not echo if pre-job script failed"
@@ -17,7 +17,7 @@ Runner Manager manages the runners on LXD and GitHub.
 ## <kbd>class</kbd> `RunnerManager`
 Manage a group of runners according to configuration. 
 
-<a href="../src/runner_manager.py#L55"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L56"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `__init__`
 
@@ -46,7 +46,7 @@ Construct RunnerManager object for creating and managing runners.
 
 ---
 
-<a href="../src/runner_manager.py#L658"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L698"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `build_runner_image`
 
@@ -66,7 +66,7 @@ Build container image in test mode, else virtual machine image.
 
 ---
 
-<a href="../src/runner_manager.py#L116"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L117"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `check_runner_bin`
 
@@ -83,12 +83,12 @@ Check if runner binary exists.
 
 ---
 
-<a href="../src/runner_manager.py#L525"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L550"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `flush`
 
 ```python
-flush(flush_busy: bool = True) → int
+flush(flush_busy: bool = True, wait_repo_check: bool = False) → int
 ```
 
 Remove existing runners. 
@@ -97,7 +97,8 @@ Remove existing runners.
 
 **Args:**
 
- - <b>`flush_busy`</b>:  Whether to flush busy runners as well. 
+ - <b>`flush_busy`</b>:  Whether to flush busy runners. 
+ - <b>`wait_repo_check`</b>:  Whether to wait for busy runners to complete the repo-policy-compliance check before flushing the runners. 
 
 
 
@@ -106,7 +107,7 @@ Remove existing runners.
 
 ---
 
-<a href="../src/runner_manager.py#L217"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L218"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `get_github_info`
 
@@ -123,7 +124,7 @@ Get information on the runners from GitHub.
 
 ---
 
-<a href="../src/utilities.py#L124"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/utilities.py#L125"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `get_latest_runner_bin_url`
 
@@ -148,7 +149,7 @@ The runner binary URL changes when a new version is available.
 
 ---
 
-<a href="../src/runner_manager.py#L448"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L449"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `reconcile`
 
@@ -172,7 +173,7 @@ Bring runners in line with target.
 
 ---
 
-<a href="../src/runner_manager.py#L668"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L708"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `schedule_build_runner_image`
 
@@ -184,7 +185,7 @@ Install cron job for building runner image.
 
 ---
 
-<a href="../src/utilities.py#L150"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/utilities.py#L151"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `update_runner_bin`
 

@@ -578,8 +578,8 @@ def _check_and_update_dependencies(self) -> bool:
 
             self.unit.status = MaintenanceStatus("Flushing runners due to updated deps")
 
+            runner_manager.flush(flush_busy=False, wait_repo_check=True)
             self._start_services()
-            runner_manager.flush(flush_busy=False)
 
         self.unit.status = ActiveStatus()
         return service_updated or runner_bin_updated

@@ -10,6 +10,7 @@
 import tarfile
 import time
 import urllib.request
+from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from typing import Dict, Iterator, Optional, Type
 
@@ -522,15 +523,54 @@ def reconcile(self, quantity: int, resources: VirtualMachineResources) -> int:
             )
         return delta
 
-    def flush(self, flush_busy: bool = True) -> int:
+    def _runners_in_pre_job(self) -> bool:
+        """Check there exist runners in the pre-job script stage.
+
+        If a runner has taken a job for 1 minute or more, it is assumed to exit the pre-job script.
+
+        Returns:
+            Whether there are runners that has taken a job and run for less than 1 minute.
+        """
+        now = datetime.now(timezone.utc)
+        busy_runners = [
+            runner for runner in self._get_runners() if runner.status.exist and runner.status.busy
+        ]
+        for runner in busy_runners:
+            exit_code, stdout, _ = runner.instance.execute(
+                ["/usr/bin/stat", "-c", "'%w'", "/home/ubuntu/github-runner/_work"]
+            )
+            if exit_code != 0:
+                return False
+            date_str, time_str, timezone_str = stdout.read().decode("utf-8").split(" ")
+            job_start_time = datetime.fromisoformat(f"{date_str}T{time_str[:12]}{timezone_str}")
+            if job_start_time + timedelta(minutes=1) > now:
+                return False
+        return True
+
+    def flush(self, flush_busy: bool = True, wait_repo_check: bool = False) -> int:
         """Remove existing runners.
 
         Args:
-            flush_busy: Whether to flush busy runners as well.
+            flush_busy: Whether to flush busy runners.
+            wait_repo_check: Whether to wait for busy runner to complete
+                repo-policy-compliance check before flushing the runners.
 
         Returns:
             Number of runners removed.
         """
+        if wait_repo_check:
+            for _ in range(5):
+                if not self._runners_in_pre_job():
+                    break
+                time.sleep(30)
+            else:
+                logger.warning(
+                    (
+                        "Proceed with flush runner after timeout waiting on runner in setup "
+                        "stage, pre-job script might fail in currently running jobs"
+                    )
+                )
+
         if flush_busy:
             runners = [runner for runner in self._get_runners() if runner.status.exist]
         else:

@@ -19,6 +19,7 @@
 from juju.model import Model
 from pytest_operator.plugin import OpsTest
 
+from github_client import GithubClient
 from tests.integration.helpers import (
     deploy_github_runner_charm,
     ensure_charm_has_runner,
@@ -137,6 +138,11 @@ def model(ops_test: OpsTest) -> Model:
     return ops_test.model
 
 
+@pytest.fixture(scope="module")
+def runner_manager_github_client(token: str) -> GithubClient:
+    return GithubClient(token=token)
+
+
 @pytest_asyncio.fixture(scope="module")
 async def app_no_runner(
     model: Model,

@@ -22,6 +22,7 @@
 DISPATCH_TEST_WORKFLOW_FILENAME = "workflow_dispatch_test.yaml"
 DISPATCH_CRASH_TEST_WORKFLOW_FILENAME = "workflow_dispatch_crash_test.yaml"
 DISPATCH_FAILURE_TEST_WORKFLOW_FILENAME = "workflow_dispatch_failure_test.yaml"
+DISPATCH_WAIT_TEST_WORKFLOW_FILENAME = "workflow_dispatch_wait_test.yaml"
 
 
 async def check_runner_binary_exists(unit: Unit) -> bool:

diff --git a/tests/integration/test_charm_one_runner.py b/tests/integration/test_charm_one_runner.py
@@ -3,12 +3,20 @@
 
 """Integration tests for github-runner charm."""
 
+from datetime import datetime, timezone
+from time import sleep
+
+import github
 import pytest
+import requests
+from github.Repository import Repository
 from juju.application import Application
 from juju.model import Model
 
 from charm import GithubRunnerCharm
+from github_client import GithubClient
 from tests.integration.helpers import (
+    DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
     assert_resource_lxd_profile,
     get_runner_names,
     reconcile,
@@ -189,6 +197,70 @@ async def test_reconcile_runners_with_lxd_storage_pool_failure(
 
 @pytest.mark.asyncio
 @pytest.mark.abort_on_fail
+async def test_wait_on_busy_runner_repo_check(
+    model: Model,
+    app_runner: Application,
+    github_repository: Repository,
+    runner_manager_github_client: GithubClient,
+) -> None:
+    """
+    arrange: A working application with no runners.
+    act:
+        1. Set dockerhub-mirror config and spawn one runner.
+        2. Dispatch a workflow.
+    assert:
+        1. registry-mirrors is setup in /etc/docker/daemon.json of runner.
+        2. Message about dockerhub_mirror appears in logs.
+    """
+
+    unit = app_runner.units[0]
+
+    names = await get_runner_names(unit)
+    assert len(names) == 1
+
+    runner_to_be_used = names[0]
+
+    main_branch = github_repository.get_branch(github_repository.default_branch)
+    workflow = github_repository.get_workflow(id_or_file_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME)
+
+    workflow.create_dispatch(main_branch, {"runner": app_runner.name, "minutes": 30})
+
+    # Wait until runner is busy.
+    for _ in range(30):
+        all_runners = runner_manager_github_client.get_runner_github_info(
+            f"{github_repository.owner}/{github_repository.name}"
+        )
+        runners = [runner for runner in all_runners if runner.name == runner_to_be_used]
+        assert len(runners) == 1, "Should not occur GitHub should enforce unique naming"
+        runner = runners[0]
+        if runner["busy"]:
+            start_time = datetime.now(timezone.utc)
+            break
+
+        sleep(10)
+    else:
+        assert False, "Timeout while waiting for workflow to complete"
+
+    # Unable to find the run id of the workflow that was dispatched.
+    # Therefore, all runs after this test start should pass the conditions.
+    for run in workflow.get_runs(created=f">={start_time.isoformat()}"):
+        if start_time > run.created_at:
+            continue
+
+        try:
+            logs_url = run.jobs()[0].logs_url()
+            logs = requests.get(logs_url).content.decode("utf-8")
+        except github.GithubException.GithubException:
+            continue
+
+        if f"Job is about to start running on the runner: {app_runner.name}-" in logs:
+            assert run.jobs()[0].conclusion == "success"
+            assert (
+                "A private docker registry is setup as a dockerhub mirror for this self-hosted"
+                " runner."
+            ) in logs
+
+
 async def test_change_runner_storage(model: Model, app: Application) -> None:
     """
     arrange: A working application with one runners using memory as disk.