Align upgrade_charm hook with install hook (#252)

* Reword update kernel message * Add schedule_build_runner_image to upgrade_charm * Add _set_reconcile_timer call to upgrade_charm * Remove download binary in install hook * Remove outdated code * Reword upgrading kernel msg * add integration test * extract common code into method * extract common code into method * Revert "Remove download binary in install hook" This reverts commit e8399b3. * Adjust idle period in integration test * lint * cleanup * Reword comment * Add _block_on_openstack_config to upgrade * improve integration test * Don't go into BlockedStatus on error * improve integration test * lint * add return type to is_upgrade_charm_event_emitted * lint * fix after merge * Remove _set_reconcile_timer in upgrade-charm hook * Move _set_reconcile_timers to common hook Also make image building conditional and move to common hook * Fix assert docstring * Fix lxd exists * Make build-lxd-image.sh executable * Assert that image has not been rebuilt in integration test * cleanup --------- Co-authored-by: Yanks Yoon <[email protected]> Co-authored-by: javierdelapuente <[email protected]>
canonical · Apr 12, 2024 · baa10e2 · baa10e2
1 parent 4f1d2ea
commit baa10e2
Show file tree

Hide file tree

Showing 12 changed files with 298 additions and 64 deletions.
diff --git a/scripts/build-lxd-image.sh b/scripts/build-lxd-image.sh
diff --git a/src-docs/lxd.py.md b/src-docs/lxd.py.md
@@ -17,7 +17,7 @@ The LxdClient class offers a low-level interface to isolate the underlying imple
 ## <kbd>class</kbd> `LxdClient`
 LXD client. 
 
-<a href="../src/lxd.py#L583"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/lxd.py#L601"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `__init__`
 
@@ -80,6 +80,29 @@ Import a LXD image.
 
  - <b>`LxdError`</b>:  Unable to import the file as LXD image. 
 
+---
+
+<a href="../src/lxd.py#L578"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+
+### <kbd>function</kbd> `exists`
+
+```python
+exists(alias: 'str') → bool
+```
+
+Check if an image with the given name exists. 
+
+
+
+**Args:**
+
+ - <b>`alias`</b>:  Alias name of the image to check. 
+
+
+
+**Returns:**
+ Whether the image exists. 
+
 
 ---
 

diff --git a/src-docs/runner_manager.py.md b/src-docs/runner_manager.py.md
@@ -50,7 +50,7 @@ Construct RunnerManager object for creating and managing runners.
 
 ---
 
-<a href="../src/runner_manager.py#L788"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L796"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `build_runner_image`
 
@@ -164,6 +164,23 @@ The runner binary URL changes when a new version is available.
 
 ---
 
+<a href="../src/runner_manager.py#L788"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+
+### <kbd>function</kbd> `has_runner_image`
+
+```python
+has_runner_image() → bool
+```
+
+Check if the runner image exists. 
+
+
+
+**Returns:**
+  Whether the runner image exists. 
+
+---
+
 <a href="../src/runner_manager.py#L526"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `reconcile`
@@ -188,7 +205,7 @@ Bring runners in line with target.
 
 ---
 
-<a href="../src/runner_manager.py#L802"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L810"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `schedule_build_runner_image`
 

diff --git a/src/charm.py b/src/charm.py
@@ -404,11 +404,19 @@ def _block_on_openstack_config(self, state: CharmState) -> bool:
             return True
         return False
 
-    @catch_charm_errors
-    def _on_install(self, _: InstallEvent) -> None:
-        """Handle the installation of charm."""
-        state = self._setup_state()
+    def _common_install_code(self, state: CharmState) -> bool:
+        """Installation code shared between install and upgrade hook.
 
+        Args:
+            state: The charm state instance.
+
+        Raises:
+            LogrotateSetupError: Failed to setup logrotate.
+            SubprocessError: Failed to install dependencies.
+
+        Returns:
+            True if installation was successful, False otherwise.
+        """
         if state.charm_config.openstack_clouds_yaml:
             # Only build it in test mode since it may interfere with users systems.
             if self.config.get(TEST_MODE_CONFIG_NAME) == "insecure":
@@ -436,31 +444,33 @@ def _on_install(self, _: InstallEvent) -> None:
                     ssh_debug_connections=state.ssh_debug_connections,
                 )
                 logger.info("OpenStack instance: %s", instance)
-            self._block_on_openstack_config(state)
-            return
+            return not self._block_on_openstack_config(state)
 
         self.unit.status = MaintenanceStatus("Installing packages")
         try:
             # The `_start_services`, `_install_deps` includes retry.
             self._install_deps()
             self._start_services(state.charm_config.token, state.proxy_config)
+        except SubprocessError:
+            logger.error("Failed to install dependencies")
+            raise
+
+        try:
             metrics.setup_logrotate()
-        except (LogrotateSetupError, SubprocessError) as err:
-            logger.exception(err)
-            if isinstance(err, LogrotateSetupError):
-                msg = "Failed to setup logrotate"
-            else:
-                msg = "Failed to install dependencies"
-            self.unit.status = BlockedStatus(msg)
-            return
+        except LogrotateSetupError:
+            logger.error("Failed to setup logrotate")
+            raise
 
         self._refresh_firewall(state)
-        runner_manager = self._get_runner_manager(state)
 
-        self.unit.status = MaintenanceStatus("Building runner image")
-        runner_manager.build_runner_image()
+        runner_manager = self._get_runner_manager(state)
+        if not runner_manager.has_runner_image():
+            self.unit.status = MaintenanceStatus("Building runner image")
+            runner_manager.build_runner_image()
         runner_manager.schedule_build_runner_image()
 
+        self._set_reconcile_timer()
+
         self.unit.status = MaintenanceStatus("Downloading runner binary")
         try:
             runner_info = runner_manager.get_latest_runner_bin_url()
@@ -475,9 +485,16 @@ def _on_install(self, _: InstallEvent) -> None:
             # Failure to download runner binary is a transient error.
             # The charm automatically update runner binary on a schedule.
             self.unit.status = MaintenanceStatus(f"Failed to update runner binary: {err}")
-            return
+            return False
 
         self.unit.status = ActiveStatus()
+        return True
+
+    @catch_charm_errors
+    def _on_install(self, _: InstallEvent) -> None:
+        """Handle the installation of charm."""
+        state = self._setup_state()
+        self._common_install_code(state)
 
     @catch_charm_errors
     def _on_start(self, _: StartEvent) -> None:
@@ -518,7 +535,7 @@ def _update_kernel(self, now: bool = False) -> None:
         Args:
             now: Whether the reboot should trigger at end of event handler or now.
         """
-        logger.info("Upgrading kernel")
+        logger.info("Updating kernel (if available)")
         self._apt_install(["linux-generic"])
 
         _, exit_code = execute_command(["ls", "/var/run/reboot-required"], check_exit=False)
@@ -551,28 +568,11 @@ def _on_upgrade_charm(self, _: UpgradeCharmEvent) -> None:
         state = self._setup_state()
 
         logger.info("Reinstalling dependencies...")
-        try:
-            # The `_start_services`, `_install_deps` includes retry.
-            self._install_deps()
-            self._start_services(state.charm_config.token, state.proxy_config)
-            metrics.setup_logrotate()
-        except (LogrotateSetupError, SubprocessError) as err:
-            logger.exception(err)
-
-            if isinstance(err, LogrotateSetupError):
-                msg = "Failed to setup logrotate"
-            else:
-                msg = "Failed to install dependencies"
-            self.unit.status = BlockedStatus(msg)
+        if not self._common_install_code(state):
             return
 
-        state = self._setup_state()
-        self._refresh_firewall(state)
-        logger.info("Flushing the runners...")
         runner_manager = self._get_runner_manager(state)
-        if not runner_manager:
-            return
-
+        logger.info("Flushing the runners...")
         runner_manager.flush(FlushMode.FLUSH_BUSY_WAIT_REPO_CHECK)
         self._reconcile_runners(
             runner_manager,

diff --git a/src/lxd.py b/src/lxd.py
@@ -575,6 +575,24 @@ def create(self, name: str, path: Path) -> None:
         if result.returncode != 0:
             raise LxdError(result.stdout.decode("utf-8"))
 
+    def exists(self, alias: str) -> bool:
+        """Check if an image with the given name exists.
+
+        Args:
+            alias: Alias name of the image to check.
+
+        Returns:
+            Whether the image exists.
+        """
+        # There is no direct method to check if an image exists by alias in pylxd, we therefore
+        # use the pylxd client to get the image by alias and catch the NotFound exception.
+        try:
+            self._pylxd_client.images.get_by_alias(alias)
+        except pylxd.exceptions.NotFound:
+            return False
+
+        return True
+
 
 # Disable pylint as the public methods of this class are split into instances and profiles.
 class LxdClient:  # pylint: disable=too-few-public-methods

diff --git a/src/runner_manager.py b/src/runner_manager.py
@@ -785,6 +785,14 @@ def _build_image_command(self) -> list[str]:
             cmd += ["test"]
         return cmd
 
+    def has_runner_image(self) -> bool:
+        """Check if the runner image exists.
+
+        Returns:
+            Whether the runner image exists.
+        """
+        return self._clients.lxd.images.exists(self.config.image)
+
     def build_runner_image(self) -> None:
         """Build the LXD image for hosting runner.
 

diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py
@@ -549,7 +549,7 @@ async def dispatch_workflow(
     ), "Failed to create workflow"
 
     # There is a very small chance of selecting a run not created by the dispatch above.
-    run = await wait_for(
+    run: WorkflowRun | None = await wait_for(
         partial(_get_latest_run, workflow=workflow, start_time=start_time, branch=branch)
     )
     assert run, f"Run not found for workflow: {workflow.name} ({workflow.id})"
@@ -565,10 +565,11 @@ async def dispatch_workflow(
 
 P = ParamSpec("P")
 R = TypeVar("R")
+S = Callable[P, R] | Callable[P, Awaitable[R]]
 
 
 async def wait_for(
-    func: Callable[P, R],
+    func: S,
     timeout: int | float = 300,
     check_interval: int = 10,
 ) -> R:
@@ -593,7 +594,7 @@ async def wait_for(
                 return result
         else:
             if result := func():
-                return result
+                return cast(R, result)
         time.sleep(check_interval)
 
     # final check before raising TimeoutError.
@@ -602,5 +603,5 @@ async def wait_for(
             return result
     else:
         if result := func():
-            return result
+            return cast(R, result)
     raise TimeoutError()
diff --git a/tests/integration/test_charm_no_runner.py b/tests/integration/test_charm_no_runner.py
@@ -2,6 +2,8 @@
 # See LICENSE file for licensing details.
 
 """Integration tests for github-runner charm with no runner."""
+import json
+from datetime import datetime, timezone
 
 import pytest
 from juju.application import Application
@@ -14,6 +16,8 @@
     install_repo_policy_compliance_from_git_source,
     reconcile,
     remove_runner_bin,
+    run_in_unit,
+    wait_for,
     wait_till_num_of_runners,
 )
 from tests.status_name import ACTIVE
@@ -192,3 +196,52 @@ async def test_reconcile_runners(model: Model, app_no_runner: Application) -> No
     await reconcile(app=app, model=model)
 
     await wait_till_num_of_runners(unit, 0)
+
+
+@pytest.mark.asyncio
+@pytest.mark.abort_on_fail
+async def test_charm_upgrade(model: Model, app_no_runner: Application, charm_file: str) -> None:
+    """
+    arrange: A working application with no runners.
+    act: Upgrade the charm.
+    assert: The upgrade_charm hook ran successfully and the image has not been rebuilt.
+    """
+    start_time = datetime.now(tz=timezone.utc)
+
+    await app_no_runner.refresh(path=charm_file)
+
+    unit = app_no_runner.units[0]
+    unit_name_without_slash = unit.name.replace("/", "-")
+    juju_unit_log_file = f"/var/log/juju/unit-{unit_name_without_slash}.log"
+
+    async def is_upgrade_charm_event_emitted() -> bool:
+        """Check if the upgrade_charm event is emitted.
+
+        Returns:
+            bool: True if the event is emitted, False otherwise.
+        """
+        ret_code, stdout, stderr = await run_in_unit(
+            unit=unit, command=f"cat {juju_unit_log_file}"
+        )
+        assert ret_code == 0, f"Failed to read the log file: {stderr}"
+        return stdout is not None and "Emitting Juju event upgrade_charm." in stdout
+
+    await wait_for(is_upgrade_charm_event_emitted, timeout=360, check_interval=60)
+    await model.wait_for_idle(status=ACTIVE)
+
+    ret_code, stdout, stderr = await run_in_unit(
+        unit=unit, command="/snap/bin/lxc image list --format json"
+    )
+    assert ret_code == 0, f"Failed to read the image list: {stderr}"
+    assert stdout is not None, f"Failed to read the image list: {stderr}"
+    images = json.loads(stdout)
+    jammy_image = next(
+        (image for image in images if "jammy" in {alias["name"] for alias in image["aliases"]}),
+        None,
+    )
+    assert jammy_image is not None, "Jammy image not found."
+    # len("2024-04-10T00:00:00") == 19
+    assert (
+        datetime.fromisoformat(jammy_image["created_at"][:19]).replace(tzinfo=timezone.utc)
+        <= start_time
+    ), f"Image has been rebuilt after the upgrade: {jammy_image['created_at'][:19]} > {start_time}"
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
@@ -8,7 +8,7 @@
 
 import pytest
 
-import openstack_cloud
+from openstack_cloud import openstack_manager
 from tests.unit.mock import MockGhapiClient, MockLxdClient, MockRepoPolicyComplianceClient
 
 
@@ -44,7 +44,7 @@ def disk_usage_mock(total_disk: int):
 
 @pytest.fixture(autouse=True)
 def mocks(monkeypatch, tmp_path, exec_command, lxd_exec_command, runner_binary_path):
-    openstack_manager_mock = unittest.mock.MagicMock(spec=openstack_cloud)
+    openstack_manager_mock = unittest.mock.MagicMock(spec=openstack_manager)
 
     cron_path = tmp_path / "cron.d"
     cron_path.mkdir()