From 0b6a63b6bb1b7ee4bac731970bbf0d6c84f16aac Mon Sep 17 00:00:00 2001 From: Andrew Liaw <43424755+yhaliaw@users.noreply.github.com> Date: Tue, 4 Jul 2023 10:07:25 +0800 Subject: [PATCH 1/6] Add upgrading of linux kernel --- src/charm.py | 15 ++++++++++++-- src/runner_manager.py | 48 +++++++++++++++++++++++++------------------ 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/src/charm.py b/src/charm.py index d51cea756..584df88b7 100755 --- a/src/charm.py +++ b/src/charm.py @@ -27,7 +27,7 @@ from ops.main import main from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus -from errors import MissingConfigurationError, RunnerError, SubprocessError +from errors import MissingConfigurationError, RunnerBinaryError, RunnerError, SubprocessError from event_timer import EventTimer, TimerDisableError, TimerEnableError from github_type import GitHubRunnerStatus from runner import LXD_PROFILE_YAML @@ -271,12 +271,16 @@ def _on_install(self, _event: InstallEvent) -> None: self._stored.runner_bin_url = runner_info.download_url runner_manager.update_runner_bin(runner_info) # Safe guard against transient unexpected error. - except Exception as err: # pylint: disable=broad-exception-caught + except RunnerBinaryError as err: logger.exception("Failed to update runner binary") # Failure to download runner binary is a transient error. # The charm automatically update runner binary on a schedule. self.unit.status = MaintenanceStatus(f"Failed to update runner binary: {err}") return + + # Temporary solution: Upgrade the kernel due to a kernel bug in 5.15. + self._upgrade_kernel() + self.unit.status = MaintenanceStatus("Starting runners") try: self._reconcile_runners(runner_manager) @@ -287,6 +291,12 @@ def _on_install(self, _event: InstallEvent) -> None: else: self.unit.status = BlockedStatus("Missing token or org/repo path config") + def _upgrade_kernel(self) -> None: + """Upgrade the Linux kernel.""" + execute_command(["/usr/bin/apt-get", "update"]) + execute_command(["/usr/bin/apt-get", "install", "-qy", "linux-generic-hwe-22.04"]) + execute_command(["reboot"]) + @catch_charm_errors def _on_upgrade_charm(self, _event: UpgradeCharmEvent) -> None: """Handle the update of charm. @@ -549,6 +559,7 @@ def _install_deps(self) -> None: env["NO_PROXY"] = self.proxies["no_proxy"] env["no_proxy"] = self.proxies["no_proxy"] + execute_command(["/usr/bin/apt-get", "update"]) execute_command(["/usr/bin/apt-get", "install", "-qy", "gunicorn", "python3-pip"]) execute_command( [ diff --git a/src/runner_manager.py b/src/runner_manager.py index aa4ab394e..aad926992 100644 --- a/src/runner_manager.py +++ b/src/runner_manager.py @@ -190,30 +190,40 @@ def update_runner_bin(self, binary: RunnerApplication) -> None: """ logger.info("Downloading runner binary from: %s", binary["download_url"]) - # Delete old version of runner binary. - RunnerManager.runner_bin_path.unlink(missing_ok=True) + try: + # Delete old version of runner binary. + RunnerManager.runner_bin_path.unlink(missing_ok=True) + except OSError as err: + logger.exception("Unable to perform file operation on the runner binary path") + raise RunnerBinaryError("File operation failed on the runner binary path") from err - # Download the new file - response = self.session.get(binary["download_url"], stream=True) + try: + # Download the new file + response = self.session.get(binary["download_url"], stream=True) - logger.info( - "Download of runner binary from %s return status code: %i", - binary["download_url"], - response.status_code, - ) + logger.info( + "Download of runner binary from %s return status code: %i", + binary["download_url"], + response.status_code, + ) - if not binary["sha256_checksum"]: - logger.error("Checksum for runner binary is not found, unable to verify download.") - raise RunnerBinaryError("Checksum for runner binary is not found in GitHub response.") + if not binary["sha256_checksum"]: + logger.error("Checksum for runner binary is not found, unable to verify download.") + raise RunnerBinaryError( + "Checksum for runner binary is not found in GitHub response." + ) - sha256 = hashlib.sha256() + sha256 = hashlib.sha256() - with RunnerManager.runner_bin_path.open(mode="wb") as file: - # Process with chunk_size of 128 KiB. - for chunk in response.iter_content(chunk_size=128 * 1024, decode_unicode=False): - file.write(chunk) + with RunnerManager.runner_bin_path.open(mode="wb") as file: + # Process with chunk_size of 128 KiB. + for chunk in response.iter_content(chunk_size=128 * 1024, decode_unicode=False): + file.write(chunk) - sha256.update(chunk) + sha256.update(chunk) + except requests.RequestException as err: + logger.exception("Failed to download of runner binary") + raise RunnerBinaryError("Failed to download runner binary") from err logger.info("Finished download of runner binary.") @@ -224,13 +234,11 @@ def update_runner_bin(self, binary: RunnerApplication) -> None: binary["sha256_checksum"], sha256, ) - RunnerManager.runner_bin_path.unlink(missing_ok=True) raise RunnerBinaryError("Checksum mismatch for downloaded runner binary") # Verify the file integrity. if not tarfile.is_tarfile(file.name): logger.error("Failed to decompress downloaded GitHub runner binary.") - RunnerManager.runner_bin_path.unlink(missing_ok=True) raise RunnerBinaryError("Downloaded runner binary cannot be decompressed.") logger.info("Validated newly downloaded runner binary and enabled it.") From 7e95373a96ac52fc659f0647117991e99891c120 Mon Sep 17 00:00:00 2001 From: Andrew Liaw <43424755+yhaliaw@users.noreply.github.com> Date: Tue, 4 Jul 2023 11:23:51 +0800 Subject: [PATCH 2/6] Disable kernel upgrade and reboot for e2e test --- src/charm.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/charm.py b/src/charm.py index 9eaf25226..676b80797 100755 --- a/src/charm.py +++ b/src/charm.py @@ -301,8 +301,10 @@ def _on_install(self, _event: InstallEvent) -> None: self.unit.status = MaintenanceStatus(f"Failed to update runner binary: {err}") return - # Temporary solution: Upgrade the kernel due to a kernel bug in 5.15. - self._upgrade_kernel() + # Temporary solution: Upgrade the kernel due to a kernel bug in 5.15. Kernel upgrade + # not needed for container-based end-to-end tests. + if not LXD_PROFILE_YAML.exists(): + self._upgrade_kernel() self.unit.status = MaintenanceStatus("Starting runners") try: From 57e92079429c9563b19262471ededbb40b33567a Mon Sep 17 00:00:00 2001 From: Andrew Liaw <43424755+yhaliaw@users.noreply.github.com> Date: Tue, 4 Jul 2023 11:46:59 +0800 Subject: [PATCH 3/6] Fix reboot loop --- src/charm.py | 15 +++++++++------ src/firewall.py | 2 +- src/utilities.py | 13 ++++++------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/charm.py b/src/charm.py index 676b80797..d67f8d22a 100755 --- a/src/charm.py +++ b/src/charm.py @@ -273,6 +273,12 @@ def _on_install(self, _event: InstallEvent) -> None: """ self.unit.status = MaintenanceStatus("Installing packages") + # Temporary solution: Upgrade the kernel due to a kernel bug in 5.15. Kernel upgrade + # not needed for container-based end-to-end tests. + if not LXD_PROFILE_YAML.exists(): + self.unit.status = MaintenanceStatus("Upgrading kernel") + self._upgrade_kernel() + try: # The `_start_services`, `_install_deps` includes retry. self._install_deps() @@ -301,11 +307,6 @@ def _on_install(self, _event: InstallEvent) -> None: self.unit.status = MaintenanceStatus(f"Failed to update runner binary: {err}") return - # Temporary solution: Upgrade the kernel due to a kernel bug in 5.15. Kernel upgrade - # not needed for container-based end-to-end tests. - if not LXD_PROFILE_YAML.exists(): - self._upgrade_kernel() - self.unit.status = MaintenanceStatus("Starting runners") try: self._reconcile_runners(runner_manager) @@ -320,7 +321,9 @@ def _upgrade_kernel(self) -> None: """Upgrade the Linux kernel.""" execute_command(["/usr/bin/apt-get", "update"]) execute_command(["/usr/bin/apt-get", "install", "-qy", "linux-generic-hwe-22.04"]) - execute_command(["reboot"]) + _, exit_code = execute_command(["ls", "/var/run/reboot-required"], check_exit=False) + if exit_code == 0: + execute_command(["reboot"]) @catch_charm_errors def _on_upgrade_charm(self, _event: UpgradeCharmEvent) -> None: diff --git a/src/firewall.py b/src/firewall.py index 406dcf26c..a83771638 100644 --- a/src/firewall.py +++ b/src/firewall.py @@ -61,7 +61,7 @@ def get_host_ip(self) -> str: Returns: The host IP address. """ - address = execute_command( + address, _ = execute_command( ["/snap/bin/lxc", "network", "get", self._network, "ipv4.address"] ) return str(ipaddress.IPv4Interface(address.strip()).ip) diff --git a/src/utilities.py b/src/utilities.py index 4c9f1c147..9a1d3ef1e 100644 --- a/src/utilities.py +++ b/src/utilities.py @@ -123,7 +123,7 @@ def secure_run_subprocess(cmd: Sequence[str], **kwargs) -> subprocess.CompletedP return result -def execute_command(cmd: Sequence[str], check_exit: bool = True, **kwargs) -> str: +def execute_command(cmd: Sequence[str], check_exit: bool = True, **kwargs) -> tuple[str, int]: """Execute a command on a subprocess. The command is executed with `subprocess.run`, additional arguments can be passed to it as @@ -136,7 +136,7 @@ def execute_command(cmd: Sequence[str], check_exit: bool = True, **kwargs) -> st kwargs: Additional keyword arguments for the `subprocess.run` call. Returns: - Output on stdout. + Output on stdout, and the exit code. """ result = secure_run_subprocess(cmd, **kwargs) @@ -153,11 +153,10 @@ def execute_command(cmd: Sequence[str], check_exit: bool = True, **kwargs) -> st raise SubprocessError(cmd, err.returncode, err.stdout, err.stderr) from err - return ( - result.stdout - if isinstance(result.stdout, str) - else result.stdout.decode(kwargs.get("encoding", "utf-8")) - ) + if isinstance(result.stdout, str): + return (result.stdout, result.returncode) + + return (result.stdout.decode(kwargs.get("encoding", "utf-8")), result.returncode) def get_env_var(env_var: str) -> Optional[str]: From 0d52bcfc7ad51d9c66a1c8469ac1c32c16705de4 Mon Sep 17 00:00:00 2001 From: Andrew Liaw <43424755+yhaliaw@users.noreply.github.com> Date: Tue, 4 Jul 2023 13:52:25 +0800 Subject: [PATCH 4/6] Add logging of reboot --- src/charm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/charm.py b/src/charm.py index d67f8d22a..43bed783c 100755 --- a/src/charm.py +++ b/src/charm.py @@ -321,8 +321,10 @@ def _upgrade_kernel(self) -> None: """Upgrade the Linux kernel.""" execute_command(["/usr/bin/apt-get", "update"]) execute_command(["/usr/bin/apt-get", "install", "-qy", "linux-generic-hwe-22.04"]) + _, exit_code = execute_command(["ls", "/var/run/reboot-required"], check_exit=False) if exit_code == 0: + logger.info("Rebooting system...") execute_command(["reboot"]) @catch_charm_errors From 7d5693b6a8c7cccf782e468e73e7df5694223ee4 Mon Sep 17 00:00:00 2001 From: Andrew Liaw <43424755+yhaliaw@users.noreply.github.com> Date: Tue, 4 Jul 2023 14:16:32 +0800 Subject: [PATCH 5/6] Fix return value of execute_command --- src/firewall.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/firewall.py b/src/firewall.py index a83771638..204127667 100644 --- a/src/firewall.py +++ b/src/firewall.py @@ -75,7 +75,7 @@ def refresh_firewall(self, denylist: typing.List[FirewallEntry]): current_acls = [ acl["name"] for acl in yaml.safe_load( - execute_command(["lxc", "network", "acl", "list", "-f", "yaml"]) + execute_command(["lxc", "network", "acl", "list", "-f", "yaml"])[0] ) ] if self._ACL_RULESET_NAME not in current_acls: @@ -99,7 +99,7 @@ def refresh_firewall(self, denylist: typing.List[FirewallEntry]): ] ) acl_config = yaml.safe_load( - execute_command(["/snap/bin/lxc", "network", "acl", "show", self._ACL_RULESET_NAME]) + execute_command(["/snap/bin/lxc", "network", "acl", "show", self._ACL_RULESET_NAME])[0] ) host_ip = self.get_host_ip() egress_rules = [ From 50ac735594fc67680460e92bafd48c38600b2048 Mon Sep 17 00:00:00 2001 From: Andrew Liaw <43424755+yhaliaw@users.noreply.github.com> Date: Tue, 4 Jul 2023 14:39:10 +0800 Subject: [PATCH 6/6] Ignore a mypy error --- src/charm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/charm.py b/src/charm.py index 43bed783c..0b59f00f6 100755 --- a/src/charm.py +++ b/src/charm.py @@ -37,7 +37,7 @@ from utilities import bytes_with_unit_to_kib, execute_command, get_env_var, retry if TYPE_CHECKING: - from ops.model import JsonObject # pragma: no cover + from ops.model import JsonObject # type: ignore logger = logging.getLogger(__name__)