From 5692c50b0198a8a79ec33b9dffd106005eda6822 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 15 Jul 2024 16:46:37 -0600 Subject: [PATCH] use sd_notify to update systemd in case of fatal exception and of current progress --- cloudinit/cmd/main.py | 61 +++++++++++++++----------- cloudinit/socket.py | 12 ++++- tests/unittests/test_single_process.py | 12 +++-- 3 files changed, 55 insertions(+), 30 deletions(-) diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index 7cc08b2f14a..9778b1f6add 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -1146,31 +1146,42 @@ def main(sysv_args=None): return sub_main(args) LOG.info("Running cloud-init in single process mode.") - # this _must_ be called before sd_notify is called otherwise netcat may - # attempt to send "start" before a socket exists - sync = socket.SocketSync("local", "network", "config", "final") - - # notify systemd that this stage has completed - socket.sd_notify("READY=1") - - # wait for cloud-init-local.service to start - with sync("local"): - sub_main(parser.parse_args(args=["init", "--local"])) - - # wait for cloud-init.service to start - with sync("network"): - # init stage - sub_main(parser.parse_args(args=["init"])) - - # wait for cloud-config.service to start - with sync("config"): - # config stage - sub_main(parser.parse_args(args=["modules", "--mode=config"])) - - with sync("final"): - # final stage - sub_main(parser.parse_args(args=["modules", "--mode=final"])) - socket.sd_notify(b"STOPPING=1") + try: + # this _must_ be called before sd_notify is called otherwise netcat may + # attempt to send "start" before a socket exists + sync = socket.SocketSync("local", "network", "config", "final") + + # notify systemd that this stage has completed + socket.sd_notify("READY=1") + + # wait for cloud-init-local.service to start + with sync("local"): + # local stage + sub_main(parser.parse_args(args=["init", "--local"])) + + # wait for cloud-init.service to start + with sync("network"): + # init stage + sub_main(parser.parse_args(args=["init"])) + + # wait for cloud-config.service to start + with sync("config"): + # config stage + sub_main(parser.parse_args(args=["modules", "--mode=config"])) + + with sync("final"): + # final stage + return_code = sub_main( + parser.parse_args(args=["modules", "--mode=final"]) + ) + except Exception as e: + LOG.fatal("Fatal exception: %s", e, exc_info=True) + status = traceback.format_exc().replace("\n", " ") + socket.sd_notify(f"STATUS={status}") + return_code = 1 + socket.sd_notify("STATUS=Completed") + socket.sd_notify("STOPPING=1") + return return_code def sub_main(args): diff --git a/cloudinit/socket.py b/cloudinit/socket.py index 184637e4ef7..cacde1e8a14 100644 --- a/cloudinit/socket.py +++ b/cloudinit/socket.py @@ -3,6 +3,7 @@ import logging import os import socket +import time from contextlib import suppress from cloudinit.settings import DEFAULT_RUN_DIR @@ -90,7 +91,11 @@ def __enter__(self): Once the message has been received, enter the context. """ - LOG.debug("sync(%s): initial synchronization starting", self.stage) + sd_notify( + "STATUS=Waiting on external services to " + f"complete ({self.stage} stage)" + ) + start_time = time.monotonic() # block until init system sends us data # the first value returned contains a message from the init system # (should be "start") @@ -111,7 +116,10 @@ def __enter__(self): self.__exit__(None, None, None) raise ValueError(f"Unexpected path to unix socket: {self.remote}") - LOG.debug("sync(%s): initial synchronization complete", self.stage) + total = time.monotonic() - start_time + time_msg = f"took {total: .3f} " if total > 0.1 else "" + sd_notify(f"STATUS=Running ({self.stage} stage)") + LOG.debug("sync(%s): synchronization %scomplete", self.stage, time_msg) return self def __exit__(self, exc_type, exc_val, exc_tb): diff --git a/tests/unittests/test_single_process.py b/tests/unittests/test_single_process.py index 9df2d122f15..ef3d13294cd 100644 --- a/tests/unittests/test_single_process.py +++ b/tests/unittests/test_single_process.py @@ -50,7 +50,9 @@ def __exit__(self, *_): def test_single_process_times_out(tmp_path): """Verify that no "start" makes the protocol block""" - with mock.patch.object(ci_socket, "DEFAULT_RUN_DIR", tmp_path): + with mock.patch.object( + ci_socket, "DEFAULT_RUN_DIR", tmp_path + ), mock.patch.object(ci_socket, "sd_notify"): sync = ci_socket.SocketSync("first") try: @@ -71,7 +73,9 @@ def test_single_process(tmp_path): After a socket has been been bound but before it has started listening """ expected = b"done" - with mock.patch.object(ci_socket, "DEFAULT_RUN_DIR", tmp_path): + with mock.patch.object( + ci_socket, "DEFAULT_RUN_DIR", tmp_path + ), mock.patch.object(ci_socket, "sd_notify"): sync = ci_socket.SocketSync("first", "second", "third") # send all three syncs to the sockets @@ -115,7 +119,9 @@ def syncer(index: int, name: str): time.sleep(0.001 * random.randint(0, max_sleep)) sync_storage[index] = Sync(name, tmp_path) - with mock.patch.object(ci_socket, "DEFAULT_RUN_DIR", tmp_path): + with mock.patch.object( + ci_socket, "DEFAULT_RUN_DIR", tmp_path + ), mock.patch.object(ci_socket, "sd_notify"): sync = ci_socket.SocketSync( "first", "second", "third", "fourth", "fifth"