From a38d6da766175f7547b0a31932a5841dbb352e5c Mon Sep 17 00:00:00 2001 From: Ksenija Stanojevic Date: Tue, 27 Aug 2024 06:36:03 -0700 Subject: [PATCH] feat(azure): add PPS support for azure-proxy-agent (#5601) Add PPS support for azure-proxy agent and improve error logging. --- cloudinit/sources/DataSourceAzure.py | 23 ++- cloudinit/sources/azure/errors.py | 5 +- tests/unittests/sources/test_azure.py | 284 +++++++++++++++++++++++++- 3 files changed, 303 insertions(+), 9 deletions(-) diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index be4b5a1fbaf..77a5f46f100 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -577,15 +577,31 @@ def _check_azure_proxy_agent_status(self) -> None: ] out, err = subp.subp(cmd) report_diagnostic_event( - "Running azure-proxy-agent %s resulted" - "in stderr output: %s with stdout: %s" % (cmd, err, out), + "Executing %s resulted " + "in stderr=%r with stdout=%r" % (cmd, err, out), logger_func=LOG.debug, ) except subp.ProcessExecutionError as error: if isinstance(error.reason, FileNotFoundError): + LOG.error( + "Failed to activate Azure Guest Proxy Agent: " + "azure-proxy-agent not found" + ) report_error = errors.ReportableErrorProxyAgentNotFound() self._report_failure(report_error) else: + report_diagnostic_event( + "Failed to activate Azure Guest Proxy Agent: " + "status check failed " + "cmd=%r stderr=%r stdout=%r exit_code=%s" + % ( + error.cmd, + error.stderr, + error.stdout, + error.exit_code, + ), + logger_func=LOG.error, + ) reportable_error = ( errors.ReportableErrorProxyAgentStatusFailure(error) ) @@ -706,6 +722,9 @@ def crawl_metadata(self): self._wait_for_pps_unknown_reuse() md, userdata_raw, cfg, files = self._reprovision() + if cfg.get("ProvisionGuestProxyAgent"): + self._check_azure_proxy_agent_status() + # fetch metadata again as it has changed after reprovisioning imds_md = self.get_metadata_from_imds(report_failure=True) diff --git a/cloudinit/sources/azure/errors.py b/cloudinit/sources/azure/errors.py index 2f715e0c4c7..44e2418b115 100644 --- a/cloudinit/sources/azure/errors.py +++ b/cloudinit/sources/azure/errors.py @@ -199,10 +199,7 @@ def __init__(self, exception: Exception) -> None: class ReportableErrorProxyAgentNotFound(ReportableError): def __init__(self) -> None: - super().__init__( - "Unable to activate Azure Guest Proxy Agent." - "azure-proxy-agent not found" - ) + super().__init__("azure-proxy-agent not found") class ReportableErrorProxyAgentStatusFailure(ReportableError): diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 40c04016d67..a2ee3e29c89 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -4091,13 +4091,146 @@ def test_running_pps(self): self.mock_netlink.create_bound_netlink_socket.return_value = nl_sock self.mock_readurl.side_effect = [ mock.MagicMock(contents=json.dumps(imds_md_source).encode()), - mock.MagicMock(contents=construct_ovf_env().encode()), + mock.MagicMock( + contents=construct_ovf_env( + provision_guest_proxy_agent=False + ).encode() + ), + mock.MagicMock(contents=json.dumps(self.imds_md).encode()), + ] + self.mock_azure_get_metadata_from_fabric.return_value = [] + + self.azure_ds._check_and_get_data() + + assert self.mock_subp_subp.mock_calls == [] + + assert self.mock_readurl.mock_calls == [ + mock.call( + "http://169.254.169.254/metadata/instance?" + "api-version=2021-08-01&extended=true", + exception_cb=mock.ANY, + headers_cb=imds.headers_cb, + infinite=True, + log_req_resp=True, + timeout=30, + ), + mock.call( + "http://169.254.169.254/metadata/reprovisiondata?" + "api-version=2019-06-01", + exception_cb=mock.ANY, + headers_cb=imds.headers_cb, + log_req_resp=False, + infinite=True, + timeout=30, + ), + mock.call( + "http://169.254.169.254/metadata/instance?" + "api-version=2021-08-01&extended=true", + exception_cb=mock.ANY, + headers_cb=imds.headers_cb, + infinite=True, + log_req_resp=True, + timeout=30, + ), + ] + + # Verify DHCP is setup twice. + assert self.mock_wrapping_setup_ephemeral_networking.mock_calls == [ + mock.call(timeout_minutes=20), + mock.call(timeout_minutes=5), + ] + assert self.mock_net_dhcp_maybe_perform_dhcp_discovery.mock_calls == [ + mock.call( + self.azure_ds.distro, + None, + dsaz.dhcp_log_cb, + ), + mock.call( + self.azure_ds.distro, + None, + dsaz.dhcp_log_cb, + ), + ] + assert self.azure_ds._wireserver_endpoint == "10.11.12.13" + assert self.azure_ds._is_ephemeral_networking_up() is False + + # Verify DMI usage. + assert self.mock_dmi_read_dmi_data.mock_calls == [ + mock.call("chassis-asset-tag"), + mock.call("system-uuid"), + ] + assert ( + self.azure_ds.metadata["instance-id"] + == "50109936-ef07-47fe-ac82-890c853f60d5" + ) + + # Verify IMDS metadata. + assert self.azure_ds.metadata["imds"] == self.imds_md + + # Verify reporting ready twice. + assert self.mock_azure_get_metadata_from_fabric.mock_calls == [ + mock.call( + endpoint="10.11.12.13", + distro=self.azure_ds.distro, + iso_dev="/dev/sr0", + pubkey_info=None, + ), + mock.call( + endpoint="10.11.12.13", + distro=self.azure_ds.distro, + iso_dev=None, + pubkey_info=None, + ), + ] + + # Verify netlink operations for Running PPS. + assert self.mock_netlink.mock_calls == [ + mock.call.create_bound_netlink_socket(), + mock.call.wait_for_media_disconnect_connect(mock.ANY, "ethBoot0"), + mock.call.create_bound_netlink_socket().close(), + ] + + # Verify reported_ready marker written and cleaned up. + assert self.wrapped_util_write_file.mock_calls[0] == mock.call( + self.patched_reported_ready_marker_path.as_posix(), mock.ANY + ) + assert self.patched_reported_ready_marker_path.exists() is False + + # Verify reports via KVP. + assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0 + assert len(self.mock_kvp_report_success_to_host.mock_calls) == 2 + + # Verify dmesg reported via KVP. + assert len(self.mock_report_dmesg_to_kvp.mock_calls) == 2 + + def test_running_pps_gpa(self): + self.mock_subp_subp.side_effect = [ + subp.SubpResult("Guest Proxy Agent running", ""), + ] + imds_md_source = copy.deepcopy(self.imds_md) + imds_md_source["extended"]["compute"]["ppsType"] = "Running" + + nl_sock = mock.MagicMock() + self.mock_netlink.create_bound_netlink_socket.return_value = nl_sock + self.mock_readurl.side_effect = [ + mock.MagicMock(contents=json.dumps(imds_md_source).encode()), + mock.MagicMock( + contents=construct_ovf_env( + provision_guest_proxy_agent=True + ).encode() + ), mock.MagicMock(contents=json.dumps(self.imds_md).encode()), ] self.mock_azure_get_metadata_from_fabric.return_value = [] self.azure_ds._check_and_get_data() + assert self.mock_subp_subp.mock_calls == [ + mock.call( + ["azure-proxy-agent", "--status", "--wait", "120"], + ), + ] + assert self.mock_readurl.mock_calls == [ mock.call( "http://169.254.169.254/metadata/instance?" @@ -4209,13 +4342,155 @@ def test_savable_pps(self): ) self.mock_readurl.side_effect = [ mock.MagicMock(contents=json.dumps(imds_md_source).encode()), - mock.MagicMock(contents=construct_ovf_env().encode()), + mock.MagicMock( + contents=construct_ovf_env( + provision_guest_proxy_agent=False + ).encode() + ), mock.MagicMock(contents=json.dumps(self.imds_md).encode()), ] self.mock_azure_get_metadata_from_fabric.return_value = [] self.azure_ds._check_and_get_data() + assert self.mock_subp_subp.mock_calls == [] + + assert self.mock_readurl.mock_calls == [ + mock.call( + "http://169.254.169.254/metadata/instance?" + "api-version=2021-08-01&extended=true", + exception_cb=mock.ANY, + headers_cb=imds.headers_cb, + infinite=True, + log_req_resp=True, + timeout=30, + ), + mock.call( + "http://169.254.169.254/metadata/reprovisiondata?" + "api-version=2019-06-01", + exception_cb=mock.ANY, + headers_cb=imds.headers_cb, + log_req_resp=False, + infinite=True, + timeout=30, + ), + mock.call( + "http://169.254.169.254/metadata/instance?" + "api-version=2021-08-01&extended=true", + exception_cb=mock.ANY, + headers_cb=imds.headers_cb, + infinite=True, + log_req_resp=True, + timeout=30, + ), + ] + + # Verify DHCP is setup twice. + assert self.mock_wrapping_setup_ephemeral_networking.mock_calls == [ + mock.call(timeout_minutes=20), + mock.call( + iface="ethAttached1", + timeout_minutes=20, + report_failure_if_not_primary=False, + ), + ] + assert self.mock_net_dhcp_maybe_perform_dhcp_discovery.mock_calls == [ + mock.call( + self.azure_ds.distro, + None, + dsaz.dhcp_log_cb, + ), + mock.call( + self.azure_ds.distro, + "ethAttached1", + dsaz.dhcp_log_cb, + ), + ] + assert self.azure_ds._wireserver_endpoint == "10.11.12.13" + assert self.azure_ds._is_ephemeral_networking_up() is False + + # Verify DMI usage. + assert self.mock_dmi_read_dmi_data.mock_calls == [ + mock.call("chassis-asset-tag"), + mock.call("system-uuid"), + ] + assert ( + self.azure_ds.metadata["instance-id"] + == "50109936-ef07-47fe-ac82-890c853f60d5" + ) + + # Verify IMDS metadata. + assert self.azure_ds.metadata["imds"] == self.imds_md + + # Verify reporting ready twice. + assert self.mock_azure_get_metadata_from_fabric.mock_calls == [ + mock.call( + endpoint="10.11.12.13", + distro=self.azure_ds.distro, + iso_dev="/dev/sr0", + pubkey_info=None, + ), + mock.call( + endpoint="10.11.12.13", + distro=self.azure_ds.distro, + iso_dev=None, + pubkey_info=None, + ), + ] + + # Verify netlink operations for Savable PPS. + assert self.mock_netlink.mock_calls == [ + mock.call.create_bound_netlink_socket(), + mock.call.wait_for_nic_detach_event(nl_sock), + mock.call.wait_for_nic_attach_event(nl_sock, ["ethAttached1"]), + mock.call.create_bound_netlink_socket().close(), + ] + + # Verify reported_ready marker written and cleaned up. + assert self.wrapped_util_write_file.mock_calls[0] == mock.call( + self.patched_reported_ready_marker_path.as_posix(), mock.ANY + ) + assert self.patched_reported_ready_marker_path.exists() is False + + # Verify reports via KVP. + assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0 + assert len(self.mock_kvp_report_success_to_host.mock_calls) == 2 + + # Verify dmesg reported via KVP. + assert len(self.mock_report_dmesg_to_kvp.mock_calls) == 2 + + def test_savable_pps_gpa(self): + self.mock_subp_subp.side_effect = [ + subp.SubpResult("Guest Proxy Agent running", ""), + ] + imds_md_source = copy.deepcopy(self.imds_md) + imds_md_source["extended"]["compute"]["ppsType"] = "Savable" + + nl_sock = mock.MagicMock() + self.mock_netlink.create_bound_netlink_socket.return_value = nl_sock + self.mock_netlink.wait_for_nic_detach_event.return_value = "eth9" + self.mock_netlink.wait_for_nic_attach_event.return_value = ( + "ethAttached1" + ) + self.mock_readurl.side_effect = [ + mock.MagicMock(contents=json.dumps(imds_md_source).encode()), + mock.MagicMock( + contents=construct_ovf_env( + provision_guest_proxy_agent=True + ).encode() + ), + mock.MagicMock(contents=json.dumps(self.imds_md).encode()), + ] + self.mock_azure_get_metadata_from_fabric.return_value = [] + + self.azure_ds._check_and_get_data() + + assert self.mock_subp_subp.mock_calls == [ + mock.call( + ["azure-proxy-agent", "--status", "--wait", "120"], + ), + ] + assert self.mock_readurl.mock_calls == [ mock.call( "http://169.254.169.254/metadata/instance?" @@ -4728,7 +5003,10 @@ def test_check_azure_proxy_agent_status(self): subp.SubpResult("Guest Proxy Agent running", ""), ] self.azure_ds._check_azure_proxy_agent_status() - assert "Running azure-proxy-agent" in self.caplog.text + assert ( + "Executing ['azure-proxy-agent', '--status', '--wait', '120']" + in self.caplog.text + ) assert self.mock_wrapping_report_failure.mock_calls == [] def test_check_azure_proxy_agent_status_notfound(self):