From f1e43986fc564f62e83ddd86dcf2342bf4d316ba Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Mon, 11 Mar 2024 11:29:05 -0600 Subject: [PATCH] bug(vmware): initialize new DataSourceVMware attributes at unpickle (#5021) bug(vmware): initialize new DataSourceVMware attributes at unpickle When a datasource has already been cached from a previous boot, the datasource is deserialized from the cached object instead of running __init__ method. Across a cloud-init package upgrade if new instance attributes have been initialized in __init__, we need to also initialize them in the class _unpickle method to avoid AttributeErrors for any instance methods referencing new attributes. Fix AttributeError on rpctool and rpctool_fn LP: #2056439 --- cloudinit/sources/DataSourceOracle.py | 11 ++ cloudinit/sources/DataSourceScaleway.py | 14 ++ cloudinit/sources/DataSourceVMware.py | 26 +++ tests/unittests/test_upgrade.py | 226 +++++++++++++++++++++++- 4 files changed, 275 insertions(+), 2 deletions(-) diff --git a/cloudinit/sources/DataSourceOracle.py b/cloudinit/sources/DataSourceOracle.py index 535a2bfac89..1a43a55beb8 100644 --- a/cloudinit/sources/DataSourceOracle.py +++ b/cloudinit/sources/DataSourceOracle.py @@ -147,6 +147,17 @@ def __init__(self, sys_cfg, *args, **kwargs): self.url_max_wait = url_params.max_wait_seconds self.url_timeout = url_params.timeout_seconds + def _unpickle(self, ci_pkl_version: int) -> None: + super()._unpickle(ci_pkl_version) + if not hasattr(self, "_vnics_data"): + setattr(self, "_vnics_data", None) + if not hasattr(self, "_network_config_source"): + setattr( + self, + "_network_config_source", + KlibcOracleNetworkConfigSource(), + ) + def _has_network_config(self) -> bool: return bool(self._network_config.get("config", [])) diff --git a/cloudinit/sources/DataSourceScaleway.py b/cloudinit/sources/DataSourceScaleway.py index 221df256f7e..5ebe13fbda6 100644 --- a/cloudinit/sources/DataSourceScaleway.py +++ b/cloudinit/sources/DataSourceScaleway.py @@ -180,6 +180,20 @@ def __init__(self, sys_cfg, distro, paths): if "metadata_urls" in self.ds_cfg.keys(): self.metadata_urls += self.ds_cfg["metadata_urls"] + def _unpickle(self, ci_pkl_version: int) -> None: + super()._unpickle(ci_pkl_version) + attr_defaults = { + "ephemeral_fixed_address": None, + "has_ipv4": True, + "max_wait": DEF_MD_MAX_WAIT, + "metadata_urls": DS_BASE_URLS, + "userdata_url": None, + "vendordata_url": None, + } + for attr in attr_defaults: + if not hasattr(self, attr): + setattr(self, attr, attr_defaults[attr]) + def _set_metadata_url(self, urls): """ Define metadata_url based upon api-metadata URL availability. diff --git a/cloudinit/sources/DataSourceVMware.py b/cloudinit/sources/DataSourceVMware.py index 7e18fc17ef8..77a2de6cb34 100644 --- a/cloudinit/sources/DataSourceVMware.py +++ b/cloudinit/sources/DataSourceVMware.py @@ -160,6 +160,32 @@ def __init__(self, sys_cfg, distro, paths, ud_proc=None): (DATA_ACCESS_METHOD_IMC, self.get_imc_data_fn, True), ] + def _unpickle(self, ci_pkl_version: int) -> None: + super()._unpickle(ci_pkl_version) + for attr in ("rpctool", "rpctool_fn"): + if not hasattr(self, attr): + setattr(self, attr, None) + if not hasattr(self, "cfg"): + setattr(self, "cfg", {}) + if not hasattr(self, "possible_data_access_method_list"): + setattr( + self, + "possible_data_access_method_list", + [ + ( + DATA_ACCESS_METHOD_ENVVAR, + self.get_envvar_data_fn, + False, + ), + ( + DATA_ACCESS_METHOD_GUESTINFO, + self.get_guestinfo_data_fn, + True, + ), + (DATA_ACCESS_METHOD_IMC, self.get_imc_data_fn, True), + ], + ) + def __str__(self): root = sources.DataSource.__str__(self) return "%s [seed=%s]" % (root, self.data_access_method) diff --git a/tests/unittests/test_upgrade.py b/tests/unittests/test_upgrade.py index 531ed3cffc4..c1925201a9c 100644 --- a/tests/unittests/test_upgrade.py +++ b/tests/unittests/test_upgrade.py @@ -13,13 +13,15 @@ import operator import pathlib +from unittest import mock import pytest -from cloudinit.sources import pkl_load +from cloudinit import importer, settings, sources, type_utils from cloudinit.sources.DataSourceAzure import DataSourceAzure from cloudinit.sources.DataSourceNoCloud import DataSourceNoCloud from tests.unittests.helpers import resourceLocation +from tests.unittests.util import MockDistro DSNAME_TO_CLASS = { "Azure": DataSourceAzure, @@ -28,6 +30,131 @@ class TestUpgrade: + # Expect the following "gaps" in unpickling per-datasource. + # The presence of these attributes existed in 20.1. + ds_expected_unpickle_attrs = { + "AltCloud": {"seed", "supported_seed_starts"}, + "AliYun": {"identity", "metadata_address", "default_update_events"}, + "Azure": { + "_ephemeral_dhcp_ctx", + "_iso_dev", + "_network_config", + "_reported_ready_marker_file", + "_route_configured_for_imds", + "_route_configured_for_wireserver", + "_wireserver_endpoint", + "cfg", + "seed", + "seed_dir", + }, + "CloudSigma": {"cepko", "ssh_public_key"}, + "CloudStack": {"api_ver", "cfg", "seed_dir", "vr_addr"}, + "ConfigDrive": { + "_network_config", + "ec2_metadata", + "files", + "known_macs", + "network_eni", + "network_json", + "seed_dir", + "source", + "version", + }, + "DigitalOcean": { + "_network_config", + "metadata_address", + "metadata_full", + "retries", + "timeout", + "use_ip4LL", + "wait_retry", + }, + "Ec2": {"identity", "metadata_address"}, + "Exoscale": { + "api_version", + "extra_config", + "metadata_url", + "password_server_port", + "url_retries", + "url_timeout", + }, + "GCE": {"default_user", "metadata_address"}, + "Hetzner": { + "_network_config", + "dsmode", + "metadata_address", + "metadata_full", + "retries", + "timeout", + "userdata_address", + "wait_retry", + }, + "IBMCloud": {"source", "_network_config", "network_json", "platform"}, + "RbxCloud": {"cfg", "gratuitous_arp", "seed"}, + "Scaleway": { + "_network_config", + "metadata_url", + "retries", + "timeout", + }, + "Joyent": { + "_network_config", + "network_data", + "routes_data", + "script_base_d", + }, + "MAAS": {"base_url", "seed_dir"}, + "NoCloud": { + "_network_eni", + "_network_config", + "supported_seed_starts", + "seed_dir", + "seed", + "seed_dirs", + }, + "NWCS": { + "_network_config", + "dsmode", + "metadata_address", + "metadata_full", + "retries", + "timeout", + "wait_retry", + }, + "OpenNebula": {"network", "seed", "seed_dir"}, + "OpenStack": { + "ec2_metadata", + "files", + "metadata_address", + "network_json", + "ssl_details", + "version", + }, + "OVF": { + "cfg", + "environment", + "_network_config", + "seed", + "seed_dir", + "supported_seed_starts", + }, + "UpCloud": { + "_network_config", + "metadata_address", + "metadata_full", + "retries", + "timeout", + "wait_retry", + }, + "Vultr": {"netcfg"}, + "VMware": { + "data_access_method", + "rpctool", + "rpctool_fn", + }, + "WSL": {"instance_name"}, + } + @pytest.fixture( params=pathlib.Path(resourceLocation("old_pickles")).glob("*.pkl"), scope="class", @@ -39,7 +166,102 @@ def previous_obj_pkl(self, request): Test implementations _must not_ modify the ``previous_obj_pkl`` which they are passed, as that will affect tests that run after them. """ - return pkl_load(str(request.param)) + return sources.pkl_load(str(request.param)) + + @pytest.mark.parametrize( + "mode", + ( + [sources.DEP_FILESYSTEM], + [sources.DEP_FILESYSTEM, sources.DEP_NETWORK], + ), + ) + @mock.patch.object( + importer, + "match_case_insensitive_module_name", + lambda name: f"DataSource{name}", + ) + def test_all_ds_init_vs_unpickle_attributes( + self, mode, mocker, paths, tmpdir + ): + """Unpickle resets any instance attributes created in __init__ + + This test asserts that deserialization of a datasource cache + does proper initialization of any 'new' instance attributes + created as a side-effect of the __init__ method. + + Without proper _unpickle coverage for newly introduced attributes, + the new deserialized instance will hit AttributeErrors at runtime. + """ + # Load all cloud-init init-local time-frame DataSource classes + for ds_class in sources.list_sources( + settings.CFG_BUILTIN["datasource_list"], + mode, + [type_utils.obj_name(sources)], + ): + # Expected common instance attrs from __init__ that are typically + # handled via existing _unpickling and setup in _get_data + common_instance_attrs = { + "paths", + "vendordata2", + "sys_cfg", + "ud_proc", + "vendordata", + "vendordata2_raw", + "ds_cfg", + "distro", + "userdata", + "userdata_raw", + "metadata", + "vendordata_raw", + } + # Grab initial specific-class attributes from magic method + class_attrs = set(ds_class.__dict__) + + # Mock known subp calls from some datasource __init__ setup + mocker.patch("cloudinit.util.is_container", return_value=False) + mocker.patch("cloudinit.dmi.read_dmi_data", return_value="") + mocker.patch("cloudinit.subp.subp", return_value=("", "")) + + # Initialize the class to grab the instance attributes from + # instance.__dict__ magic method. + ds = ds_class(sys_cfg={}, distro=MockDistro(), paths=paths) + + if getattr(ds.__class__.__bases__[0], "dsname", None) == ds.dsname: + # We are a subclass in a different boot mode (Local/Net) and + # share a common parent with class atttributes + class_attrs.update(ds.__class__.__bases__[0].__dict__) + + # Determine new instance attributes created by __init__ + # by calling the __dict__ magic method on the instance. + # Then, subtract common_instance_attrs and + # ds_expected_unpickle_attrs from the list of current attributes. + # What's left is our 'new' instance attributes added as a + # side-effect of __init__. + init_attrs = ( + set(ds.__dict__) + - class_attrs + - common_instance_attrs + - self.ds_expected_unpickle_attrs.get(ds_class.dsname, set()) + ) + + # Remove all side-effect attributes added by __init__ + for side_effect_attr in init_attrs: + delattr(ds, side_effect_attr) + + # Pickle the version of the DataSource with all init_attrs removed + sources.pkl_store(ds, tmpdir.join(f"{ds.dsname}.obj.pkl")) + + # Reload the pickled bare-bones datasource to ensure all instance + # attributes are reconstituted by _unpickle helpers. + ds2 = sources.pkl_load(tmpdir.join(f"{ds.dsname}.obj.pkl")) + unpickled_attrs = ( + set(ds2.__dict__) - class_attrs - common_instance_attrs + ) + missing_unpickled_attrs = init_attrs - unpickled_attrs + assert not missing_unpickled_attrs, ( + f"New {ds_class.dsname} attributes need unpickle coverage:" + f" {missing_unpickled_attrs}" + ) def test_pkl_load_defines_all_init_side_effect_attributes( self, previous_obj_pkl