sources: obj.pkl cache should be written anytime get_data is run (#1669)
When metadata update events trigger a new datasource.get_data run,
ensure the cached obj.pkl is synced to disk so subsequent
boot stages can leverage the updated metadata.

Add a write_cache param to persist_instance_data to avoid
re-persisting the cached obj.pkl when the datasource was restored
from cache (init.ds_restored).

This avoids a race on clouds where network config is updated
per boot during the init-local stage but init-network uses stale
network metadata from cache because the updated metadata was not
persisted.

Migrate _pkl_load and _pkl_store out of the stages module and into
sources, as they are only applicable to datasource serialization.
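
For context, a minimal sketch of the resulting call pattern (illustrative only:
the two helper functions below are not part of this commit, and datasource
stands in for any concrete DataSource subclass):

    def refresh_metadata(datasource):
        """Roughly what init-local does after this change."""
        if datasource.get_data():
            # get_data() now ends by calling persist_instance_data(write_cache=True),
            # so obj.pkl already holds the freshly crawled metadata whenever the
            # /var/lib/cloud/instance symlink exists.
            return True
        return False

    def backfill_instance_data(datasource):
        """Roughly what _maybe_persist_instance_data in cmd/main.py does."""
        # The datasource was restored from obj.pkl moments earlier, so rewriting
        # the pickle would be redundant; only instance-data.json is written.
        datasource.persist_instance_data(write_cache=False)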
blackboxsw authored Aug 18, 2022
1 parent 66d4095 commit 923e140
Showing 5 changed files with 98 additions and 45 deletions.
2 changes: 1 addition & 1 deletion cloudinit/cmd/main.py
@@ -809,7 +809,7 @@ def _maybe_persist_instance_data(init):
init.paths.run_dir, sources.INSTANCE_JSON_FILE
)
if not os.path.exists(instance_data_file):
init.datasource.persist_instance_data()
init.datasource.persist_instance_data(write_cache=False)


def _maybe_set_hostname(init, stage, retry_stage):
49 changes: 47 additions & 2 deletions cloudinit/sources/__init__.py
@@ -12,9 +12,10 @@
import copy
import json
import os
import pickle
from collections import namedtuple
from enum import Enum, unique
from typing import Any, Dict, List, Tuple
from typing import Any, Dict, List, Optional, Tuple

from cloudinit import dmi, importer
from cloudinit import log as logging
@@ -373,14 +374,19 @@ def get_data(self) -> bool:
self.persist_instance_data()
return return_value

def persist_instance_data(self):
def persist_instance_data(self, write_cache=True):
"""Process and write INSTANCE_JSON_FILE with all instance metadata.
Replace any hyphens with underscores in key names for use in template
processing.
:param write_cache: boolean set True to persist obj.pkl when
instance_link exists.
@return True on successful write, False otherwise.
"""
if write_cache and os.path.lexists(self.paths.instance_link):
pkl_store(self, self.paths.get_ipath_cur("obj_pkl"))
if hasattr(self, "_crawled_metadata"):
# Any datasource with _crawled_metadata will best represent
# most recent, 'raw' metadata
@@ -1063,4 +1069,43 @@ def list_from_depends(depends, ds_list):
return ret_list


def pkl_store(obj: DataSource, fname: str) -> bool:
"""Use pickle to serialize Datasource to a file as a cache.
:return: True on success
"""
try:
pk_contents = pickle.dumps(obj)
except Exception:
util.logexc(LOG, "Failed pickling datasource %s", obj)
return False
try:
util.write_file(fname, pk_contents, omode="wb", mode=0o400)
except Exception:
util.logexc(LOG, "Failed pickling datasource to %s", fname)
return False
return True


def pkl_load(fname: str) -> Optional[DataSource]:
"""Use pickle to deserialize a instance Datasource from a cache file."""
pickle_contents = None
try:
pickle_contents = util.load_file(fname, decode=False)
except Exception as e:
if os.path.isfile(fname):
LOG.warning("failed loading pickle in %s: %s", fname, e)

# This is allowed so just return nothing successfully loaded...
if not pickle_contents:
return None
try:
return pickle.loads(pickle_contents)
except DatasourceUnpickleUserDataError:
return None
except Exception:
util.logexc(LOG, "Failed loading pickled blob from %s", fname)
return None


# vi: ts=4 expandtab
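
A hedged usage sketch of the relocated helpers (the obj.pkl path is the
conventional cloud-init location and is used here purely for illustration):

    from cloudinit import sources

    def cache_and_restore(datasource: sources.DataSource,
                          obj_pkl: str = "/var/lib/cloud/instance/obj.pkl"):
        """Illustrative round trip through pkl_store and pkl_load."""
        # pkl_store returns False (and logs) on failure instead of raising,
        # so callers can treat the cache write as best-effort.
        if not sources.pkl_store(datasource, obj_pkl):
            return None
        # pkl_load returns None when the file is missing, unreadable, or the
        # pickled blob cannot be deserialized.
        return sources.pkl_load(obj_pkl)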
41 changes: 4 additions & 37 deletions cloudinit/stages.py
@@ -6,7 +6,6 @@

import copy
import os
import pickle
import sys
from collections import namedtuple
from typing import Dict, Iterable, List, Optional, Set
@@ -247,7 +246,7 @@ def _restore_from_cache(self):
# We try to restore from a current link and static path
# by using the instance link, if purge_cache was called
# the file won't exist.
return _pkl_load(self.paths.get_ipath_cur("obj_pkl"))
return sources.pkl_load(self.paths.get_ipath_cur("obj_pkl"))

def _write_to_cache(self):
if self.datasource is None:
@@ -260,7 +259,9 @@ def _write_to_cache(self):
omode="w",
content="",
)
return _pkl_store(self.datasource, self.paths.get_ipath_cur("obj_pkl"))
return sources.pkl_store(
self.datasource, self.paths.get_ipath_cur("obj_pkl")
)

def _get_datasources(self):
# Any config provided???
@@ -973,38 +974,4 @@ def fetch_base_config():
)


def _pkl_store(obj, fname):
try:
pk_contents = pickle.dumps(obj)
except Exception:
util.logexc(LOG, "Failed pickling datasource %s", obj)
return False
try:
util.write_file(fname, pk_contents, omode="wb", mode=0o400)
except Exception:
util.logexc(LOG, "Failed pickling datasource to %s", fname)
return False
return True


def _pkl_load(fname):
pickle_contents = None
try:
pickle_contents = util.load_file(fname, decode=False)
except Exception as e:
if os.path.isfile(fname):
LOG.warning("failed loading pickle in %s: %s", fname, e)

# This is allowed so just return nothing successfully loaded...
if not pickle_contents:
return None
try:
return pickle.loads(pickle_contents)
except sources.DatasourceUnpickleUserDataError:
return None
except Exception:
util.logexc(LOG, "Failed loading pickled blob from %s", fname)
return None


# vi: ts=4 expandtab
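
From the stages.py side, Init keeps the same cache entry points and simply
delegates the pickling to the sources module; a hedged sketch of how a later
boot stage picks up the datasource cached during init-local (assumes a normal
cloud-init environment and configuration on disk):

    from cloudinit import stages

    init = stages.Init()
    init.read_cfg()
    # fetch() consults _restore_from_cache() first, which now resolves to
    # sources.pkl_load() on <cloud_dir>/instance/obj.pkl, so the datasource
    # pickled during init-local is what init-network operates on here.
    datasource = init.fetch(existing="trust")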
47 changes: 44 additions & 3 deletions tests/unittests/sources/test_init.py
@@ -17,6 +17,7 @@
UNSET,
DataSource,
canonical_cloud_id,
pkl_load,
redact_sensitive_keys,
)
from cloudinit.user_data import UserDataProcessor
@@ -672,8 +673,12 @@ def test_get_data_handles_redacted_unserializable_content(self):
def test_persist_instance_data_writes_ec2_metadata_when_set(self):
"""When ec2_metadata class attribute is set, persist to json."""
tmp = self.tmp_dir()
cloud_dir = os.path.join(tmp, "cloud")
util.ensure_dir(cloud_dir)
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({"run_dir": tmp})
self.sys_cfg,
self.distro,
Paths({"run_dir": tmp, "cloud_dir": cloud_dir}),
)
datasource.ec2_metadata = UNSET
datasource.get_data()
@@ -690,8 +695,12 @@ def test_persist_instance_data_writes_ec2_metadata_when_set(self):
def test_persist_instance_data_writes_canonical_cloud_id_and_symlink(self):
"""canonical-cloud-id class attribute is set, persist to json."""
tmp = self.tmp_dir()
cloud_dir = os.path.join(tmp, "cloud")
util.ensure_dir(cloud_dir)
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({"run_dir": tmp})
self.sys_cfg,
self.distro,
Paths({"run_dir": tmp, "cloud_dir": cloud_dir}),
)
cloud_id_link = os.path.join(tmp, "cloud-id")
cloud_id_file = os.path.join(tmp, "cloud-id-my-cloud")
@@ -722,8 +731,12 @@ def test_persist_instance_data_writes_canonical_cloud_id_and_symlink(self):
def test_persist_instance_data_writes_network_json_when_set(self):
"""When network_data.json class attribute is set, persist to json."""
tmp = self.tmp_dir()
cloud_dir = os.path.join(tmp, "cloud")
util.ensure_dir(cloud_dir)
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({"run_dir": tmp})
self.sys_cfg,
self.distro,
Paths({"run_dir": tmp, "cloud_dir": cloud_dir}),
)
datasource.get_data()
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
@@ -736,6 +749,34 @@ def test_persist_instance_data_writes_network_json_when_set(self):
{"network_json": "is good"}, instance_data["ds"]["network_json"]
)

def test_persist_instance_serializes_datasource_pickle(self):
"""obj.pkl is written when instance link present and write_cache."""
tmp = self.tmp_dir()
cloud_dir = os.path.join(tmp, "cloud")
util.ensure_dir(cloud_dir)
datasource = DataSourceTestSubclassNet(
self.sys_cfg,
self.distro,
Paths({"run_dir": tmp, "cloud_dir": cloud_dir}),
)
pkl_cache_file = os.path.join(cloud_dir, "instance/obj.pkl")
self.assertFalse(os.path.exists(pkl_cache_file))
datasource.network_json = {"network_json": "is good"}
# No /var/lib/cloud/instance symlink
datasource.persist_instance_data(write_cache=True)
self.assertFalse(os.path.exists(pkl_cache_file))

# Symlink /var/lib/cloud/instance but write_cache=False
util.sym_link(cloud_dir, os.path.join(cloud_dir, "instance"))
datasource.persist_instance_data(write_cache=False)
self.assertFalse(os.path.exists(pkl_cache_file))

# Symlink /var/lib/cloud/instance and write_cache=True
datasource.persist_instance_data(write_cache=True)
self.assertTrue(os.path.exists(pkl_cache_file))
ds = pkl_load(pkl_cache_file)
self.assertEqual(datasource.network_json, ds.network_json)

def test_get_data_base64encodes_unserializable_bytes(self):
"""On py3, get_data base64encodes any unserializable content."""
tmp = self.tmp_dir()
4 changes: 2 additions & 2 deletions tests/unittests/test_upgrade.py
@@ -18,7 +18,7 @@

import pytest

from cloudinit.stages import _pkl_load
from cloudinit.sources import pkl_load
from tests.unittests.helpers import resourceLocation


@@ -34,7 +34,7 @@ def previous_obj_pkl(self, request):
Test implementations _must not_ modify the ``previous_obj_pkl`` which
they are passed, as that will affect tests that run after them.
"""
return _pkl_load(str(request.param))
return pkl_load(str(request.param))

def test_networking_set_on_distro(self, previous_obj_pkl):
"""We always expect to have ``.networking`` on ``Distro`` objects."""
