Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make exception handling explicit #5202

Closed
19 changes: 14 additions & 5 deletions cloudinit/analyze/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@

import datetime
import json
import logging
import sys
import time

from cloudinit import subp, util
from cloudinit import lifecycle, subp, util
from cloudinit.distros import uses_systemd

# Example events:
Expand Down Expand Up @@ -49,6 +50,7 @@
FAIL_CODE = "failure"
CONTAINER_CODE = "container"
TIMESTAMP_UNKNOWN = (FAIL_CODE, -1, -1, -1)
LOG = logging.getLogger(__name__)


def format_record(msg, event):
Expand Down Expand Up @@ -142,7 +144,7 @@ def subp(self):
return err
self.epoch = value
return None
except Exception as systemctl_fail:
except subp.ProcessExecutionError as systemctl_fail:
return systemctl_fail

def parse_epoch_as_float(self):
Expand Down Expand Up @@ -213,9 +215,16 @@ def gather_timestamps_using_dmesg():
# systemd wont start cloud-init in this case,
# so we cannot get that timestamp
return SUCCESS_CODE, kernel_start, kernel_end, kernel_end

except Exception:
except (ValueError, subp.ProcessExecutionError):
holmanb marked this conversation as resolved.
Show resolved Hide resolved
pass
except Exception as e:
lifecycle.log_with_downgradable_level(
logger=LOG,
version="24.4",
requested_level=logging.WARN,
msg="Unhandled exception: %s",
args=e,
)
return TIMESTAMP_UNKNOWN


Expand Down Expand Up @@ -244,10 +253,10 @@ def gather_timestamps_using_systemd():
cloudinit_sysd = base_time + delta_ci_s

except Exception as e:
LOG.warning("Unhandled exception: %s", e)
# Except ALL exceptions as Systemctl reader can throw many different
# errors, but any failure in systemctl means that timestamps cannot be
# obtained
print(e)
return TIMESTAMP_UNKNOWN
return status, kernel_start, kernel_end, cloudinit_sysd

Expand Down
4 changes: 2 additions & 2 deletions cloudinit/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def cfg(self):
# Ensure that cfg is not indirectly modified
return copy.deepcopy(self._cfg)

def run(self, name, functor, args, freq=None, clear_on_fail=False):
def run(self, name, functor, args, freq=None):
"""Run a function gated by a named semaphore for a desired frequency.

The typical case for this method would be to limit running of the
Expand All @@ -68,7 +68,7 @@ def run(self, name, functor, args, freq=None, clear_on_fail=False):
even if they happen to be run in different boot stages or across
reboots.
"""
return self._runners.run(name, functor, args, freq, clear_on_fail)
return self._runners.run(name, functor, args, freq)

def get_template_filename(self, name):
fn = self.paths.template_tpl % (name)
Expand Down
2 changes: 1 addition & 1 deletion cloudinit/cmd/cloud_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def handle_args(name, args):
try:
with open(args.instance_data) as file:
instance_data = json.load(file)
except IOError:
except OSError:
return log_util.error(
"File not found '%s'. Provide a path to instance data json file"
" using --instance-data" % args.instance_data
Expand Down
6 changes: 6 additions & 0 deletions cloudinit/cmd/devel/hotplug_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,12 @@ def handle_hotplug(hotplug_init: Init, devpath, subsystem, udevaction):
event_handler.success()
break
except Exception as e:
# The number of possible exceptions handled here is large and
# difficult to audit. If retries fail this exception is always
# re-raised. Therefore, this callsite is allowed to be an exception
# to the rule that handle Exception must log a warning or reraise
# or call util.logexc() - since it does technically do this on
# timeout.
LOG.debug("Exception while processing hotplug event. %s", e)
time.sleep(wait)
last_exception = e
Expand Down
2 changes: 1 addition & 1 deletion cloudinit/cmd/devel/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def render_template(user_data_path, instance_data_path=None, debug=False):
try:
with open(user_data_path) as stream:
user_data = stream.read()
except IOError:
except OSError:
LOG.error("Missing user-data file: %s", user_data_path)
return 1
try:
Expand Down
10 changes: 8 additions & 2 deletions cloudinit/cmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,8 +801,14 @@ def status_wrapper(name, args):
else:
try:
status = json.loads(util.load_text_file(status_path))
except Exception:
pass
except OSError:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we actually want to warn if a file isn't found here?

Regardless, I would probably downgrade these all to info.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this doesn't happen, then either a previous stage failed to create a status.json, or something removed/modified it/corrupted it, or a cloud-init stage is getting manually run out of order(basically the same thing as this PR).

All of these sound like "unexpected / bad things" to me - wouldn't a warning make sense?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TheRealFalcon waiting for response on this comment

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but that other PR already contains a warning, right? I guess it seems like fairly large shift from "it doesn't matter if this fails because we'll create it anyway" to "we need to warn you about this even though it has no bearing on the resulting artifact".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fair, I'll remove it.

LOG.warning("File %s cannot be accessed: %s", status_path, mode)
except (UnicodeDecodeError, json.JSONDecodeError) as e:
LOG.warning("File %s not valid json: %s", status_path, e)
except Exception as e:
LOG.warning(
"Unhandled exception loading file %s: %s", status_path, e
)

if mode not in status["v1"]:
# this should never happen, but leave it just to be safe
Expand Down
17 changes: 8 additions & 9 deletions cloudinit/cmd/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import logging
import os
import sys
from errno import EACCES

from cloudinit import atomic_helper, util
from cloudinit.cmd.devel import read_cfg_paths
Expand Down Expand Up @@ -147,7 +146,7 @@ def _read_instance_data(instance_data, user_data, vendor_data) -> dict:
Non-root users will have redacted INSTANCE_JSON_FILE content and redacted
vendordata and userdata values.

:raise: IOError/OSError on absence of instance-data.json file or invalid
:raise: OSError on absence of instance-data.json file or invalid
access perms.
"""
uid = os.getuid()
Expand Down Expand Up @@ -181,19 +180,19 @@ def _read_instance_data(instance_data, user_data, vendor_data) -> dict:

try:
instance_json = util.load_text_file(instance_data_fn)
except (IOError, OSError) as e:
if e.errno == EACCES:
LOG.error("No read permission on '%s'. Try sudo", instance_data_fn)
else:
LOG.error("Missing instance-data file: %s", instance_data_fn)
except PermissionError:
LOG.error("No read permission on '%s'. Try sudo", instance_data_fn)
raise
except OSError:
LOG.error("Missing instance-data file: %s", instance_data_fn)
raise

instance_data = util.load_json(instance_json)
try:
combined_cloud_config = util.load_json(
util.load_text_file(combined_cloud_config_fn)
)
except (IOError, OSError):
except OSError:
# File will not yet be present in init-local stage.
# It's created in `init` when vendor-data and user-data are processed.
combined_cloud_config = None
Expand Down Expand Up @@ -274,7 +273,7 @@ def handle_args(name, args):
instance_data = _read_instance_data(
args.instance_data, args.user_data, args.vendor_data
)
except (IOError, OSError):
except OSError:
return 1
if args.format:
payload = "## template: jinja\n{fmt}".format(fmt=args.format)
Expand Down
4 changes: 2 additions & 2 deletions cloudinit/config/cc_apt_configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def apply_apt(cfg, cloud, gpg):

try:
apply_apt_config(cfg, APT_PROXY_FN, APT_CONFIG_FN)
except (IOError, OSError):
except OSError:
LOG.exception("Failed to apply proxy or apt config info:")

# Process 'apt_source -> sources {dict}'
Expand Down Expand Up @@ -733,7 +733,7 @@ def add_apt_sources(
omode = "w"

util.write_file(sourcefn, contents, omode=omode)
except IOError as detail:
except OSError as detail:
LOG.exception("failed write to file %s: %s", sourcefn, detail)
raise

Expand Down
10 changes: 3 additions & 7 deletions cloudinit/config/cc_bootcmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,6 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None:
util.logexc(LOG, "Failed to shellify bootcmd: %s", str(e))
raise

try:
iid = cloud.get_instance_id()
env = {"INSTANCE_ID": str(iid)} if iid else {}
subp.subp(["/bin/sh", tmpf.name], update_env=env, capture=False)
except Exception:
util.logexc(LOG, "Failed to run bootcmd module %s", name)
raise
iid = cloud.get_instance_id()
env = {"INSTANCE_ID": str(iid)} if iid else {}
subp.subp(["/bin/sh", tmpf.name], update_env=env, capture=False)
80 changes: 75 additions & 5 deletions cloudinit/config/cc_disk_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import shlex
from pathlib import Path

from cloudinit import performance, subp, util
from cloudinit import lifecycle, performance, subp, util
from cloudinit.cloud import Cloud
from cloudinit.config import Config
from cloudinit.config.schema import MetaSchema
Expand Down Expand Up @@ -180,7 +180,18 @@ def enumerate_disk(device, nodeps=False):
info = None
try:
info, _err = subp.subp(lsblk_cmd)
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed during disk check for %s\n%s" % (device, e)
) from e
except Exception as e:
lifecycle.log_with_downgradable_level(
logger=LOG,
version="24.4",
requested_level=logging.WARN,
msg="Unhandled exception: %s",
args=e,
)
raise RuntimeError(
"Failed during disk check for %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -219,9 +230,12 @@ def is_device_valid(name, partition=False):
d_type = ""
try:
d_type = device_type(name)
except Exception:
except RuntimeError:
LOG.warning("Query against device %s failed", name)
return False
except Exception as e:
LOG.warning("Unhandled exception while querying device %s %s", name, e)
return False

if partition and d_type == "part":
return True
Expand All @@ -244,7 +258,7 @@ def check_fs(device):
blkid_cmd = ["blkid", "-c", "/dev/null", device]
try:
out, _err = subp.subp(blkid_cmd, rcs=[0, 2])
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed during disk check for %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -350,7 +364,16 @@ def get_hdd_size(device):
try:
size_in_bytes, _ = subp.subp(["blockdev", "--getsize64", device])
sector_size, _ = subp.subp(["blockdev", "--getss", device])
except subp.ProcessExecutionError as e:
raise RuntimeError("Failed to get %s size\n%s" % (device, e)) from e
except Exception as e:
lifecycle.log_with_downgradable_level(
logger=LOG,
version="24.4",
requested_level=logging.WARN,
msg="Unhandled exception: %s",
args=e,
)
raise RuntimeError("Failed to get %s size\n%s" % (device, e)) from e

return int(size_in_bytes) / int(sector_size)
Expand All @@ -369,7 +392,18 @@ def check_partition_mbr_layout(device, layout):
prt_cmd = ["sfdisk", "-l", device]
try:
out, _err = subp.subp(prt_cmd, data="%s\n" % layout)
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Error running partition command on %s\n%s" % (device, e)
) from e
except Exception as e:
lifecycle.log_with_downgradable_level(
logger=LOG,
version="24.4",
requested_level=logging.WARN,
msg="Unhandled exception: %s",
args=e,
)
raise RuntimeError(
"Error running partition command on %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -400,7 +434,18 @@ def check_partition_gpt_layout(device, layout):
prt_cmd = ["sgdisk", "-p", device]
try:
out, _err = subp.subp(prt_cmd, update_env=LANG_C_ENV)
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Error running partition command on %s\n%s" % (device, e)
) from e
except Exception as e:
lifecycle.log_with_downgradable_level(
logger=LOG,
version="24.4",
requested_level=logging.WARN,
msg="Unhandled exception: %s",
args=e,
)
raise RuntimeError(
"Error running partition command on %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -591,7 +636,7 @@ def purge_disk(device):
try:
LOG.info("Purging filesystem on /dev/%s", d["name"])
subp.subp(wipefs_cmd)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed FS purge of /dev/%s" % d["name"]
) from e
Expand Down Expand Up @@ -643,7 +688,18 @@ def exec_mkpart_mbr(device, layout):
prt_cmd = ["sfdisk", "--force", device]
try:
subp.subp(prt_cmd, data="%s\n" % layout)
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed to partition device %s\n%s" % (device, e)
) from e
except Exception as e:
lifecycle.log_with_downgradable_level(
logger=LOG,
version="24.4",
requested_level=logging.WARN,
msg="Unhandled exception: %s",
args=e,
)
raise RuntimeError(
"Failed to partition device %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -671,9 +727,14 @@ def exec_mkpart_gpt(device, layout):
subp.subp(
["sgdisk", "-t", "{}:{}".format(index, pinput), device]
)
except Exception:
except subp.ProcessExecutionError:
LOG.warning("Failed to partition device %s", device)
raise
except Exception as e:
LOG.warning(
"Unhandled exception while partitioning device %s: %s", device, e
)
raise

read_parttbl(device)

Expand Down Expand Up @@ -976,5 +1037,14 @@ def mkfs(fs_cfg):
LOG.debug("Creating file system %s on %s", label, device)
try:
subp.subp(fs_cmd, shell=shell)
except subp.ProcessExecutionError as e:
raise RuntimeError("Failed to exec of '%s':\n%s" % (fs_cmd, e)) from e
except Exception as e:
lifecycle.log_with_downgradable_level(
logger=LOG,
version="24.4",
requested_level=logging.WARN,
msg="Unhandled exception: %s",
args=e,
)
raise RuntimeError("Failed to exec of '%s':\n%s" % (fs_cmd, e)) from e
Loading
Loading