Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make exception handling explicit #5202

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
15 changes: 9 additions & 6 deletions cloudinit/analyze/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import datetime
import json
import logging
import os
import sys
import time
Expand Down Expand Up @@ -53,6 +54,7 @@
FAIL_CODE = "failure"
CONTAINER_CODE = "container"
TIMESTAMP_UNKNOWN = (FAIL_CODE, -1, -1, -1)
LOG = logging.getLogger(__name__)


def format_record(msg, event):
Expand Down Expand Up @@ -146,7 +148,7 @@ def subp(self):
return err
self.epoch = value
return None
except Exception as systemctl_fail:
except subp.ProcessExecutionError as systemctl_fail:
return systemctl_fail

def parse_epoch_as_float(self):
Expand Down Expand Up @@ -217,10 +219,11 @@ def gather_timestamps_using_dmesg():
# systemd wont start cloud-init in this case,
# so we cannot get that timestamp
return SUCCESS_CODE, kernel_start, kernel_end, kernel_end

except Exception:
pass
return TIMESTAMP_UNKNOWN
except (ValueError, subp.ProcessExecutionError):
holmanb marked this conversation as resolved.
Show resolved Hide resolved
return TIMESTAMP_UNKNOWN
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
return TIMESTAMP_UNKNOWN


def gather_timestamps_using_systemd():
Expand Down Expand Up @@ -261,10 +264,10 @@ def gather_timestamps_using_systemd():
cloudinit_sysd = base_time + delta_ci_s

except Exception as e:
LOG.warning("Unhandled exception: %s", e)
# Except ALL exceptions as Systemctl reader can throw many different
# errors, but any failure in systemctl means that timestamps cannot be
# obtained
print(e)
return TIMESTAMP_UNKNOWN
return status, kernel_start, kernel_end, cloudinit_sysd

Expand Down
2 changes: 1 addition & 1 deletion cloudinit/apport.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def attach_cloud_info(report, ui=None):
instance_data = json.load(file)
assert instance_data.get("v1", {}).get("cloud_name")
return # Valid instance-data means generic-hooks will report
except (IOError, json.decoder.JSONDecodeError, AssertionError):
except (OSError, json.decoder.JSONDecodeError, AssertionError):
pass

# No valid /run/cloud/instance-data.json on system. Prompt for cloud.
Expand Down
4 changes: 2 additions & 2 deletions cloudinit/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def cfg(self):
# Ensure that cfg is not indirectly modified
return copy.deepcopy(self._cfg)

def run(self, name, functor, args, freq=None, clear_on_fail=False):
def run(self, name, functor, args, freq=None):
"""Run a function gated by a named semaphore for a desired frequency.

The typical case for this method would be to limit running of the
Expand All @@ -68,7 +68,7 @@ def run(self, name, functor, args, freq=None, clear_on_fail=False):
even if they happen to be run in different boot stages or across
reboots.
"""
return self._runners.run(name, functor, args, freq, clear_on_fail)
return self._runners.run(name, functor, args, freq)

def get_template_filename(self, name):
fn = self.paths.template_tpl % (name)
Expand Down
3 changes: 3 additions & 0 deletions cloudinit/cmd/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import argparse
import glob
import logging
import os
import sys

Expand Down Expand Up @@ -37,6 +38,7 @@
GEN_SSH_CONFIG_FILES = [
"/etc/ssh/sshd_config.d/50-cloud-init.conf",
]
LOG = logging.getLogger(__name__)


def get_parser(parser=None):
Expand Down Expand Up @@ -149,6 +151,7 @@ def remove_artifacts(remove_logs, remove_seed=False, remove_config=None):
try:
runparts(settings.CLEAN_RUNPARTS_DIR)
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I agree with these unhandled exception rules when it comes to CLI commands. The exception is being printed/handled using the error() call below this one, and does it in a more graceful way. I think a log message on the CLI like the following will just add more noise:

2024-05-01 18:10:39,200 - clean.py[WARNING]: Unhandled exception: ...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 see comment below

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No response needed, but leaving this open because I have changes to make here.

error(
f"Failure during run-parts of {settings.CLEAN_RUNPARTS_DIR}: {e}"
)
Expand Down
2 changes: 1 addition & 1 deletion cloudinit/cmd/cloud_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def handle_args(name, args):
try:
with open(args.instance_data) as file:
instance_data = json.load(file)
except IOError:
except OSError:
return error(
"File not found '%s'. Provide a path to instance data json file"
" using --instance-data" % args.instance_data
Expand Down
6 changes: 6 additions & 0 deletions cloudinit/cmd/devel/hotplug_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,12 @@ def handle_hotplug(hotplug_init: Init, devpath, subsystem, udevaction):
event_handler.success()
break
except Exception as e:
# The number of possible exceptions handled here is large and
# difficult to audit. If retries fail this exception is always
# re-raised. Therefore, this callsite is allowed to be an exception
# to the rule that handle Exception must log a warning or reraise
# or call util.logexc() - since it does technically do this on
# timeout.
LOG.debug("Exception while processing hotplug event. %s", e)
time.sleep(wait)
last_exception = e
Expand Down
2 changes: 1 addition & 1 deletion cloudinit/cmd/devel/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def render_template(user_data_path, instance_data_path=None, debug=False):
try:
with open(user_data_path) as stream:
user_data = stream.read()
except IOError:
except OSError:
LOG.error("Missing user-data file: %s", user_data_path)
return 1
try:
Expand Down
8 changes: 6 additions & 2 deletions cloudinit/cmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,8 +745,12 @@ def status_wrapper(name, args):
else:
try:
status = json.loads(util.load_text_file(status_path))
except Exception:
pass
except OSError:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we actually want to warn if a file isn't found here?

Regardless, I would probably downgrade these all to info.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this doesn't happen, then either a previous stage failed to create a status.json, or something removed/modified it/corrupted it, or a cloud-init stage is getting manually run out of order(basically the same thing as this PR).

All of these sound like "unexpected / bad things" to me - wouldn't a warning make sense?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TheRealFalcon waiting for response on this comment

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but that other PR already contains a warning, right? I guess it seems like fairly large shift from "it doesn't matter if this fails because we'll create it anyway" to "we need to warn you about this even though it has no bearing on the resulting artifact".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fair, I'll remove it.

LOG.warning("File %s not found %s.", status_path, mode)
TheRealFalcon marked this conversation as resolved.
Show resolved Hide resolved
except json.JSONDecodeError as e:
LOG.warning("File %s not valid json %s: %s", status_path, mode, e)
except Exception as e:
LOG.warning("Unhandled exception: %s", e)

nullstatus = {
"errors": [],
Expand Down
17 changes: 8 additions & 9 deletions cloudinit/cmd/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import logging
import os
import sys
from errno import EACCES

from cloudinit import atomic_helper, util
from cloudinit.cmd.devel import read_cfg_paths
Expand Down Expand Up @@ -147,7 +146,7 @@ def _read_instance_data(instance_data, user_data, vendor_data) -> dict:
Non-root users will have redacted INSTANCE_JSON_FILE content and redacted
vendordata and userdata values.

:raise: IOError/OSError on absence of instance-data.json file or invalid
:raise: OSError on absence of instance-data.json file or invalid
access perms.
"""
uid = os.getuid()
Expand Down Expand Up @@ -181,19 +180,19 @@ def _read_instance_data(instance_data, user_data, vendor_data) -> dict:

try:
instance_json = util.load_text_file(instance_data_fn)
except (IOError, OSError) as e:
if e.errno == EACCES:
LOG.error("No read permission on '%s'. Try sudo", instance_data_fn)
else:
LOG.error("Missing instance-data file: %s", instance_data_fn)
except PermissionError:
LOG.error("No read permission on '%s'. Try sudo", instance_data_fn)
raise
except OSError:
LOG.error("Missing instance-data file: %s", instance_data_fn)
raise

instance_data = util.load_json(instance_json)
try:
combined_cloud_config = util.load_json(
util.load_text_file(combined_cloud_config_fn)
)
except (IOError, OSError):
except OSError:
# File will not yet be present in init-local stage.
# It's created in `init` when vendor-data and user-data are processed.
combined_cloud_config = None
Expand Down Expand Up @@ -274,7 +273,7 @@ def handle_args(name, args):
instance_data = _read_instance_data(
args.instance_data, args.user_data, args.vendor_data
)
except (IOError, OSError):
except OSError:
return 1
if args.format:
payload = "## template: jinja\n{fmt}".format(fmt=args.format)
Expand Down
4 changes: 2 additions & 2 deletions cloudinit/config/cc_apt_configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def apply_apt(cfg, cloud, gpg):

try:
apply_apt_config(cfg, APT_PROXY_FN, APT_CONFIG_FN)
except (IOError, OSError):
except OSError:
LOG.exception("Failed to apply proxy or apt config info:")

# Process 'apt_source -> sources {dict}'
Expand Down Expand Up @@ -852,7 +852,7 @@ def add_apt_sources(
omode = "w"

util.write_file(sourcefn, contents, omode=omode)
except IOError as detail:
except OSError as detail:
LOG.exception("failed write to file %s: %s", sourcefn, detail)
raise

Expand Down
10 changes: 3 additions & 7 deletions cloudinit/config/cc_bootcmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,6 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None:
util.logexc(LOG, "Failed to shellify bootcmd: %s", str(e))
raise

try:
iid = cloud.get_instance_id()
env = {"INSTANCE_ID": str(iid)} if iid else {}
subp.subp(["/bin/sh", tmpf.name], update_env=env, capture=False)
except Exception:
util.logexc(LOG, "Failed to run bootcmd module %s", name)
raise
iid = cloud.get_instance_id()
env = {"INSTANCE_ID": str(iid)} if iid else {}
subp.subp(["/bin/sh", tmpf.name], update_env=env, capture=False)
29 changes: 18 additions & 11 deletions cloudinit/config/cc_disk_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def enumerate_disk(device, nodeps=False):
info = None
try:
info, _err = subp.subp(lsblk_cmd)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed during disk check for %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -309,7 +309,11 @@ def is_device_valid(name, partition=False):
d_type = ""
try:
d_type = device_type(name)
except Exception:
except RuntimeError:
LOG.warning("Query against device %s failed", name)
return False
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
LOG.warning("Query against device %s failed", name)
return False

Expand All @@ -334,7 +338,7 @@ def check_fs(device):
blkid_cmd = [BLKID_CMD, "-c", "/dev/null", device]
try:
out, _err = subp.subp(blkid_cmd, rcs=[0, 2])
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed during disk check for %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -440,7 +444,7 @@ def get_hdd_size(device):
try:
size_in_bytes, _ = subp.subp([BLKDEV_CMD, "--getsize64", device])
sector_size, _ = subp.subp([BLKDEV_CMD, "--getss", device])
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError("Failed to get %s size\n%s" % (device, e)) from e

return int(size_in_bytes) / int(sector_size)
Expand All @@ -458,7 +462,7 @@ def check_partition_mbr_layout(device, layout):
prt_cmd = [SFDISK_CMD, "-l", device]
try:
out, _err = subp.subp(prt_cmd, data="%s\n" % layout)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Error running partition command on %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -489,7 +493,7 @@ def check_partition_gpt_layout(device, layout):
prt_cmd = [SGDISK_CMD, "-p", device]
try:
out, _err = subp.subp(prt_cmd, update_env=LANG_C_ENV)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Error running partition command on %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -680,7 +684,7 @@ def purge_disk(device):
try:
LOG.info("Purging filesystem on /dev/%s", d["name"])
subp.subp(wipefs_cmd)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed FS purge of /dev/%s" % d["name"]
) from e
Expand Down Expand Up @@ -716,7 +720,7 @@ def read_parttbl(device):
util.udevadm_settle()
try:
subp.subp(probe_cmd)
except Exception as e:
except subp.ProcessExecutionError as e:
util.logexc(LOG, "Failed reading the partition table %s" % e)

util.udevadm_settle()
Expand All @@ -731,7 +735,7 @@ def exec_mkpart_mbr(device, layout):
prt_cmd = [SFDISK_CMD, "--force", device]
try:
subp.subp(prt_cmd, data="%s\n" % layout)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed to partition device %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -759,7 +763,10 @@ def exec_mkpart_gpt(device, layout):
subp.subp(
[SGDISK_CMD, "-t", "{}:{}".format(index, pinput), device]
)
except Exception:
except subp.ProcessExecutionError:
LOG.warning("Failed to partition device %s", device)
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
LOG.warning("Failed to partition device %s", device)
raise

Expand Down Expand Up @@ -1057,5 +1064,5 @@ def mkfs(fs_cfg):
LOG.debug(" Using cmd: %s", str(fs_cmd))
try:
subp.subp(fs_cmd, shell=shell)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError("Failed to exec of '%s':\n%s" % (fs_cmd, e)) from e
12 changes: 11 additions & 1 deletion cloudinit/config/cc_growpart.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"""Growpart: Grow partitions"""

import base64
import binascii
import copy
import json
import logging
Expand Down Expand Up @@ -374,7 +375,7 @@ def resize_encrypted(blockdev, partition) -> Tuple[str, str]:
key = keydata["key"]
decoded_key = base64.b64decode(key)
slot = keydata["slot"]
except Exception as e:
except (OSError, binascii.Error, json.JSONDecodeError) as e:
holmanb marked this conversation as resolved.
Show resolved Hide resolved
raise RuntimeError(
"Could not load encryption key. This is expected if "
"the volume has been previously resized."
Expand Down Expand Up @@ -489,7 +490,16 @@ def resize_devices(resizer, devices, distro: Distro):
"as it is not encrypted.",
)
)
except OSError as e:
info.append(
(
devent,
RESIZE.FAILED,
f"Resizing encrypted device ({blockdev}) failed: {e}",
)
)
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
TheRealFalcon marked this conversation as resolved.
Show resolved Hide resolved
info.append(
(
devent,
Expand Down
2 changes: 1 addition & 1 deletion cloudinit/config/cc_grub_dpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None:

try:
subp.subp(["debconf-set-selections"], data=dconf_sel)
except Exception as e:
except subp.ProcessExecutionError as e:
util.logexc(
LOG, "Failed to run debconf-set-selections for grub_dpkg: %s", e
)
Expand Down
10 changes: 3 additions & 7 deletions cloudinit/config/cc_keys_to_console.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,6 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None:
cfg, "ssh_key_console_blacklist", []
)

try:
cmd = [helper_path, ",".join(fp_blacklist), ",".join(key_blacklist)]
(stdout, _stderr) = subp.subp(cmd)
util.multi_log("%s\n" % (stdout.strip()), stderr=False, console=True)
except Exception:
LOG.warning("Writing keys to the system console failed!")
raise
cmd = [helper_path, ",".join(fp_blacklist), ",".join(key_blacklist)]
stdout, _stderr = subp.subp(cmd)
util.multi_log("%s\n" % (stdout.strip()), stderr=False, console=True)
Loading
Loading