Skip to content

Commit

Permalink
handler: report errors on skipped merged cloud-config.txt parts
Browse files Browse the repository at this point in the history
Log errors instead of warnings on skipped/invalid cloud-config parts.

Also add error comments to the merged /var/lib/cloud/instance/cloud-config.txt.

These error breadcrumb comments also allow `cloud-init schema` to better
report failures in loading user-data parts when either MIME
messages or jinja templates contain invalid YAML or otherwise
unparseable content.

LP: #1999952
  • Loading branch information
blackboxsw authored Jun 20, 2023
1 parent 4538a2e commit e6c069d
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 107 deletions.
149 changes: 96 additions & 53 deletions cloudinit/config/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,7 @@
from cloudinit import importer, safeyaml
from cloudinit.cmd.devel import read_cfg_paths
from cloudinit.handlers import INCLUSION_TYPES_MAP, type_from_starts_with
from cloudinit.util import (
decode_binary,
error,
get_modules_from_dir,
load_file,
)
from cloudinit.util import error, get_modules_from_dir, load_file

try:
from jsonschema import ValidationError as _ValidationError
Expand Down Expand Up @@ -110,6 +105,11 @@ def format(self) -> str:
SchemaProblems = List[SchemaProblem]


class UserDataTypeAndDecodedContent(NamedTuple):
    """Pair a user-data type string with its already-decoded text content."""

    # User-data type identifier, e.g. "text/cloud-config".
    userdata_type: str
    # The user-data content as text (str, not bytes).
    content: str


def _format_schema_problems(
schema_problems: SchemaProblems,
*,
Expand Down Expand Up @@ -517,7 +517,7 @@ class _Annotator:
def __init__(
self,
cloudconfig: dict,
original_content: bytes,
original_content: str,
schemamarks: dict,
):
self._cloudconfig = cloudconfig
Expand Down Expand Up @@ -608,10 +608,10 @@ def annotate(
self,
schema_errors: SchemaProblems,
schema_deprecations: SchemaProblems,
) -> Union[str, bytes]:
) -> str:
if not schema_errors and not schema_deprecations:
return self._original_content
lines = self._original_content.decode().split("\n")
lines = self._original_content.split("\n")
if not isinstance(self._cloudconfig, dict):
# Return a meaningful message on empty cloud-config
return "\n".join(
Expand All @@ -632,7 +632,7 @@ def annotate(

def annotated_cloudconfig_file(
cloudconfig: dict,
original_content: bytes,
original_content: str,
schemamarks: dict,
*,
schema_errors: Optional[SchemaProblems] = None,
Expand All @@ -654,56 +654,63 @@ def annotated_cloudconfig_file(
)


def validate_cloudconfig_file(
def process_merged_cloud_config_part_problems(
    content: str,
) -> List[SchemaProblem]:
    """Return schema problems for ignored parts noted in merged cloud-config.

    When merging multiple cloud-config parts, cloud-init logs an error and
    ignores any user-data part which is declared as #cloud-config but cannot
    be processed. The cloudinit.handlers.cloud_config module also leaves a
    comment in the final merged config for every such part; each of those
    comment lines begins with MERGED_PART_SCHEMA_ERROR_PREFIX to aid triage.

    :param content: Text of the merged cloud-config.txt.
    :return: One SchemaProblem per line of content which starts with
        MERGED_PART_SCHEMA_ERROR_PREFIX, positioned at column 1 of that
        (1-based) line. An empty list when no such line is present.
    """
    # NOTE(review): imported locally rather than at module level, presumably
    # to avoid a circular import with the handlers package -- confirm.
    from cloudinit.handlers.cloud_config import MERGED_PART_SCHEMA_ERROR_PREFIX

    if MERGED_PART_SCHEMA_ERROR_PREFIX not in content:
        return []  # Cheap exit: nothing to report, skip the per-line scan.
    prefix_len = len(MERGED_PART_SCHEMA_ERROR_PREFIX)
    # Only the leading marker is swapped for the human-readable text; the
    # remainder of the line (the ignored part's name) is kept verbatim even
    # if it happens to contain the marker text again.
    return [
        SchemaProblem(
            f"format-l{line_num}.c1",
            "Ignored invalid user-data: " + line[prefix_len:],
        )
        for line_num, line in enumerate(content.splitlines(), 1)
        if line.startswith(MERGED_PART_SCHEMA_ERROR_PREFIX)
    ]


def _get_config_type_and_rendered_userdata(
config_path: str,
schema: dict,
annotate: bool = False,
content: str,
instance_data_path: str = None,
):
"""Validate cloudconfig file adheres to a specific jsonschema.
) -> UserDataTypeAndDecodedContent:
"""
Return tuple of user-data-type and rendered content.
@param config_path: Path to the yaml cloud-config file to parse, or None
to default to system userdata from Paths object.
@param schema: Dict describing a valid jsonschema to validate against.
@param annotate: Boolean set True to print original config file with error
annotations on the offending lines.
@param instance_data_path: Path to instance_data JSON, used for text/jinja
rendering.
When encountering jinja user-data, render said content.
@raises SchemaValidationError containing any of schema_errors encountered.
@raises RuntimeError when config_path does not exist.
:return: UserDataTypeAndDecodedContent
:raises: SchemaValidationError when non-jinja content found but
header declared ## template: jinja.
"""
from cloudinit.handlers.jinja_template import (
JinjaLoadError,
NotJinjaError,
render_jinja_payload_from_file,
)

content = load_file(config_path, decode=False)
user_data_type = type_from_starts_with(content)
schema_position = "format-l1.c1"
if not user_data_type:
raise SchemaValidationError(
[
SchemaProblem(
schema_position,
f"No valid cloud-init user-data header in {config_path}.\n"
"Expected first line to be one of: "
f"{', '.join(USERDATA_VALID_HEADERS)}",
)
]
)
if user_data_type not in ("text/cloud-config", "text/jinja2"):
print(
f"User-data type '{user_data_type}' not currently evaluated"
" by cloud-init schema"
)
return
if user_data_type == "text/jinja2":
try:
content = render_jinja_payload_from_file(
decode_binary(content), config_path, instance_data_path
).encode()
content, config_path, instance_data_path
)
except NotJinjaError as e:
raise SchemaValidationError(
[
Expand All @@ -718,18 +725,52 @@ def validate_cloudconfig_file(
error(str(e), sys_exit=True)
schema_position = "format-l2.c1"
user_data_type = type_from_starts_with(content)
if not user_data_type:
content_header = content[: decode_binary(content).find("\n")]
if not user_data_type: # Neither jinja2 nor #cloud-config
header_line, _, _ = content.partition("\n")
raise SchemaValidationError(
[
SchemaProblem(
schema_position,
f"Unrecognized user-data header in {config_path}:"
f" {content_header}. Expected one of the following "
f"headers: {', '.join(USERDATA_VALID_HEADERS)}",
f' "{header_line}".\nExpected first line'
f" to be one of: {', '.join(USERDATA_VALID_HEADERS)}",
)
]
)
elif user_data_type != "text/cloud-config":
print(
f"User-data type '{user_data_type}' not currently evaluated"
" by cloud-init schema"
)
return UserDataTypeAndDecodedContent(user_data_type, content)


def validate_cloudconfig_file(
config_path: str,
schema: dict,
annotate: bool = False,
instance_data_path: str = None,
):
"""Validate cloudconfig file adheres to a specific jsonschema.
@param config_path: Path to the yaml cloud-config file to parse, or None
to default to system userdata from Paths object.
@param schema: Dict describing a valid jsonschema to validate against.
@param annotate: Boolean set True to print original config file with error
annotations on the offending lines.
@param instance_data_path: Path to instance_data JSON, used for text/jinja
rendering.
@raises SchemaValidationError containing any of schema_errors encountered.
@raises RuntimeError when config_path does not exist.
"""
decoded_userdata = _get_config_type_and_rendered_userdata(
config_path, load_file(config_path, decode=True), instance_data_path
)
if decoded_userdata.userdata_type != "text/cloud-config":
return # Neither nested #cloud-config in jinja2 nor raw #cloud-config
content = decoded_userdata.content
errors = process_merged_cloud_config_part_problems(content)
try:
if annotate:
cloudconfig, marks = safeyaml.load_with_marks(content)
Expand All @@ -746,12 +787,12 @@ def validate_cloudconfig_file(
if mark:
line = mark.line + 1
column = mark.column + 1
errors = [
errors.append(
SchemaProblem(
"format-l{line}.c{col}".format(line=line, col=column),
"File {0} is not valid yaml. {1}".format(config_path, str(e)),
),
]
)
schema_error = SchemaValidationError(errors)
if annotate:
print(
Expand All @@ -769,13 +810,15 @@ def validate_cloudconfig_file(
cloudconfig, schema, strict=True, log_deprecations=False
)
except SchemaValidationError as e:
if e.has_errors():
errors += e.schema_errors
if annotate:
print(
annotated_cloudconfig_file(
cloudconfig,
content,
marks,
schema_errors=e.schema_errors,
schema_errors=errors,
schema_deprecations=e.schema_deprecations,
)
)
Expand All @@ -786,8 +829,8 @@ def validate_cloudconfig_file(
separator=", ",
)
print(message)
if e.has_errors(): # We do not consider deprecations as error
raise
if errors:
raise SchemaValidationError(schema_errors=errors) from e


def _sort_property_order(value):
Expand Down
6 changes: 6 additions & 0 deletions cloudinit/handlers/cloud_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
CLOUD_PREFIX = "#cloud-config"
JSONP_PREFIX = "#cloud-config-jsonp"

MERGED_PART_SCHEMA_ERROR_PREFIX = "# Cloud-config part ignored SCHEMA_ERROR: "


class CloudConfigPartHandler(handlers.Handler):

Expand All @@ -53,6 +55,7 @@ def __init__(self, paths, **_kwargs):
if "cloud_config_path" in _kwargs:
self.cloud_fn = paths.get_ipath(_kwargs["cloud_config_path"])
self.file_names = []
self.error_file_names = []

def _write_cloud_config(self):
if not self.cloud_fn:
Expand All @@ -66,6 +69,8 @@ def _write_cloud_config(self):
fn = "?"
file_lines.append("# %s" % (fn))
file_lines.append("")
for error_file in self.error_file_names:
file_lines.append(f"{MERGED_PART_SCHEMA_ERROR_PREFIX}{error_file}")
if self.cloud_buf is not None:
# Something was actually gathered....
lines = [
Expand Down Expand Up @@ -143,6 +148,7 @@ def handle_part(self, data, ctype, filename, payload, frequency, headers):
filename = filename.replace(i, " ")
self.file_names.append(filename.strip())
except ValueError as err:
self.error_file_names.append(filename.strip())
LOG.warning(
"Failed at merging in cloud config part from %s: %s",
filename,
Expand Down
Loading

0 comments on commit e6c069d

Please sign in to comment.