From c58ea03114838341a1c4bd99a91da5c1b98f4cef Mon Sep 17 00:00:00 2001 From: Alberto Contreras Date: Thu, 7 Jul 2022 01:23:00 +0200 Subject: [PATCH] Fuzz testing jsonchema (#1499) - Add `hypothesis` and `hypothesis-jsonschema` - Add `hypothesis-slow` tox env to run slow tests - Implement `JsonLocalResolver` to avoid `hypothesis-jsonschema` errors - Add fuzz test covering `validate_cloudconfig_schema` --- .gitignore | 1 + tests/hypothesis.py | 20 ++++++++++ tests/hypothesis_jsonschema.py | 12 ++++++ tests/unittests/config/test_schema.py | 56 +++++++++++++++++++++++++-- tests/unittests/helpers.py | 8 ++++ tox.ini | 50 +++++++++++++++++------- 6 files changed, 131 insertions(+), 16 deletions(-) create mode 100644 tests/hypothesis.py create mode 100644 tests/hypothesis_jsonschema.py diff --git a/.gitignore b/.gitignore index 6eae45c98e9..9923afae328 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ stage .vscode/ htmlcov/ tags +.hypothesis/ # Ignore packaging artifacts cloud-init.dsc diff --git a/tests/hypothesis.py b/tests/hypothesis.py new file mode 100644 index 00000000000..def9de294b3 --- /dev/null +++ b/tests/hypothesis.py @@ -0,0 +1,20 @@ +try: + from hypothesis import given + + HAS_HYPOTHESIS = True +except ImportError: + HAS_HYPOTHESIS = False + + from unittest import mock + + def given(*_, **__): # type: ignore + """Dummy implementation to make pytest collection pass""" + + @mock.Mock # Add mock to fulfill the expected hypothesis value + def run_test(item): + return item + + return run_test + + +__all__ = ["given", "HAS_HYPOTHESIS"] diff --git a/tests/hypothesis_jsonschema.py b/tests/hypothesis_jsonschema.py new file mode 100644 index 00000000000..cce7a9dac7b --- /dev/null +++ b/tests/hypothesis_jsonschema.py @@ -0,0 +1,12 @@ +try: + from hypothesis_jsonschema import from_schema + + HAS_HYPOTHESIS_JSONSCHEMA = True +except ImportError: + HAS_HYPOTHESIS_JSONSCHEMA = False + + def from_schema(*_, **__): # type: ignore + pass + + +__all__ = 
def remove_modules(schema, modules: Set[str]) -> dict:
    """Drop the ``allOf`` entries that reference the given modules.

    An entry is removed when its ``"$ref"`` equals ``#/$defs/<module>`` for
    any name in ``modules``. Entries without a ``"$ref"`` key are kept.

    The ``allOf`` list is filtered in place in a single pass. Popping
    previously collected indices one by one shifts the positions of the
    remaining entries, which removes the wrong items (or raises IndexError)
    as soon as more than one module has to be dropped.

    :param schema: dict with an ``"allOf"`` list of dicts; mutated in place.
    :param modules: module names to remove.
    :return: the same ``schema`` object, for chaining.
    """
    refs_to_drop = {f"#/$defs/{module}" for module in modules}
    # Slice assignment keeps the identity of the original list object.
    schema["allOf"][:] = [
        entry
        for entry in schema["allOf"]
        if entry.get("$ref") not in refs_to_drop
    ]
    return schema


def remove_defs(schema, defs: Set[str]) -> dict:
    """Delete the named definitions from ``schema["$defs"]``.

    Names in ``defs`` that are not present in ``schema["$defs"]`` are
    ignored.

    :param schema: dict with a ``"$defs"`` mapping; mutated in place.
    :param defs: definition keys to delete.
    :return: the same ``schema`` object, for chaining.
    """
    # The intersection is a fresh set, so deleting while looping is safe.
    for key in set(defs).intersection(schema["$defs"]):
        del schema["$defs"][key]
    return schema


def clean_schema(
    schema=None,
    modules: Optional[Sequence[str]] = None,
    defs: Optional[Sequence[str]] = None,
):
    """Return a pruned deep copy of a schema.

    :param schema: schema to copy; when falsy, ``get_schema()`` is used.
    :param modules: module names whose ``allOf`` references are removed.
    :param defs: keys to delete from ``"$defs"``.
    :return: a new dict; the input schema is never modified.
    """
    schema = deepcopy(schema or get_schema())
    if modules:
        remove_modules(schema, set(modules))
    if defs:
        remove_defs(schema, set(defs))
    return schema
+59,23 @@ commands = {envpython} -m isort . --check-only [testenv:mypy] deps = + hypothesis=={[format_deps]hypothesis} + hypothesis_jsonschema=={[format_deps]hypothesis_jsonschema} mypy=={[format_deps]mypy} + pytest=={[format_deps]pytest} types-jsonschema=={[format_deps]types-jsonschema} types-oauthlib=={[format_deps]types-oauthlib} types-pyyaml=={[format_deps]types-PyYAML} types-requests=={[format_deps]types-requests} types-setuptools=={[format_deps]types-setuptools} - pytest=={[format_deps]pytest} commands = {envpython} -m mypy cloudinit/ tests/ tools/ [testenv:check_format] deps = black=={[format_deps]black} flake8=={[format_deps]flake8} + hypothesis=={[format_deps]hypothesis} + hypothesis_jsonschema=={[format_deps]hypothesis_jsonschema} isort=={[format_deps]isort} mypy=={[format_deps]mypy} pylint=={[format_deps]pylint} @@ -118,8 +132,17 @@ deps = -r{toxinidir}/test-requirements.txt commands = {envpython} -m pytest \ --durations 10 \ - {posargs:--cov=cloudinit --cov-branch \ - tests/unittests} + -m "not hypothesis_slow" \ + {posargs:--cov=cloudinit --cov-branch tests/unittests} + +[testenv:hypothesis-slow] +deps = + hypothesis==6.31.6 + hypothesis_jsonschema==0.20.1 + -r{toxinidir}/test-requirements.txt +commands = {envpython} -m pytest \ + -m hypothesis_slow \ + {posargs:--hypothesis-show-statistics tests/unittests} [lowest-supported-deps] # Tox is going to install requirements from pip. 
This is fine for @@ -257,25 +280,26 @@ addopts = --strict log_format = %(asctime)s %(levelname)-9s %(name)s:%(filename)s:%(lineno)d %(message)s log_date_format = %Y-%m-%d %H:%M:%S markers = - allow_subp_for: allow subp usage for the given commands (disable_subp_usage) + adhoc: only run on adhoc basis, not in any CI environment (travis or jenkins) allow_all_subp: allow all subp usage (disable_subp_usage) + allow_subp_for: allow subp usage for the given commands (disable_subp_usage) + azure: test will only run on Azure platform ci: run this integration test as part of CI test runs ds_sys_cfg: a sys_cfg dict to be used by datasource fixtures ec2: test will only run on EC2 platform gce: test will only run on GCE platform - azure: test will only run on Azure platform - oci: test will only run on OCI platform - openstack: test will only run on openstack platform + hypothesis_slow: hypothesis test too slow to run as unit test + instance_name: the name to be used for the test instance + is_iscsi: whether is an instance has iscsi net cfg or not lxd_config_dict: set the config_dict passed on LXD instance creation lxd_container: test will only run in LXD container lxd_setup: specify callable to be called between init and start lxd_use_exec: `execute` will use `lxc exec` instead of SSH lxd_vm: test will only run in LXD VM - not_bionic: test cannot run on the bionic release no_container: test cannot run in a container - user_data: the user data to be passed to the test instance - instance_name: the name to be used for the test instance + not_bionic: test cannot run on the bionic release + oci: test will only run on OCI platform + openstack: test will only run on openstack platform ubuntu: this test should run on Ubuntu unstable: skip this test because it is flakey - adhoc: only run on adhoc basis, not in any CI environment (travis or jenkins) - is_iscsi: whether is an instance has iscsi net cfg or not + user_data: the user data to be passed to the test instance