Fuzz testing jsonschema (#1499)

- Add `hypothesis` and `hypothesis-jsonschema`
- Add `hypothesis-slow` tox env to run slow tests
- Implement `JsonLocalResolver` to avoid `hypothesis-jsonschema` errors
- Add fuzz test covering `validate_cloudconfig_schema`
canonical · Jul 6, 2022 · c58ea03 · c58ea03
1 parent 4970f3b
commit c58ea03
Show file tree

Hide file tree

Showing 6 changed files with 131 additions and 16 deletions.
diff --git a/.gitignore b/.gitignore
@@ -21,6 +21,7 @@ stage
 .vscode/
 htmlcov/
 tags
+.hypothesis/
 
 # Ignore packaging artifacts
 cloud-init.dsc

diff --git a/tests/hypothesis.py b/tests/hypothesis.py
@@ -0,0 +1,20 @@
+# Optional-dependency shim: re-export ``given`` from hypothesis when it can
+# be imported, otherwise provide a stand-in so that modules decorating
+# tests with ``@given(...)`` can still be imported.
+try:
+    from hypothesis import given
+
+    # The real library is importable.
+    HAS_HYPOTHESIS = True
+except ImportError:
+    # hypothesis is missing: record that and fall back to the dummy below.
+    HAS_HYPOTHESIS = False
+
+    from unittest import mock
+
+    def given(*_, **__):  # type: ignore
+        """Dummy implementation to make pytest collection pass
+
+        All positional and keyword arguments are accepted and ignored.
+        The return value is used as the decorator in place of the one
+        hypothesis would have produced.
+        """
+
+        # Decorating with ``mock.Mock`` rebinds ``run_test`` to
+        # ``mock.Mock(run_test)``, so a Mock object (not the plain
+        # function) is what gets returned to the caller.
+        @mock.Mock  # Add mock to fulfill the expected hypothesis value
+        def run_test(item):
+            return item
+
+        return run_test
+
+
+# Names re-exported to test modules regardless of which branch ran.
+__all__ = ["given", "HAS_HYPOTHESIS"]
diff --git a/tests/hypothesis_jsonschema.py b/tests/hypothesis_jsonschema.py
@@ -0,0 +1,12 @@
+# Optional-dependency shim: re-export ``from_schema`` from
+# hypothesis_jsonschema when it can be imported, otherwise provide a
+# do-nothing stand-in so importing test modules does not fail.
+try:
+    from hypothesis_jsonschema import from_schema
+
+    # The real library is importable.
+    HAS_HYPOTHESIS_JSONSCHEMA = True
+except ImportError:
+    # Library is missing: record that so callers can skip dependent tests.
+    HAS_HYPOTHESIS_JSONSCHEMA = False
+
+    def from_schema(*_, **__):  # type: ignore
+        """Placeholder that ignores all arguments and returns None."""
+        pass
+
+
+# Names re-exported to test modules regardless of which branch ran.
+__all__ = ["from_schema", "HAS_HYPOTHESIS_JSONSCHEMA"]
diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py
@@ -9,11 +9,11 @@
 import os
 import re
 import sys
-from copy import copy
+from copy import copy, deepcopy
 from pathlib import Path
 from textwrap import dedent
 from types import ModuleType
-from typing import List
+from typing import List, Optional, Sequence, Set
 
 import pytest
 
@@ -37,10 +37,13 @@
 from cloudinit.safeyaml import load, load_with_marks
 from cloudinit.settings import FREQUENCIES
 from cloudinit.util import load_file, write_file
+from tests.hypothesis import given
+from tests.hypothesis_jsonschema import from_schema
 from tests.unittests.helpers import (
     CiTestCase,
     cloud_init_project_dir,
     mock,
+    skipUnlessHypothesisJsonSchema,
     skipUnlessJsonSchema,
 )
 
@@ -481,7 +484,7 @@ def test_validateconfig_file_error_on_non_yaml_parser_error(
 
     @skipUnlessJsonSchema()
     @pytest.mark.parametrize("annotate", (True, False))
-    def test_validateconfig_file_sctrictly_validates_schema(
+    def test_validateconfig_file_strictly_validates_schema(
         self, annotate, tmpdir
     ):
         """validate_cloudconfig_file raises errors on invalid schema."""
@@ -1125,3 +1128,50 @@ def test_valid_meta_for_every_module(self):
             assert "distros" in module.meta
             assert {module.meta["frequency"]}.issubset(FREQUENCIES)
             assert set(module.meta["distros"]).issubset(all_distros)
+
+
+def remove_modules(schema, modules: Set[str]) -> dict:
+    indices_to_delete = set()
+    for module in set(modules):
+        for index, ref_dict in enumerate(schema["allOf"]):
+            if ref_dict["$ref"] == f"#/$defs/{module}":
+                indices_to_delete.add(index)
+                continue  # module found
+    for index in indices_to_delete:
+        schema["allOf"].pop(index)
+    return schema
+
+
+def remove_defs(schema, defs: Set[str]) -> dict:
+    defs_to_delete = set(schema["$defs"].keys()).intersection(set(defs))
+    for key in defs_to_delete:
+        del schema["$defs"][key]
+    return schema
+
+
+def clean_schema(
+    schema=None,
+    modules: Optional[Sequence[str]] = None,
+    defs: Optional[Sequence[str]] = None,
+):
+    schema = deepcopy(schema or get_schema())
+    if modules:
+        remove_modules(schema, set(modules))
+    if defs:
+        remove_defs(schema, set(defs))
+    return schema
+
+
+@pytest.mark.hypothesis_slow
+class TestSchemaFuzz:
+    """Fuzz ``validate_cloudconfig_schema`` with schema-derived configs.
+
+    The whole class carries the ``hypothesis_slow`` marker so it can be
+    selected or deselected with pytest's ``-m`` option.
+    """
+
+    # Schema used to generate inputs. It is built once, when the class body
+    # is executed, with the ``cc_users_groups`` module reference and two
+    # ``users_groups`` definitions stripped out.
+    # Avoid https://github.com/Zac-HD/hypothesis-jsonschema/issues/97
+    SCHEMA = clean_schema(
+        modules=["cc_users_groups"],
+        defs=["users_groups.groups_by_groupname", "users_groups.user"],
+    )
+
+    @skipUnlessHypothesisJsonSchema()
+    @given(from_schema(SCHEMA))
+    def test_validate_full_schema(self, config):
+        """Validate ``config`` against the cloud-config schema, strictly.
+
+        ``config`` is supplied through the ``given(from_schema(SCHEMA))``
+        decorator. There is no explicit assertion: the test passes when
+        ``validate_cloudconfig_schema`` returns without raising.
+        """
+        validate_cloudconfig_schema(config, strict=True)
diff --git a/tests/unittests/helpers.py b/tests/unittests/helpers.py
@@ -30,6 +30,7 @@
 )
 from cloudinit.sources import DataSourceNone
 from cloudinit.templater import JINJA_AVAILABLE
+from tests.hypothesis_jsonschema import HAS_HYPOTHESIS_JSONSCHEMA
 
 _real_subp = subp.subp
 
@@ -522,6 +523,13 @@ def skipIfJinja():
     return skipIf(JINJA_AVAILABLE, "Jinja dependency present.")
 
 
+def skipUnlessHypothesisJsonSchema():
+    return skipIf(
+        not HAS_HYPOTHESIS_JSONSCHEMA,
+        "No python-hypothesis-jsonschema dependency present.",
+    )
+
+
 # older versions of mock do not have the useful 'assert_not_called'
 if not hasattr(mock.Mock, "assert_not_called"):
 

diff --git a/tox.ini b/tox.ini
@@ -1,5 +1,13 @@
 [tox]
-envlist = py3, lowest-supported-dev, black, flake8, isort, mypy, pylint
+envlist =
+    py3,
+    lowest-supported-dev,
+    hypothesis-slow,
+    black,
+    flake8,
+    isort,
+    mypy,
+    pylint
 recreate = True
 
 [doc8]
@@ -15,6 +23,8 @@ passenv=
 [format_deps]
 black==22.3.0
 flake8==4.0.1
+hypothesis==6.31.6
+hypothesis_jsonschema==0.20.1
 isort==5.10.1
 mypy==0.950
 pylint==2.13.9
@@ -49,19 +59,23 @@ commands = {envpython} -m isort . --check-only
 
 [testenv:mypy]
 deps =
+    hypothesis=={[format_deps]hypothesis}
+    hypothesis_jsonschema=={[format_deps]hypothesis_jsonschema}
     mypy=={[format_deps]mypy}
+    pytest=={[format_deps]pytest}
     types-jsonschema=={[format_deps]types-jsonschema}
     types-oauthlib=={[format_deps]types-oauthlib}
     types-pyyaml=={[format_deps]types-PyYAML}
     types-requests=={[format_deps]types-requests}
     types-setuptools=={[format_deps]types-setuptools}
-    pytest=={[format_deps]pytest}
 commands = {envpython} -m mypy cloudinit/ tests/ tools/
 
 [testenv:check_format]
 deps =
     black=={[format_deps]black}
     flake8=={[format_deps]flake8}
+    hypothesis=={[format_deps]hypothesis}
+    hypothesis_jsonschema=={[format_deps]hypothesis_jsonschema}
     isort=={[format_deps]isort}
     mypy=={[format_deps]mypy}
     pylint=={[format_deps]pylint}
@@ -118,8 +132,17 @@ deps =
     -r{toxinidir}/test-requirements.txt
 commands = {envpython} -m pytest \
             --durations 10 \
-            {posargs:--cov=cloudinit --cov-branch \
-            tests/unittests}
+            -m "not hypothesis_slow" \
+            {posargs:--cov=cloudinit --cov-branch tests/unittests}
+
+[testenv:hypothesis-slow]
+deps =
+    hypothesis==6.31.6
+    hypothesis_jsonschema==0.20.1
+    -r{toxinidir}/test-requirements.txt
+commands = {envpython} -m pytest \
+            -m hypothesis_slow \
+            {posargs:--hypothesis-show-statistics tests/unittests}
 
 [lowest-supported-deps]
 # Tox is going to install requirements from pip. This is fine for
@@ -257,25 +280,26 @@ addopts = --strict
 log_format = %(asctime)s %(levelname)-9s %(name)s:%(filename)s:%(lineno)d %(message)s
 log_date_format = %Y-%m-%d %H:%M:%S
 markers =
-    allow_subp_for: allow subp usage for the given commands (disable_subp_usage)
+    adhoc: only run on adhoc basis, not in any CI environment (travis or jenkins)
     allow_all_subp: allow all subp usage (disable_subp_usage)
+    allow_subp_for: allow subp usage for the given commands (disable_subp_usage)
+    azure: test will only run on Azure platform
     ci: run this integration test as part of CI test runs
     ds_sys_cfg: a sys_cfg dict to be used by datasource fixtures
     ec2: test will only run on EC2 platform
     gce: test will only run on GCE platform
-    azure: test will only run on Azure platform
-    oci: test will only run on OCI platform
-    openstack: test will only run on openstack platform
+    hypothesis_slow: hypothesis test too slow to run as unit test
+    instance_name: the name to be used for the test instance
+    is_iscsi: whether an instance has iscsi net cfg or not
     lxd_config_dict: set the config_dict passed on LXD instance creation
     lxd_container: test will only run in LXD container
     lxd_setup: specify callable to be called between init and start
     lxd_use_exec: `execute` will use `lxc exec` instead of SSH
     lxd_vm: test will only run in LXD VM
-    not_bionic: test cannot run on the bionic release
     no_container: test cannot run in a container
-    user_data: the user data to be passed to the test instance
-    instance_name: the name to be used for the test instance
+    not_bionic: test cannot run on the bionic release
+    oci: test will only run on OCI platform
+    openstack: test will only run on openstack platform
     ubuntu: this test should run on Ubuntu
     unstable: skip this test because it is flakey
-    adhoc: only run on adhoc basis, not in any CI environment (travis or jenkins)
-    is_iscsi: whether is an instance has iscsi net cfg or not
+    user_data: the user data to be passed to the test instance