From f2133009cff1c7091f7754bddee3d0c04991ad99 Mon Sep 17 00:00:00 2001 From: Peder Hovdan Andresen <107681714+pederhan@users.noreply.github.com> Date: Tue, 3 Dec 2024 10:45:01 +0100 Subject: [PATCH] Compute default error duration if omitted (#91) * Compute default error duration if omitted * Add hypothesis test --- README.md | 2 ++ pyproject.toml | 2 +- tests/test_config.py | 49 +++++++++++++++++++++++++----------- zabbix_auto_config/models.py | 20 ++++++++++++--- 4 files changed, 54 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index de842a8..fb48e10 100644 --- a/README.md +++ b/README.md @@ -263,6 +263,8 @@ For instance, with an `error_tolerance` of 5 and an `update_interval` of 60, `er A useful guide is to set `error_duration` as `(error_tolerance + 1) * update_interval`, providing an additional buffer equivalent to one update interval. +If `error_tolerance` is set, but `error_duration` is not, the application will set an `error_duration` that is slightly longer than the minimum required to ensure correct error detection. + #### exit_on_error `exit_on_error` (default: true) determines if the application should terminate, or disable the failing collector when number of errors exceed the tolerance. If set to `true`, the application will exit. Otherwise, the collector will be disabled for `disable_duration` seconds. For backwards compatibility with previous versions of Zabbix-auto-config, this option defaults to `true`. In a future major version, the default will be changed to `false`. diff --git a/pyproject.toml b/pyproject.toml index 5059058..53358da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ ] [project.optional-dependencies] -test = ["pytest>=7.4.3", "pytest-timeout>=2.2.0", "hypothesis>=6.62.1"] +test = ["pytest>=7.4.3", "pytest-timeout>=2.2.0", "hypothesis>=6.62.1", "inline-snapshot>=0.14.0"] [project.urls] Source = "https://github.com/unioslo/zabbix-auto-config" diff --git a/tests/test_config.py b/tests/test_config.py index 9786160..0e2af54 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -4,6 +4,10 @@ import pytest import tomli +from hypothesis import given +from hypothesis import settings +from hypothesis import strategies as st +from inline_snapshot import snapshot from pydantic import ValidationError import zabbix_auto_config.models as models @@ -69,13 +73,10 @@ def test_sourcecollectorsettings_no_tolerance() -> None: error_duration=0, ) assert settings.error_tolerance == 0 - # In case the actual implementaiton changes in the future, we don't - # want to test the _exact_ value, but we know it will not be 0 - assert settings.error_duration > 0 + assert settings.error_duration == snapshot(9999) def test_sourcecollectorsettings_no_error_duration(): - # TODO: check if we can just remove this test # In order to not have an error_duration, error_tolerance must be 0 too settings = models.SourceCollectorSettings( module_name="foo", @@ -83,18 +84,33 @@ def test_sourcecollectorsettings_no_error_duration(): error_duration=0, error_tolerance=0, ) - # See docstring in test_sourcecollectorsettings_no_tolerance - assert settings.error_duration > 0 + assert settings.error_duration == snapshot(9999) - # With tolerance raises an error - # NOTE: we test the error message in depth in test_sourcecollectorsettings_invalid_error_duration - with pytest.raises(ValidationError): - models.SourceCollectorSettings( - module_name="foo", - update_interval=60, - error_duration=0, - error_tolerance=5, - ) + # With tolerance we get a default value + settings = models.SourceCollectorSettings( + module_name="foo", + update_interval=60, + error_duration=0, + error_tolerance=5, + ) + + assert settings.error_duration == snapshot(354) + + +@given( + update_interval=st.integers(min_value=0, max_value=100), + error_tolerance=st.integers(min_value=0, max_value=100), +) +@settings(max_examples=1000) +def test_sourcecollectorsettings_no_error_duration_fuzz( + update_interval: int, error_tolerance: int +): + """Test model with a variety of update intervals and error tolerances""" + models.SourceCollectorSettings( + module_name="foo", + update_interval=update_interval, + error_tolerance=error_tolerance, + ) def test_sourcecollectorsettings_duration_too_short(): @@ -112,6 +128,9 @@ def test_sourcecollectorsettings_duration_too_short(): error = errors[0] assert "greater than 300" in error["msg"] assert error["type"] == "value_error" + assert error["msg"] == snapshot( + "Value error, Invalid value for error_duration (180). It should be greater than 300: error_tolerance (5) * update_interval (60)" + ) def test_sourcecollectorsettings_duration_negative(): diff --git a/zabbix_auto_config/models.py b/zabbix_auto_config/models.py index f7076d3..668a04b 100644 --- a/zabbix_auto_config/models.py +++ b/zabbix_auto_config/models.py @@ -213,6 +213,7 @@ class SourceCollectorSettings(ConfigBaseModel, extra="allow"): description=( "The duration in seconds that errors are stored." "If `error_tolerance` errors occur in this period, the collector is marked as failing." + "If `error_tolerance`is set, but this is not, it is set to `round(error_tolerance * update_interval + (update_interval*0.9))`." ), ge=0, ) @@ -234,12 +235,25 @@ def _validate_error_duration_is_greater(self) -> Self: # hack to ensure RollingErrorCounter.count() doesn't discard the error # before it is counted self.error_duration = 9999 - elif ( + return self + + # Set default error duration if not set + if self.error_tolerance > 0 and not self.error_duration: + # Set the error duration to tolerance * update_interval + 90% of update_interval + # so that it's possible to hit the error tolerance within the duration if all + # errors happen in succession. + self.error_duration = round( + self.error_tolerance * self.update_interval + + (self.update_interval * 0.9) + ) + + # Ensure the error duration is greater than the product of the error tolerance and update interval + if ( product := self.error_tolerance * self.update_interval ) > self.error_duration: raise ValueError( - f"Invalid value for error_duration ({self.error_duration}). It should be greater than error_tolerance ({self.error_tolerance}) " - f"times update_interval ({self.update_interval}), i.e., greater than {product}. Please adjust accordingly." + f"Invalid value for error_duration ({self.error_duration}). " + f"It should be greater than {product}: error_tolerance ({self.error_tolerance}) * update_interval ({self.update_interval})" ) return self