From f2133009cff1c7091f7754bddee3d0c04991ad99 Mon Sep 17 00:00:00 2001
From: Peder Hovdan Andresen <107681714+pederhan@users.noreply.github.com>
Date: Tue, 3 Dec 2024 10:45:01 +0100
Subject: [PATCH] Compute default error duration if omitted (#91)

* Compute default error duration if omitted

* Add hypothesis test
---
 README.md                    |  2 ++
 pyproject.toml               |  2 +-
 tests/test_config.py         | 49 +++++++++++++++++++++++++-----------
 zabbix_auto_config/models.py | 20 ++++++++++++---
 4 files changed, 54 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index de842a8..fb48e10 100644
--- a/README.md
+++ b/README.md
@@ -263,6 +263,8 @@ For instance, with an `error_tolerance` of 5 and an `update_interval` of 60, `er
 
 A useful guide is to set `error_duration` as `(error_tolerance + 1) * update_interval`, providing an additional buffer equivalent to one update interval.
 
+If `error_tolerance` is set, but `error_duration` is not, the application will set an `error_duration` that is slightly longer than the minimum required to ensure correct error detection.
+
 #### exit_on_error
 
 `exit_on_error` (default: true) determines if the application should terminate, or disable the failing collector when number of errors exceed the tolerance. If set to `true`, the application will exit. Otherwise, the collector will be disabled for `disable_duration` seconds. For backwards compatibility with previous versions of Zabbix-auto-config, this option defaults to `true`. In a future major version, the default will be changed to `false`.
diff --git a/pyproject.toml b/pyproject.toml
index 5059058..53358da 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,7 +36,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-test = ["pytest>=7.4.3", "pytest-timeout>=2.2.0", "hypothesis>=6.62.1"]
+test = ["pytest>=7.4.3", "pytest-timeout>=2.2.0", "hypothesis>=6.62.1", "inline-snapshot>=0.14.0"]
 
 [project.urls]
 Source = "https://github.com/unioslo/zabbix-auto-config"
diff --git a/tests/test_config.py b/tests/test_config.py
index 9786160..0e2af54 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -4,6 +4,10 @@
 
 import pytest
 import tomli
+from hypothesis import given
+from hypothesis import settings
+from hypothesis import strategies as st
+from inline_snapshot import snapshot
 from pydantic import ValidationError
 
 import zabbix_auto_config.models as models
@@ -69,13 +73,10 @@ def test_sourcecollectorsettings_no_tolerance() -> None:
         error_duration=0,
     )
     assert settings.error_tolerance == 0
-    # In case the actual implementaiton changes in the future, we don't
-    # want to test the _exact_ value, but we know it will not be 0
-    assert settings.error_duration > 0
+    assert settings.error_duration == snapshot(9999)
 
 
 def test_sourcecollectorsettings_no_error_duration():
-    # TODO: check if we can just remove this test
     # In order to not have an error_duration, error_tolerance must be 0 too
     settings = models.SourceCollectorSettings(
         module_name="foo",
@@ -83,18 +84,33 @@ def test_sourcecollectorsettings_no_error_duration():
         error_duration=0,
         error_tolerance=0,
     )
-    # See docstring in test_sourcecollectorsettings_no_tolerance
-    assert settings.error_duration > 0
+    assert settings.error_duration == snapshot(9999)
 
-    # With tolerance raises an error
-    # NOTE: we test the error message in depth in test_sourcecollectorsettings_invalid_error_duration
-    with pytest.raises(ValidationError):
-        models.SourceCollectorSettings(
-            module_name="foo",
-            update_interval=60,
-            error_duration=0,
-            error_tolerance=5,
-        )
+    # With tolerance we get a default value
+    settings = models.SourceCollectorSettings(
+        module_name="foo",
+        update_interval=60,
+        error_duration=0,
+        error_tolerance=5,
+    )
+
+    assert settings.error_duration == snapshot(354)
+
+
+@given(
+    update_interval=st.integers(min_value=0, max_value=100),
+    error_tolerance=st.integers(min_value=0, max_value=100),
+)
+@settings(max_examples=1000)
+def test_sourcecollectorsettings_no_error_duration_fuzz(
+    update_interval: int, error_tolerance: int
+):
+    """Test model with a variety of update intervals and error tolerances"""
+    models.SourceCollectorSettings(
+        module_name="foo",
+        update_interval=update_interval,
+        error_tolerance=error_tolerance,
+    )
 
 
 def test_sourcecollectorsettings_duration_too_short():
@@ -112,6 +128,9 @@ def test_sourcecollectorsettings_duration_too_short():
     error = errors[0]
     assert "greater than 300" in error["msg"]
     assert error["type"] == "value_error"
+    assert error["msg"] == snapshot(
+        "Value error, Invalid value for error_duration (180). It should be greater than 300: error_tolerance (5) * update_interval (60)"
+    )
 
 
 def test_sourcecollectorsettings_duration_negative():
diff --git a/zabbix_auto_config/models.py b/zabbix_auto_config/models.py
index f7076d3..668a04b 100644
--- a/zabbix_auto_config/models.py
+++ b/zabbix_auto_config/models.py
@@ -213,6 +213,7 @@ class SourceCollectorSettings(ConfigBaseModel, extra="allow"):
         description=(
             "The duration in seconds that errors are stored."
             "If `error_tolerance` errors occur in this period, the collector is marked as failing."
+            "If `error_tolerance`is set, but this is not, it is set to `round(error_tolerance * update_interval + (update_interval*0.9))`."
         ),
         ge=0,
     )
@@ -234,12 +235,25 @@ def _validate_error_duration_is_greater(self) -> Self:
             # hack to ensure RollingErrorCounter.count() doesn't discard the error
             # before it is counted
             self.error_duration = 9999
-        elif (
+            return self
+
+        # Set default error duration if not set
+        if self.error_tolerance > 0 and not self.error_duration:
+            # Set the error duration to tolerance * update_interval + 90% of update_interval
+            # so that it's possible to hit the error tolerance within the duration if all
+            # errors happen in succession.
+            self.error_duration = round(
+                self.error_tolerance * self.update_interval
+                + (self.update_interval * 0.9)
+            )
+
+        # Ensure the error duration is greater than the product of the error tolerance and update interval
+        if (
             product := self.error_tolerance * self.update_interval
         ) > self.error_duration:
             raise ValueError(
-                f"Invalid value for error_duration ({self.error_duration}). It should be greater than error_tolerance ({self.error_tolerance}) "
-                f"times update_interval ({self.update_interval}), i.e., greater than {product}. Please adjust accordingly."
+                f"Invalid value for error_duration ({self.error_duration}). "
+                f"It should be greater than {product}: error_tolerance ({self.error_tolerance}) * update_interval ({self.update_interval})"
             )
         return self