Skip to content

Commit

Permalink
Compute default error duration if omitted (#91)
Browse files Browse the repository at this point in the history
* Compute default error duration if omitted

* Add hypothesis test
  • Loading branch information
pederhan authored Dec 3, 2024
1 parent a42e0d1 commit f213300
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 19 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ For instance, with an `error_tolerance` of 5 and an `update_interval` of 60, `er

A useful guide is to set `error_duration` as `(error_tolerance + 1) * update_interval`, providing an additional buffer equivalent to one update interval.

If `error_tolerance` is set, but `error_duration` is not, the application will set an `error_duration` that is slightly longer than the minimum required to ensure correct error detection.

#### exit_on_error

`exit_on_error` (default: true) determines if the application should terminate, or disable the failing collector when number of errors exceed the tolerance. If set to `true`, the application will exit. Otherwise, the collector will be disabled for `disable_duration` seconds. For backwards compatibility with previous versions of Zabbix-auto-config, this option defaults to `true`. In a future major version, the default will be changed to `false`.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ dependencies = [
]

[project.optional-dependencies]
test = ["pytest>=7.4.3", "pytest-timeout>=2.2.0", "hypothesis>=6.62.1"]
test = ["pytest>=7.4.3", "pytest-timeout>=2.2.0", "hypothesis>=6.62.1", "inline-snapshot>=0.14.0"]

[project.urls]
Source = "https://github.com/unioslo/zabbix-auto-config"
Expand Down
49 changes: 34 additions & 15 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

import pytest
import tomli
from hypothesis import given
from hypothesis import settings
from hypothesis import strategies as st
from inline_snapshot import snapshot
from pydantic import ValidationError

import zabbix_auto_config.models as models
Expand Down Expand Up @@ -69,32 +73,44 @@ def test_sourcecollectorsettings_no_tolerance() -> None:
error_duration=0,
)
assert settings.error_tolerance == 0
# In case the actual implementaiton changes in the future, we don't
# want to test the _exact_ value, but we know it will not be 0
assert settings.error_duration > 0
assert settings.error_duration == snapshot(9999)


def test_sourcecollectorsettings_no_error_duration():
# TODO: check if we can just remove this test
# In order to not have an error_duration, error_tolerance must be 0 too
settings = models.SourceCollectorSettings(
module_name="foo",
update_interval=60,
error_duration=0,
error_tolerance=0,
)
# See docstring in test_sourcecollectorsettings_no_tolerance
assert settings.error_duration > 0
assert settings.error_duration == snapshot(9999)

# With tolerance raises an error
# NOTE: we test the error message in depth in test_sourcecollectorsettings_invalid_error_duration
with pytest.raises(ValidationError):
models.SourceCollectorSettings(
module_name="foo",
update_interval=60,
error_duration=0,
error_tolerance=5,
)
# With tolerance we get a default value
settings = models.SourceCollectorSettings(
module_name="foo",
update_interval=60,
error_duration=0,
error_tolerance=5,
)

assert settings.error_duration == snapshot(354)


@given(
update_interval=st.integers(min_value=0, max_value=100),
error_tolerance=st.integers(min_value=0, max_value=100),
)
@settings(max_examples=1000)
def test_sourcecollectorsettings_no_error_duration_fuzz(
update_interval: int, error_tolerance: int
):
"""Test model with a variety of update intervals and error tolerances"""
models.SourceCollectorSettings(
module_name="foo",
update_interval=update_interval,
error_tolerance=error_tolerance,
)


def test_sourcecollectorsettings_duration_too_short():
Expand All @@ -112,6 +128,9 @@ def test_sourcecollectorsettings_duration_too_short():
error = errors[0]
assert "greater than 300" in error["msg"]
assert error["type"] == "value_error"
assert error["msg"] == snapshot(
"Value error, Invalid value for error_duration (180). It should be greater than 300: error_tolerance (5) * update_interval (60)"
)


def test_sourcecollectorsettings_duration_negative():
Expand Down
20 changes: 17 additions & 3 deletions zabbix_auto_config/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ class SourceCollectorSettings(ConfigBaseModel, extra="allow"):
description=(
"The duration in seconds that errors are stored."
"If `error_tolerance` errors occur in this period, the collector is marked as failing."
"If `error_tolerance`is set, but this is not, it is set to `round(error_tolerance * update_interval + (update_interval*0.9))`."
),
ge=0,
)
Expand All @@ -234,12 +235,25 @@ def _validate_error_duration_is_greater(self) -> Self:
# hack to ensure RollingErrorCounter.count() doesn't discard the error
# before it is counted
self.error_duration = 9999
elif (
return self

# Set default error duration if not set
if self.error_tolerance > 0 and not self.error_duration:
# Set the error duration to tolerance * update_interval + 90% of update_interval
# so that it's possible to hit the error tolerance within the duration if all
# errors happen in succession.
self.error_duration = round(
self.error_tolerance * self.update_interval
+ (self.update_interval * 0.9)
)

# Ensure the error duration is greater than the product of the error tolerance and update interval
if (
product := self.error_tolerance * self.update_interval
) > self.error_duration:
raise ValueError(
f"Invalid value for error_duration ({self.error_duration}). It should be greater than error_tolerance ({self.error_tolerance}) "
f"times update_interval ({self.update_interval}), i.e., greater than {product}. Please adjust accordingly."
f"Invalid value for error_duration ({self.error_duration}). "
f"It should be greater than {product}: error_tolerance ({self.error_tolerance}) * update_interval ({self.update_interval})"
)
return self

Expand Down

0 comments on commit f213300

Please sign in to comment.