diff --git a/CHANGELOG.md b/CHANGELOG.md index 537507eb9..1f2811cda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - fix `confluent_kafka.store_offsets` if `last_valid_record` is `None`, can happen if a rebalancing happens before the first message was pulled. +- fix pseudonymizer cache metrics not updated ## 14.0.0 ### Breaking diff --git a/logprep/processor/pseudonymizer/processor.py b/logprep/processor/pseudonymizer/processor.py index a7dc90f73..b4ec2159a 100644 --- a/logprep/processor/pseudonymizer/processor.py +++ b/logprep/processor/pseudonymizer/processor.py @@ -344,9 +344,9 @@ def _wrap_hash(self, hash_string: str) -> str: def _update_cache_metrics(self): cache_info_pseudonyms = self._get_pseudonym_dict_cached.cache_info() cache_info_urls = self._pseudonymize_url_cached.cache_info() - self.metrics.new_results = cache_info_pseudonyms.misses + cache_info_urls.misses - self.metrics.cached_results = cache_info_pseudonyms.hits + cache_info_urls.hits - self.metrics.num_cache_entries = cache_info_pseudonyms.currsize + cache_info_urls.currsize - self.metrics.cache_load = (cache_info_pseudonyms.currsize + cache_info_urls.currsize) / ( + self.metrics.new_results += cache_info_pseudonyms.misses + cache_info_urls.misses + self.metrics.cached_results += cache_info_pseudonyms.hits + cache_info_urls.hits + self.metrics.num_cache_entries += cache_info_pseudonyms.currsize + cache_info_urls.currsize + self.metrics.cache_load += (cache_info_pseudonyms.currsize + cache_info_urls.currsize) / ( cache_info_pseudonyms.maxsize + cache_info_urls.maxsize ) diff --git a/tests/unit/processor/pseudonymizer/test_pseudonymizer.py b/tests/unit/processor/pseudonymizer/test_pseudonymizer.py index 1878885af..146aa96d3 100644 --- a/tests/unit/processor/pseudonymizer/test_pseudonymizer.py +++ b/tests/unit/processor/pseudonymizer/test_pseudonymizer.py @@ -821,9 +821,6 @@ def test_pseudonymize_string_adds_pseudonyms(self): assert len(self.object.result.data) == 1 def 
test_resolve_from_cache_pseudonym(self): - self.object.metrics.new_results = 0 - self.object.metrics.cached_results = 0 - self.object.metrics.num_cache_entries = 0 rule_dict = { "filter": "winlog.event_id: 1234 AND winlog.provider_name: Test456", "pseudonymizer": { @@ -844,15 +841,15 @@ def test_resolve_from_cache_pseudonym(self): } } self._load_specific_rule(rule_dict) + self.object.metrics.new_results = 0 + self.object.metrics.cached_results = 0 + self.object.metrics.num_cache_entries = 0 self.object.process(event) assert self.object.metrics.new_results == 1 assert self.object.metrics.cached_results == 1 assert self.object.metrics.num_cache_entries == 1 def test_resolve_from_cache_pseudonymize_urls(self): - self.object.metrics.new_results = 0 - self.object.metrics.cached_results = 0 - self.object.metrics.num_cache_entries = 0 rule_dict = { "filter": "filter_this: does_not_matter", "pseudonymizer": { @@ -869,6 +866,9 @@ def test_resolve_from_cache_pseudonymize_urls(self): "and_pseudo_this": "https://www.pseudo.this.de", } self._load_specific_rule(rule_dict) + self.object.metrics.new_results = 0 + self.object.metrics.cached_results = 0 + self.object.metrics.num_cache_entries = 0 self.object.process(event) # 1 subdomains -> pseudonym_cache, 1 url -> url_cache assert self.object.metrics.new_results == 2 @@ -1089,3 +1089,37 @@ def test_setup_raises_invalid_configuration_on_missing_regex_mapping(self): ) with pytest.raises(InvalidConfigurationError, match=error_message): self.object.setup() + + def test_cache_metrics_updated(self): + rule_dict = { + "filter": "winlog.event_id: 1234 AND winlog.provider_name: Test456", + "pseudonymizer": { + "mapping": { + "winlog.event_data.param1": "RE_WHOLE_FIELD", + } + }, + } + event = { + "@timestamp": "custom timestamp", + "winlog": { + "event_id": 1234, + "provider_name": "Test456", + "event_data": { + "param1": "Pseudonymize me - appears twice!", + }, + }, + } + self._load_specific_rule(rule_dict) + + 
self.object.metrics.new_results = 0 + self.object.metrics.cached_results = 0 + self.object.metrics.num_cache_entries = 0 + + self.object.process(deepcopy(event)) + self.object.process(deepcopy(event)) + self.object.process(event) + # the event is identical each time, so after the first call the pseudonym is served from the cache + # metrics are mocked as plain integers here, so each `+=` adds the current cache_info values to the previous total + assert self.object.metrics.new_results == 3 + assert self.object.metrics.cached_results == 3 + assert self.object.metrics.num_cache_entries == 3