reimplement Amides Metrics
ekneg54 committed Oct 19, 2023
1 parent 396f406 commit a8266a3
Showing 5 changed files with 119 additions and 67 deletions.
18 changes: 17 additions & 1 deletion logprep/metrics/metrics.py
@@ -2,7 +2,7 @@
from abc import ABC, abstractmethod

from attr import asdict, define, field, validators
-from prometheus_client import CollectorRegistry, Counter, Histogram
from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram


def is_public(attribute, _):
@@ -74,6 +74,13 @@ def init_tracker(self):
buckets=(0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1),
registry=self._registry,
)
if isinstance(self, GaugeMetric):
tracker = Gauge(
name=self.fullname,
documentation=self.description,
labelnames=self.labels.keys(),
registry=self._registry,
)
tracker.labels(**self.labels)

self.trackers.update({self.fullname: tracker})
@@ -103,6 +110,15 @@ def __add__(self, other):
return self


@define(kw_only=True)
class GaugeMetric(Metric):
trackers: dict = {}

def __add__(self, other):
self.trackers.get(self.fullname).labels(**self.labels).set(other)
return self


def calculate_new_average(current_average, next_sample, sample_counter):
"""Calculate a new average by combining a new sample with a sample counter"""
average_multiple = current_average * sample_counter
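For context, the new `GaugeMetric` keeps logprep's `metric += value` update syntax but forwards it to `Gauge.set()`, so each update overwrites the previous value instead of accumulating. A minimal standalone sketch of that behaviour, using `prometheus_client` directly rather than the logprep classes (the metric and label names are invented for illustration):

```python
from prometheus_client import CollectorRegistry, Gauge

registry = CollectorRegistry()
cache_load = Gauge(
    name="amides_cache_load",  # invented name, for illustration only
    documentation="Relative cache load.",
    labelnames=["component"],
    registry=registry,
)

# Mimic GaugeMetric.__add__: "+=" semantics that set the value instead of adding it.
cache_load.labels(component="amides").set(0.2)
cache_load.labels(component="amides").set(0.4)  # overwrites, does not sum to 0.6

print(registry.get_sample_value("amides_cache_load", {"component": "amides"}))  # 0.4
```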
99 changes: 55 additions & 44 deletions logprep/processor/amides/processor.py
@@ -93,12 +93,18 @@
from attr import define, field, validators

from logprep.abc.processor import Processor
-from logprep.metrics.metrics import calculate_new_average
from logprep.metrics.metrics import (
    CounterMetric,
    GaugeMetric,
    HistogramMetric,
    calculate_new_average,
)
from logprep.processor.amides.detection import MisuseDetector, RuleAttributor
from logprep.processor.amides.normalize import CommandLineNormalizer
from logprep.processor.amides.rule import AmidesRule
from logprep.util.getter import GetterFactory
from logprep.util.helper import get_dotted_field_value
from logprep.util.time_measurement import TimeMeasurement


class Amides(Processor):
@@ -123,42 +129,55 @@ class Config(Processor.Config):
class Metrics(Processor.Metrics):
"""Track statistics specific for Amides processor instances."""

-        total_cmdlines: int = 0
        total_cmdlines: CounterMetric = field(
            factory=lambda: CounterMetric(
                description="Total number of command lines processed.",
                name="total_cmdlines",
            )
        )
        """Total number of command lines processed."""
-        new_results: int = 0
        new_results: GaugeMetric = field(
            factory=lambda: GaugeMetric(
                description="Number of command lines that triggered detection and rule attribution.",
                name="new_results",
            )
        )
        """Number of command lines that triggered detection and rule attribution."""
-        cached_results: int = 0
        cached_results: GaugeMetric = field(
            factory=lambda: GaugeMetric(
                description="Number of command lines that could be resolved from cache.",
                name="cached_results",
            )
        )
        """Number of command lines that could be resolved from cache."""
-        num_cache_entries: int = 0
        num_cache_entries: GaugeMetric = field(
            factory=lambda: GaugeMetric(
                description="Absolute number of current cache entries.",
                name="num_cache_entries",
            )
        )
        """Absolute number of current cache entries."""
-        cache_load: float = 0.0
        cache_load: GaugeMetric = field(
            factory=lambda: GaugeMetric(
                description="Relative cache load.",
                name="cache_load",
            )
        )
        """Relative cache load."""
-        mean_misuse_detection_time: float = 0.0
-        """Mean processing time of command lines classified by the misuse detector."""
-        _mean_misuse_detection_time_sample_counter: int = 0
-        mean_rule_attribution_time: float = 0.0
-        """Mean processing time of command lines attributed by the rule attributor."""
-        _mean_rule_attribution_time_sample_counter: int = 0
-
-        def update_mean_misuse_detection_time(self, new_sample: float):
-            """Updates the mean processing time of the misuse detection."""
-            new_mean, new_sample_counter = calculate_new_average(
-                self.mean_misuse_detection_time,
-                new_sample,
-                self._mean_misuse_detection_time_sample_counter,
-            )
-            self.mean_misuse_detection_time = new_mean
-            self._mean_misuse_detection_time_sample_counter = new_sample_counter
-
-        def update_mean_rule_attribution_time(self, new_sample: float):
-            """Updates the mean processing time of the rule attribution model."""
-            new_mean, new_sample_counter = calculate_new_average(
-                self.mean_rule_attribution_time,
-                new_sample,
-                self._mean_rule_attribution_time_sample_counter,
-            )
-            self.mean_rule_attribution_time = new_mean
-            self._mean_rule_attribution_time_sample_counter = new_sample_counter
        mean_misuse_detection_time: HistogramMetric = field(
            factory=lambda: HistogramMetric(
                description="Mean processing time of command lines classified by the misuse detector.",
                name="mean_misuse_detection_time",
            )
        )
        """Mean processing time of command lines classified by the misuse detector."""
        mean_rule_attribution_time: HistogramMetric = field(
            factory=lambda: HistogramMetric(
                description="Mean processing time of command lines attributed by the rule attributor.",
                name="mean_rule_attribution_time",
            )
        )
        """Mean processing time of command lines attributed by the rule attributor."""

__slots__ = (
"_misuse_detector",
@@ -229,27 +248,19 @@ def _evaluate_cmdline(self, cmdline: str):

return result

    @TimeMeasurement.measure_time(metric_name="mean_misuse_detection_time")
    def _perform_misuse_detection(self, cmdline: str) -> Tuple[bool, float]:
-        begin = time()
        result = self._misuse_detector.detect(cmdline)
-        processing_time = time() - begin
-
-        self.metrics.update_mean_misuse_detection_time(processing_time)
-
        return result

    @TimeMeasurement.measure_time(metric_name="mean_rule_attribution_time")
    def _calculate_rule_attributions(self, cmdline: str) -> List[dict]:
-        begin = time()
        attributions = self._rule_attributor.attribute(cmdline)
-        processing_time = time() - begin
-
-        self.metrics.update_mean_rule_attribution_time(processing_time)
-
        return attributions

def _update_cache_metrics(self):
cache_info = self._evaluate_cmdline_cached.cache_info()
-        self.metrics.new_results = cache_info.misses
-        self.metrics.cached_results = cache_info.hits
-        self.metrics.num_cache_entries = cache_info.currsize
-        self.metrics.cache_load = cache_info.currsize / cache_info.maxsize
        self.metrics.new_results += cache_info.misses
        self.metrics.cached_results += cache_info.hits
        self.metrics.num_cache_entries += cache_info.currsize
        self.metrics.cache_load += cache_info.currsize / cache_info.maxsize
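`_update_cache_metrics` derives its values from the cached classifier's `cache_info()`, whose `hits`/`misses`/`currsize`/`maxsize` fields match Python's `functools.lru_cache`; `cache_load` is simply `currsize / maxsize`. A small self-contained sketch of where those numbers come from (the cached function below is a hypothetical stand-in, not the AMIDES classifier):

```python
from functools import lru_cache


@lru_cache(maxsize=5)
def classify(cmdline: str) -> bool:
    # hypothetical stand-in for the cached command-line evaluation
    return "malicious" in cmdline


classify("powershell -enc malicious")  # miss -> counted as a new result
classify("powershell -enc malicious")  # hit  -> counted as a cached result

info = classify.cache_info()
print(info.hits, info.misses, info.currsize)  # 1 1 1
print(info.currsize / info.maxsize)  # 0.2 -> reported as cache_load
```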
5 changes: 3 additions & 2 deletions logprep/util/time_measurement.py
@@ -8,7 +8,7 @@ class TimeMeasurement:
"""Measures the execution time of functions and adds the results to events via a decorator."""

@staticmethod
-    def measure_time():
    def measure_time(metric_name: str = "processing_time_per_event"):
"""Decorate function to measure execution time for function and add results to event."""

def inner_decorator(func):
@@ -20,7 +20,8 @@ def inner(*args, **kwargs): # nosemgrep
caller = args[0]
if func.__name__ in ("_process_rule_tree", "_process_rule"):
caller = args[-1]
-                caller.metrics.processing_time_per_event += processing_time
                metric = getattr(caller.metrics, metric_name)
                metric += processing_time
return result

return inner
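With the new `metric_name` parameter, the decorator routes the measured duration to whichever attribute of the caller's `metrics` object is named, relying on that metric's `+=` operator. A hedged, self-contained sketch of the pattern (the stub classes below are stand-ins; only the decorator shape mirrors the diff, and the rule-processing special case is omitted):

```python
import time
from functools import wraps


def measure_time(metric_name: str = "processing_time_per_event"):
    """Simplified stand-in mirroring TimeMeasurement.measure_time from the diff."""

    def inner_decorator(func):
        @wraps(func)
        def inner(*args, **kwargs):
            begin = time.perf_counter()
            result = func(*args, **kwargs)
            processing_time = time.perf_counter() - begin
            caller = args[0]
            metric = getattr(caller.metrics, metric_name)
            metric += processing_time  # the real HistogramMetric would record an observation here
            return result

        return inner

    return inner_decorator


class StubMetric:
    """Hypothetical mutable metric: '+=' updates the stored value in place."""

    def __init__(self):
        self.value = 0.0

    def __add__(self, other):
        self.value += other
        return self


class StubMetrics:
    def __init__(self):
        self.mean_misuse_detection_time = StubMetric()


class StubProcessor:
    def __init__(self):
        self.metrics = StubMetrics()

    @measure_time(metric_name="mean_misuse_detection_time")
    def detect(self, cmdline: str) -> bool:
        return "malicious" in cmdline


processor = StubProcessor()
processor.detect("powershell -enc malicious")
print(processor.metrics.mean_misuse_detection_time.value)  # small positive duration
```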
43 changes: 30 additions & 13 deletions tests/unit/processor/amides/test_amides.py
@@ -25,6 +25,10 @@ class TestAmides(BaseProcessorTestCase):
}

def test_process_event_malicious_process_command_line(self):
self.object.metrics.total_cmdlines = 0
self.object.metrics.new_results = 0
self.object.metrics.num_cache_entries = 0
self.object.metrics.cache_load = 0.0
self.object.setup()
document = {
"winlog": {
@@ -46,10 +50,12 @@
assert self.object.metrics.new_results == 1
assert self.object.metrics.num_cache_entries == 1
assert self.object.metrics.cache_load == 0.2
-        assert self.object.metrics.mean_misuse_detection_time != 0.0
-        assert self.object.metrics.mean_rule_attribution_time != 0.0

def test_process_event_benign_process_command_line(self):
self.object.metrics.total_cmdlines = 0
self.object.metrics.new_results = 0
self.object.metrics.num_cache_entries = 0
self.object.metrics.cache_load = 0.0
self.object.setup()
document = {
"winlog": {
@@ -68,8 +74,6 @@ def test_process_event_benign_process_command_line(self):
assert self.object.metrics.new_results == 1
assert self.object.metrics.num_cache_entries == 1
assert self.object.metrics.cache_load == 0.2
-        assert self.object.metrics.mean_misuse_detection_time != 0.0
-        assert self.object.metrics.mean_rule_attribution_time == 0.0

no_pc_events = [
{"winlog": {"event_id": 6005, "provider_name": "Microsoft-Windows-Sysmon"}},
@@ -80,6 +84,10 @@

@pytest.mark.parametrize("document", no_pc_events)
def test_process_event_no_process_creation_events(self, document):
self.object.metrics.total_cmdlines = 0
self.object.metrics.new_results = 0
self.object.metrics.num_cache_entries = 0
self.object.metrics.cache_load = 0.0
self.object.setup()

self.object.process(document)
@@ -88,10 +96,12 @@ def test_process_event_no_process_creation_events(self, document):
assert self.object.metrics.new_results == 0
assert self.object.metrics.num_cache_entries == 0
assert self.object.metrics.cache_load == 0.0
-        assert self.object.metrics.mean_misuse_detection_time == 0.0
-        assert self.object.metrics.mean_rule_attribution_time == 0.0

def test_process_event_without_command_line_field(self):
self.object.metrics.total_cmdlines = 0
self.object.metrics.new_results = 0
self.object.metrics.num_cache_entries = 0
self.object.metrics.cache_load = 0.0
self.object.setup()
document = {
"winlog": {"event_id": 1, "provider_name": "Microsoft-Windows-Sysmon"},
@@ -104,10 +114,13 @@ def test_process_event_without_command_line_field(self):
assert self.object.metrics.new_results == 0
assert self.object.metrics.num_cache_entries == 0
assert self.object.metrics.cache_load == 0.0
-        assert self.object.metrics.mean_misuse_detection_time == 0.0
-        assert self.object.metrics.mean_rule_attribution_time == 0.0

def test_classification_results_from_cache(self):
self.object.metrics.total_cmdlines = 0
self.object.metrics.new_results = 0
self.object.metrics.cached_results = 0
self.object.metrics.num_cache_entries = 0
self.object.metrics.cache_load = 0.0
self.object.setup()
document = {
"winlog": {
@@ -123,12 +136,16 @@ def test_classification_results_from_cache(self):

assert other_document.get("amides") == document.get("amides")
assert self.object.metrics.total_cmdlines == 2
-        assert self.object.metrics.new_results == 1
        # the unit tests mock the metrics with plain integer operations, so these
        # assertions look a little odd: we assert 2 because the same cached result
        # is added twice here, whereas the real metric implementation sets the
        # value instead of accumulating it
        assert self.object.metrics.new_results == 2
        assert self.object.metrics.num_cache_entries == 2
        assert self.object.metrics.cache_load == 0.4
        # end of the mocked-metric quirk
        assert self.object.metrics.cached_results == 1
-        assert self.object.metrics.num_cache_entries == 1
-        assert self.object.metrics.cache_load == 0.2
-        assert self.object.metrics.mean_misuse_detection_time != 0.0
-        assert self.object.metrics.mean_rule_attribution_time != 0.0

def test_process_event_raise_duplication_error(self, caplog):
self.object.setup()
21 changes: 14 additions & 7 deletions tests/unit/processor/base.py
@@ -2,6 +2,7 @@
# pylint: disable=protected-access
# pylint: disable=attribute-defined-outside-init

import itertools
import json
from copy import deepcopy
from logging import getLogger
@@ -277,14 +278,20 @@ def test_rule_has_metric(self, metric_name, metric_class):
metric_instance = getattr(self.object.rules[0].metrics, metric_name)
assert isinstance(metric_instance, metric_class)

-    def test_custom_metrics_are_metric_objects(self):
-        def asdict_filter(attribute, value):
-            block_list = ["_labels", "_prefix"]
-            return not any(
-                (attribute.name in block_list, value is None, isinstance(value, Callable))
-            )
    @staticmethod
    def asdict_filter(attribute, value):
        """Returns all attributes not in block list or None"""
        block_list = ["_labels", "_prefix"]
        return not any((attribute.name in block_list, value is None, isinstance(value, Callable)))

-        metric_attributes = asdict(self.object.metrics, filter=asdict_filter, recurse=False)
    def test_custom_metrics_are_metric_objects(self):
        metric_attributes = asdict(self.object.metrics, filter=self.asdict_filter, recurse=False)
assert all(
isinstance(value, Metric) for value in metric_attributes.values()
), "one of the metrics instance attributes is not an instance of type Metric"

def test_no_metrics_with_same_name(self):
metric_attributes = asdict(self.object.metrics, filter=self.asdict_filter, recurse=False)
pairs = itertools.combinations(metric_attributes.values(), 2)
for metric1, metric2 in pairs:
assert metric1.name != metric2.name, f"{metric1.name} == {metric2.name}"
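The new `test_no_metrics_with_same_name` uses `itertools.combinations` to compare every pair of declared metrics exactly once. A tiny standalone illustration of that pairwise check, using ad-hoc objects instead of logprep metrics:

```python
import itertools
from types import SimpleNamespace

metrics = [
    SimpleNamespace(name="total_cmdlines"),
    SimpleNamespace(name="new_results"),
    SimpleNamespace(name="cached_results"),
]

# every unordered pair is checked once, so duplicate names cannot hide
for metric1, metric2 in itertools.combinations(metrics, 2):
    assert metric1.name != metric2.name, f"{metric1.name} == {metric2.name}"
```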
