From 5c87128d0b4ac79eac9505eda241e6a509b44b02 Mon Sep 17 00:00:00 2001 From: djkhl Date: Wed, 14 Aug 2024 12:24:05 +0200 Subject: [PATCH] add first functioning but ugly way for timestamp normalization --- logprep/processor/pre_detector/processor.py | 63 +++++++++++-------- logprep/processor/pre_detector/rule.py | 32 ++++++++++ .../rules/generic/pre_detect_four.yml | 1 + .../rules/generic/pre_detect_one.json | 7 ++- .../pre_detector/test_pre_detector.py | 48 ++++++++++++-- 5 files changed, 120 insertions(+), 31 deletions(-) diff --git a/logprep/processor/pre_detector/processor.py b/logprep/processor/pre_detector/processor.py index 67a6ffa9e..752d25f25 100644 --- a/logprep/processor/pre_detector/processor.py +++ b/logprep/processor/pre_detector/processor.py @@ -30,15 +30,16 @@ .. automodule:: logprep.processor.pre_detector.rule """ -from datetime import datetime, timezone from functools import cached_property from uuid import uuid4 from attr import define, field, validators from logprep.abc.processor import Processor +from logprep.processor.base.exceptions import ProcessingWarning from logprep.processor.pre_detector.ip_alerter import IPAlerter from logprep.processor.pre_detector.rule import PreDetectorRule +from logprep.processor.timestamper.processor import Timestamper from logprep.util.helper import add_field_to, get_dotted_field_value from logprep.util.time import TimeParser, TimeParserException @@ -93,28 +94,17 @@ class Config(Processor.Config): def _ip_alerter(self): return IPAlerter(self._config.alert_ip_list_path) - def is_normalized_timestamp(self, timestamp: str): - """this method checks if the timestamp has been normalized""" - try: - datetime.fromisoformat(timestamp) - return True - except ValueError: - return False - - def detect_format_and_normalize_timestamp(self, timestamp): - """method for detecting the used source format of a timestamp and normalizing it""" - formats = [ - "%Y%m%d%H%M%S", - "UNIX", - ] - for form in formats: - if not self.is_normalized_timestamp(timestamp): - try: - return TimeParser.parse_datetime(timestamp, form, timezone.utc).isoformat() - except TimeParserException: - continue - else: - return timestamp + # def detect_format_and_normalize_timestamp(self, timestamp): + # """method for detecting the used source format of a timestamp and normalizing it""" + # # formats = [ + # # "%Y%m%d%H%M%S", + # # "UNIX", + # # ] + # # for form in formats: + # # try: + # # return TimeParser.parse_datetime(timestamp, form, timezone.utc).isoformat() + # # except TimeParserException: + # # continue def _apply_rules(self, event, rule): if not ( @@ -127,8 +117,31 @@ def _apply_rules(self, event, rule): timestamp = get_dotted_field_value(event, "@timestamp") if timestamp is not None: - timestamp = self.detect_format_and_normalize_timestamp(timestamp) - detection["@timestamp"] = timestamp + # timestamp = self.detect_format_and_normalize_timestamp(timestamp) + + source_timezone, target_timezone, source_formats = ( + rule.source_timezone, + rule.target_timezone, + rule.source_formats, + ) + parsed_successfully = False + for source_format in source_formats: + try: + parsed_datetime = TimeParser.parse_datetime( + timestamp, source_format, source_timezone + ) + except TimeParserException: + continue + result = ( + parsed_datetime.astimezone(target_timezone) + .isoformat() + .replace("+00:00", "Z") + ) + detection["@timestamp"] = result + parsed_successfully = True + break + if not parsed_successfully: + raise ProcessingWarning(str("Could not parse timestamp"), rule, event) def _get_detection_result(self, event: dict, rule: PreDetectorRule): pre_detection_id = get_dotted_field_value(event, "pre_detection_id") diff --git a/logprep/processor/pre_detector/rule.py b/logprep/processor/pre_detector/rule.py index 78c96ffd4..199aa944c 100644 --- a/logprep/processor/pre_detector/rule.py +++ b/logprep/processor/pre_detector/rule.py @@ -97,6 +97,7 @@ from functools import cached_property from typing import Optional, Union +from zoneinfo import ZoneInfo from attrs import asdict, define, field, validators @@ -133,6 +134,25 @@ class Config(Rule.Config): # pylint: disable=too-many-instance-attributes validator=validators.optional(validators.instance_of(str)), default=None ) """A link to the rule if applicable.""" + source_formats: list = field( + validator=validators.deep_iterable( + member_validator=validators.instance_of(str), + iterable_validator=validators.instance_of(list), + ), + default=["ISO8601"], + converter=lambda x: x if isinstance(x, list) else [x], + ) + """list of the source formats that can be given for normalizing the timestamp""" + timestamp_field: str = field(validator=validators.instance_of(str), default="@timestamp") + """the field which has the given timestamp to be normalized""" + source_timezone: ZoneInfo = field( + validator=[validators.instance_of(ZoneInfo)], converter=ZoneInfo, default="UTC" + ) + """ timezone of source_fields. defaults to :code:`UTC`""" + target_timezone: ZoneInfo = field( + validator=[validators.instance_of(ZoneInfo)], converter=ZoneInfo, default="UTC" + ) + """ timezone for target_field. defaults to :code:`UTC`""" def __eq__(self, other: "PreDetectorRule") -> bool: return all( @@ -160,4 +180,16 @@ def ip_fields(self) -> list: def description(self) -> str: return self._config.description + @property + def source_formats(self) -> str: + return self._config.source_formats + + @property + def target_timezone(self) -> str: + return self._config.target_timezone + + @property + def source_timezone(self) -> str: + return self._config.source_timezone + # pylint: enable=C0111 diff --git a/tests/testdata/unit/pre_detector/rules/generic/pre_detect_four.yml b/tests/testdata/unit/pre_detector/rules/generic/pre_detect_four.yml index 3c7b75179..b2eeca520 100644 --- a/tests/testdata/unit/pre_detector/rules/generic/pre_detect_four.yml +++ b/tests/testdata/unit/pre_detector/rules/generic/pre_detect_four.yml @@ -7,5 +7,6 @@ pre_detector: - attack.test1 - attack.test2 case_condition: directly + source_formats: ["UNIX"] sigma_fields: true description: Test rule four diff --git a/tests/testdata/unit/pre_detector/rules/generic/pre_detect_one.json b/tests/testdata/unit/pre_detector/rules/generic/pre_detect_one.json index bd2009401..5efcb980a 100644 --- a/tests/testdata/unit/pre_detector/rules/generic/pre_detect_one.json +++ b/tests/testdata/unit/pre_detector/rules/generic/pre_detect_one.json @@ -9,7 +9,12 @@ "attack.test1", "attack.test2" ], - "case_condition": "directly" + "case_condition": "directly", + "source_formats": [ + "ISO8601", + "%Y%m%d%H%M%S", + "UNIX" + ] }, "description": "Test rule one" } diff --git a/tests/unit/processor/pre_detector/test_pre_detector.py b/tests/unit/processor/pre_detector/test_pre_detector.py index c01bd1e8d..be51daf02 100644 --- a/tests/unit/processor/pre_detector/test_pre_detector.py +++ b/tests/unit/processor/pre_detector/test_pre_detector.py @@ -30,6 +30,10 @@ def test_perform_successful_pre_detection(self): "severity": "critical", "mitre": ["attack.test1", "attack.test2"], "case_condition": "directly", + "target_timezone": "UTC", + "source_timezone": "UTC", + "source_formats": "ISO8601", + "timestamp_field": "@timestamp", "description": "Test rule one", "rule_filter": '(winlog.event_id:"123" AND winlog.event_data.ServiceName:"VERY BAD")', # pylint: disable=line-too-long }, @@ -53,6 +57,10 @@ def test_perform_pre_detection_that_fails_if_filter_children_were_slots(self): "mitre": ["attack.test1", "attack.test2"], "rule_filter": '(A:"*bar*" AND NOT ((A:"foo*" AND A:"*baz")))', "severity": "critical", + "target_timezone": "UTC", + "source_timezone": "UTC", + "source_formats": "ISO8601", + "timestamp_field": "@timestamp", "title": "RULE_FOUR", }, ({"kafka": "pre_detector_alerts"},), @@ -82,6 +90,10 @@ def test_perform_successful_pre_detection_with_host_name(self): "mitre": ["attack.test1", "attack.test2"], "case_condition": "directly", "host": {"name": "Test hostname"}, + "target_timezone": "UTC", + "source_timezone": "UTC", + "source_formats": "ISO8601", + "timestamp_field": "@timestamp", "description": "Test rule one", "rule_filter": '(winlog.event_id:"123" AND winlog.event_data.ServiceName:"VERY BAD")', # pylint: disable=line-too-long }, @@ -104,6 +116,10 @@ def test_perform_successful_pre_detection_with_same_existing_pre_detection(self) "severity": "critical", "mitre": ["attack.test1", "attack.test2"], "case_condition": "directly", + "target_timezone": "UTC", + "source_timezone": "UTC", + "source_formats": "ISO8601", + "timestamp_field": "@timestamp", "description": "Test rule one", "rule_filter": '(winlog.event_id:"123" AND winlog.event_data.ServiceName:"VERY BAD")', # pylint: disable=line-too-long }, @@ -128,6 +144,10 @@ def test_perform_successful_pre_detection_with_pre_detector_complex_rule_suceeds "severity": "critical", "mitre": [], "case_condition": "directly", + "target_timezone": "UTC", + "source_timezone": "UTC", + "source_formats": "ISO8601", + "timestamp_field": "@timestamp", "description": "Test rule two", "rule_filter": '(tags:"test" AND process.program:"test" AND ' '(message:"test1*xyz" OR message:"test2*xyz"))', @@ -151,6 +171,10 @@ def test_perform_successful_pre_detection_with_pre_detector_complex_rule_succeed "severity": "critical", "mitre": [], "case_condition": "directly", + "target_timezone": "UTC", + "source_timezone": "UTC", + "source_formats": "ISO8601", + "timestamp_field": "@timestamp", "description": "Test rule three", "rule_filter": '(tags:"test2" AND process.program:"test" AND ' '(message:"test1*xyz" OR message:"test2?xyz"))', @@ -175,6 +199,10 @@ def test_perform_successful_pre_detection_with_two_rules(self): "description": "Test two rules two", "rule_filter": '"second_match": *', "severity": "suspicious", + "target_timezone": "UTC", + "source_timezone": "UTC", + "source_formats": "ISO8601", + "timestamp_field": "@timestamp", "title": "RULE_TWO", }, ({"kafka": "pre_detector_alerts"},), @@ -187,6 +215,10 @@ def test_perform_successful_pre_detection_with_two_rules(self): "description": "Test two rules one", "rule_filter": '"first_match": *', "severity": "critical", + "target_timezone": "UTC", + "source_timezone": "UTC", + "source_formats": "ISO8601", + "timestamp_field": "@timestamp", "title": "RULE_ONE", }, ({"kafka": "pre_detector_alerts"},), @@ -270,6 +302,10 @@ def test_ignores_case(self): "severity": "critical", "mitre": [], "case_condition": "directly", + "target_timezone": "UTC", + "source_timezone": "UTC", + "source_formats": "ISO8601", + "timestamp_field": "@timestamp", "description": "Test rule two", "rule_filter": '(tags:"test" AND process.program:"test" AND (message:"test1*xyz" OR message:"test2*xyz"))', # pylint: disable=line-too-long }, @@ -292,6 +328,10 @@ def test_ignores_case_list(self): "severity": "critical", "mitre": [], "case_condition": "directly", + "target_timezone": "UTC", + "source_timezone": "UTC", + "source_formats": "ISO8601", + "timestamp_field": "@timestamp", "description": "Test rule two", "rule_filter": '(tags:"test" AND process.program:"test" AND (message:"test1*xyz" OR message:"test2*xyz"))', # pylint: disable=line-too-long }, @@ -331,11 +371,11 @@ def _assert_equality_of_results( def test_adds_timestamp_to_extra_data_if_provided_by_event(self): document = { - "@timestamp": "20240812121304", + "@timestamp": "2024-08-12T12:13:04+00:00", "winlog": {"event_id": 123, "event_data": {"ServiceName": "VERY BAD"}}, } detection_results = self.object.process(document) - assert detection_results.data[0][0].get("@timestamp") == "2024-08-12T12:13:04+00:00" + assert detection_results.data[0][0].get("@timestamp") == "2024-08-12T12:13:04Z" @pytest.mark.parametrize( "testcase, timestamp", @@ -351,6 +391,4 @@ def test_timestamp_is_normalized(self, testcase, timestamp): "winlog": {"event_id": 123, "event_data": {"ServiceName": "VERY BAD"}}, } detection_results = self.object.process(document) - assert ( - detection_results.data[0][0].get("@timestamp") == "2024-08-12T12:13:04+00:00" - ), testcase + assert detection_results.data[0][0].get("@timestamp") == "2024-08-12T12:13:04Z", testcase