Skip to content

Commit

Permalink
add first functioning but ugly way for timestamp normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
djkhl committed Aug 14, 2024
1 parent 7ee1b48 commit 5c87128
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 31 deletions.
63 changes: 38 additions & 25 deletions logprep/processor/pre_detector/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,16 @@
.. automodule:: logprep.processor.pre_detector.rule
"""

from datetime import datetime, timezone
from functools import cached_property
from uuid import uuid4

from attr import define, field, validators

from logprep.abc.processor import Processor
from logprep.processor.base.exceptions import ProcessingWarning
from logprep.processor.pre_detector.ip_alerter import IPAlerter
from logprep.processor.pre_detector.rule import PreDetectorRule
from logprep.processor.timestamper.processor import Timestamper
from logprep.util.helper import add_field_to, get_dotted_field_value
from logprep.util.time import TimeParser, TimeParserException

Expand Down Expand Up @@ -93,28 +94,17 @@ class Config(Processor.Config):
def _ip_alerter(self):
return IPAlerter(self._config.alert_ip_list_path)

def is_normalized_timestamp(self, timestamp: str):
    """Check whether the timestamp has already been normalized.

    A timestamp counts as normalized when ``datetime.fromisoformat`` can parse it.

    :param timestamp: the timestamp value to check
    :return: ``True`` if the value parses as an ISO formatted string, otherwise ``False``
    """
    try:
        datetime.fromisoformat(timestamp)
    except (TypeError, ValueError):
        # ValueError: a string that is not ISO formatted.
        # TypeError: not a string at all (e.g. a numeric UNIX timestamp), which
        # is simply "not normalized" rather than a reason to crash.
        return False
    return True

def detect_format_and_normalize_timestamp(self, timestamp):
    """Detect the source format of a timestamp and normalize it.

    A timestamp that ``is_normalized_timestamp`` accepts is returned unchanged.
    Otherwise every known source format is tried in turn (with UTC passed to the
    parser as timezone) and the first successful parse is returned via
    ``isoformat()``.

    :param timestamp: the timestamp value taken from the event
    :return: the normalized timestamp, or ``None`` if no known format matches
    """
    # Whether the value is already normalized does not depend on the candidate
    # format, so decide it once instead of once per format.
    if self.is_normalized_timestamp(timestamp):
        return timestamp
    for source_format in ("%Y%m%d%H%M%S", "UNIX"):
        try:
            return TimeParser.parse_datetime(timestamp, source_format, timezone.utc).isoformat()
        except TimeParserException:
            continue
    # No known format matched.
    return None
# def detect_format_and_normalize_timestamp(self, timestamp):
# """method for detecting the used source format of a timestamp and normalizing it"""
# # formats = [
# # "%Y%m%d%H%M%S",
# # "UNIX",
# # ]
# # for form in formats:
# # try:
# # return TimeParser.parse_datetime(timestamp, form, timezone.utc).isoformat()
# # except TimeParserException:
# # continue

def _apply_rules(self, event, rule):
if not (
Expand All @@ -127,8 +117,31 @@ def _apply_rules(self, event, rule):
timestamp = get_dotted_field_value(event, "@timestamp")

if timestamp is not None:
timestamp = self.detect_format_and_normalize_timestamp(timestamp)
detection["@timestamp"] = timestamp
# timestamp = self.detect_format_and_normalize_timestamp(timestamp)

source_timezone, target_timezone, source_formats = (
rule.source_timezone,
rule.target_timezone,
rule.source_formats,
)
parsed_successfully = False
for source_format in source_formats:
try:
parsed_datetime = TimeParser.parse_datetime(
timestamp, source_format, source_timezone
)
except TimeParserException:
continue
result = (
parsed_datetime.astimezone(target_timezone)
.isoformat()
.replace("+00:00", "Z")
)
detection["@timestamp"] = result
parsed_successfully = True
break
if not parsed_successfully:
raise ProcessingWarning(str("Could not parse timestamp"), rule, event)

def _get_detection_result(self, event: dict, rule: PreDetectorRule):
pre_detection_id = get_dotted_field_value(event, "pre_detection_id")
Expand Down
32 changes: 32 additions & 0 deletions logprep/processor/pre_detector/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@

from functools import cached_property
from typing import Optional, Union
from zoneinfo import ZoneInfo

from attrs import asdict, define, field, validators

Expand Down Expand Up @@ -133,6 +134,25 @@ class Config(Rule.Config): # pylint: disable=too-many-instance-attributes
validator=validators.optional(validators.instance_of(str)), default=None
)
"""A link to the rule if applicable."""
source_formats: list = field(
    validator=validators.deep_iterable(
        member_validator=validators.instance_of(str),
        iterable_validator=validators.instance_of(list),
    ),
    # Build the default per instance: a literal list default would be a single
    # object shared by every rule configuration.
    factory=lambda: ["ISO8601"],
    converter=lambda x: x if isinstance(x, list) else [x],
)
"""List of source formats that can be used to parse the timestamp. A single value is
wrapped into a one-element list. Defaults to :code:`["ISO8601"]`."""
timestamp_field: str = field(validator=validators.instance_of(str), default="@timestamp")
"""The field that holds the timestamp to be normalized. Defaults to :code:`@timestamp`."""
source_timezone: ZoneInfo = field(
    validator=[validators.instance_of(ZoneInfo)], converter=ZoneInfo, default="UTC"
)
"""Timezone of the source timestamp. Defaults to :code:`UTC`."""
target_timezone: ZoneInfo = field(
    validator=[validators.instance_of(ZoneInfo)], converter=ZoneInfo, default="UTC"
)
"""Timezone of the normalized timestamp. Defaults to :code:`UTC`."""

def __eq__(self, other: "PreDetectorRule") -> bool:
return all(
Expand Down Expand Up @@ -160,4 +180,16 @@ def ip_fields(self) -> list:
def description(self) -> str:
return self._config.description

@property
def source_formats(self) -> list:
    """Return the source formats configured for this rule."""
    return self._config.source_formats

@property
def target_timezone(self) -> ZoneInfo:
    """Return the target timezone configured for this rule."""
    return self._config.target_timezone

@property
def source_timezone(self) -> ZoneInfo:
    """Return the source timezone configured for this rule."""
    return self._config.source_timezone

# pylint: enable=C0111
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ pre_detector:
- attack.test1
- attack.test2
case_condition: directly
source_formats: ["UNIX"]
sigma_fields: true
description: Test rule four
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
"attack.test1",
"attack.test2"
],
"case_condition": "directly"
"case_condition": "directly",
"source_formats": [
"ISO8601",
"%Y%m%d%H%M%S",
"UNIX"
]
},
"description": "Test rule one"
}
Expand Down
48 changes: 43 additions & 5 deletions tests/unit/processor/pre_detector/test_pre_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ def test_perform_successful_pre_detection(self):
"severity": "critical",
"mitre": ["attack.test1", "attack.test2"],
"case_condition": "directly",
"target_timezone": "UTC",
"source_timezone": "UTC",
"source_formats": "ISO8601",
"timestamp_field": "@timestamp",
"description": "Test rule one",
"rule_filter": '(winlog.event_id:"123" AND winlog.event_data.ServiceName:"VERY BAD")', # pylint: disable=line-too-long
},
Expand All @@ -53,6 +57,10 @@ def test_perform_pre_detection_that_fails_if_filter_children_were_slots(self):
"mitre": ["attack.test1", "attack.test2"],
"rule_filter": '(A:"*bar*" AND NOT ((A:"foo*" AND A:"*baz")))',
"severity": "critical",
"target_timezone": "UTC",
"source_timezone": "UTC",
"source_formats": "ISO8601",
"timestamp_field": "@timestamp",
"title": "RULE_FOUR",
},
({"kafka": "pre_detector_alerts"},),
Expand Down Expand Up @@ -82,6 +90,10 @@ def test_perform_successful_pre_detection_with_host_name(self):
"mitre": ["attack.test1", "attack.test2"],
"case_condition": "directly",
"host": {"name": "Test hostname"},
"target_timezone": "UTC",
"source_timezone": "UTC",
"source_formats": "ISO8601",
"timestamp_field": "@timestamp",
"description": "Test rule one",
"rule_filter": '(winlog.event_id:"123" AND winlog.event_data.ServiceName:"VERY BAD")', # pylint: disable=line-too-long
},
Expand All @@ -104,6 +116,10 @@ def test_perform_successful_pre_detection_with_same_existing_pre_detection(self)
"severity": "critical",
"mitre": ["attack.test1", "attack.test2"],
"case_condition": "directly",
"target_timezone": "UTC",
"source_timezone": "UTC",
"source_formats": "ISO8601",
"timestamp_field": "@timestamp",
"description": "Test rule one",
"rule_filter": '(winlog.event_id:"123" AND winlog.event_data.ServiceName:"VERY BAD")', # pylint: disable=line-too-long
},
Expand All @@ -128,6 +144,10 @@ def test_perform_successful_pre_detection_with_pre_detector_complex_rule_suceeds
"severity": "critical",
"mitre": [],
"case_condition": "directly",
"target_timezone": "UTC",
"source_timezone": "UTC",
"source_formats": "ISO8601",
"timestamp_field": "@timestamp",
"description": "Test rule two",
"rule_filter": '(tags:"test" AND process.program:"test" AND '
'(message:"test1*xyz" OR message:"test2*xyz"))',
Expand All @@ -151,6 +171,10 @@ def test_perform_successful_pre_detection_with_pre_detector_complex_rule_succeed
"severity": "critical",
"mitre": [],
"case_condition": "directly",
"target_timezone": "UTC",
"source_timezone": "UTC",
"source_formats": "ISO8601",
"timestamp_field": "@timestamp",
"description": "Test rule three",
"rule_filter": '(tags:"test2" AND process.program:"test" AND '
'(message:"test1*xyz" OR message:"test2?xyz"))',
Expand All @@ -175,6 +199,10 @@ def test_perform_successful_pre_detection_with_two_rules(self):
"description": "Test two rules two",
"rule_filter": '"second_match": *',
"severity": "suspicious",
"target_timezone": "UTC",
"source_timezone": "UTC",
"source_formats": "ISO8601",
"timestamp_field": "@timestamp",
"title": "RULE_TWO",
},
({"kafka": "pre_detector_alerts"},),
Expand All @@ -187,6 +215,10 @@ def test_perform_successful_pre_detection_with_two_rules(self):
"description": "Test two rules one",
"rule_filter": '"first_match": *',
"severity": "critical",
"target_timezone": "UTC",
"source_timezone": "UTC",
"source_formats": "ISO8601",
"timestamp_field": "@timestamp",
"title": "RULE_ONE",
},
({"kafka": "pre_detector_alerts"},),
Expand Down Expand Up @@ -270,6 +302,10 @@ def test_ignores_case(self):
"severity": "critical",
"mitre": [],
"case_condition": "directly",
"target_timezone": "UTC",
"source_timezone": "UTC",
"source_formats": "ISO8601",
"timestamp_field": "@timestamp",
"description": "Test rule two",
"rule_filter": '(tags:"test" AND process.program:"test" AND (message:"test1*xyz" OR message:"test2*xyz"))', # pylint: disable=line-too-long
},
Expand All @@ -292,6 +328,10 @@ def test_ignores_case_list(self):
"severity": "critical",
"mitre": [],
"case_condition": "directly",
"target_timezone": "UTC",
"source_timezone": "UTC",
"source_formats": "ISO8601",
"timestamp_field": "@timestamp",
"description": "Test rule two",
"rule_filter": '(tags:"test" AND process.program:"test" AND (message:"test1*xyz" OR message:"test2*xyz"))', # pylint: disable=line-too-long
},
Expand Down Expand Up @@ -331,11 +371,11 @@ def _assert_equality_of_results(

def test_adds_timestamp_to_extra_data_if_provided_by_event(self):
document = {
"@timestamp": "20240812121304",
"@timestamp": "2024-08-12T12:13:04+00:00",
"winlog": {"event_id": 123, "event_data": {"ServiceName": "VERY BAD"}},
}
detection_results = self.object.process(document)
assert detection_results.data[0][0].get("@timestamp") == "2024-08-12T12:13:04+00:00"
assert detection_results.data[0][0].get("@timestamp") == "2024-08-12T12:13:04Z"

@pytest.mark.parametrize(
"testcase, timestamp",
Expand All @@ -351,6 +391,4 @@ def test_timestamp_is_normalized(self, testcase, timestamp):
"winlog": {"event_id": 123, "event_data": {"ServiceName": "VERY BAD"}},
}
detection_results = self.object.process(document)
assert (
detection_results.data[0][0].get("@timestamp") == "2024-08-12T12:13:04+00:00"
), testcase
assert detection_results.data[0][0].get("@timestamp") == "2024-08-12T12:13:04Z", testcase

0 comments on commit 5c87128

Please sign in to comment.