Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update add_field_to function for improved error handling #696

Merged
merged 38 commits into from
Nov 14, 2024
Merged
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
012dd6e
update add_field_to function for improved error handling
dtrai2 Nov 6, 2024
0adad9c
let add_field_to always raise FieldExistsWarning on failure
dtrai2 Nov 7, 2024
33f402d
fix field_manager tests
dtrai2 Nov 7, 2024
64cf75a
fix domain_label_extractor tests
dtrai2 Nov 7, 2024
f3da215
fix generic_adder tests
dtrai2 Nov 7, 2024
28fd58c
fix geoip_enricher tests
dtrai2 Nov 7, 2024
ad52e08
fix grokker processor
dtrai2 Nov 8, 2024
817d980
fix ip_informer processor
dtrai2 Nov 8, 2024
ad8d0d4
fix labeler processor tests
dtrai2 Nov 8, 2024
9dfd7ab
fix list_comparison processor tests
dtrai2 Nov 8, 2024
d7582f9
fix pre_detector processor tests
dtrai2 Nov 8, 2024
b124195
fix requester processor tests
dtrai2 Nov 8, 2024
1ea2526
fix ProcessingWarning init
dtrai2 Nov 8, 2024
adfd509
fix timestamper processor
dtrai2 Nov 8, 2024
f68d7c3
fix template_replacer processor
dtrai2 Nov 8, 2024
66f6696
fix input connector tests
dtrai2 Nov 8, 2024
113bb25
fix FieldExistsWarning init tests
dtrai2 Nov 8, 2024
1db38f3
fix auto_rule_tester
dtrai2 Nov 8, 2024
770a80a
fix and refactor generic_resolver
dtrai2 Nov 11, 2024
52d1823
fix and refactor hyperscan_resolver
dtrai2 Nov 11, 2024
28d12b0
fix auto_rule_tester
dtrai2 Nov 11, 2024
04e115d
fix labeler and add new test
dtrai2 Nov 11, 2024
bb7cbd0
clean up
dtrai2 Nov 11, 2024
bcae7ba
replace else statement with early return
dtrai2 Nov 12, 2024
297aa97
remove silent fail functions for field addition
dtrai2 Nov 12, 2024
4cd6db7
Refactor overwrite argument for field addition functions
dtrai2 Nov 12, 2024
0507313
add assertions to verify document state after exceptions
dtrai2 Nov 12, 2024
f56d45b
optimize imports
dtrai2 Nov 12, 2024
ea781f5
update add_field_to signature
dtrai2 Nov 12, 2024
bdb445f
enable `add_field_to` to always take a batch of fields
dtrai2 Nov 13, 2024
90038c0
revert exception signature and add rule to add_field_to method as argument
dtrai2 Nov 13, 2024
6439b8c
revert key indexing in dissector back to original
dtrai2 Nov 13, 2024
ad2bc78
add tests for multiple field additions in add_field_to
dtrai2 Nov 13, 2024
a22bf0a
renamed 'add_field_to' to 'add_fields_to'
dtrai2 Nov 13, 2024
0facf61
fix typo in StringSplitterRule
dtrai2 Nov 13, 2024
827b7f4
remove unused conflicting_fields list
dtrai2 Nov 13, 2024
8405edd
rename _add_one_field_to to _add_field_to for clarity
dtrai2 Nov 13, 2024
2506df6
fix CHANGELOG.md
dtrai2 Nov 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
update add_field_to function for improved error handling
- Add raise_on_failure parameter to raise exceptions on failure.
- Refactor function for clarity and consistency.
- Update unit tests to reflect new exception handling logic.
dtrai2 committed Nov 13, 2024
commit 012dd6eebc9406fa344db94ca887b3daf5c5cd60
6 changes: 4 additions & 2 deletions logprep/processor/base/exceptions.py
Original file line number Diff line number Diff line change
@@ -72,8 +72,10 @@ class ProcessingWarning(Warning):

def __init__(self, message: str, rule: "Rule", event: dict, tags: List[str] = None):
self.tags = tags if tags else []
rule.metrics.number_of_warnings += 1
message = f"{message}, {rule.id=}, {rule.description=}, {event=}"
if rule:
rule.metrics.number_of_warnings += 1
message += f", {rule.id=}, {rule.description=}"
message += f", {event=}"
ekneg54 marked this conversation as resolved.
Show resolved Hide resolved
super().__init__(f"{self.__class__.__name__}: {message}")


2 changes: 1 addition & 1 deletion logprep/processor/grokker/processor.py
dtrai2 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -82,9 +82,9 @@ def _apply_rules(self, event: dict, rule: GrokkerRule):
except TimeoutError as error:
self._handle_missing_fields(event, rule, rule.actions.keys(), source_values)
raise ProcessingError(
self,
f"Grok pattern timeout for source field: '{dotted_field}' in rule '{rule}', "
f"the grok pattern might be too complex.",
rule,
) from error
if result is None or result == {}:
continue
69 changes: 35 additions & 34 deletions logprep/util/helper.py
Original file line number Diff line number Diff line change
@@ -57,58 +57,59 @@ def _add_and_not_overwrite_key(sub_dict, key):
return sub_dict.get(key)


def add_field_to(event, output_field, content, extends_lists=False, overwrite_output_field=False):
def add_field_to(
event,
output_field,
content,
extends_lists=False,
overwrite_output_field=False,
dtrai2 marked this conversation as resolved.
Show resolved Hide resolved
raise_on_failure=None,
):
"""
Add content to an output_field in the given event. Output_field can be a dotted subfield.
In case of missing fields all intermediate fields will be created.
Add content to the output_field in the given event. Output_field can be a dotted subfield.
In case of missing fields, all intermediate fields will be created.
Parameters
----------
event: dict
Original log-event that logprep is currently processing
output_field: str
Dotted subfield string indicating the target of the output value, e.g. destination.ip
content: str, float, int, list, dict
Value that should be written into the output_field, can be a str, list or dict object
Value that should be written into the output_field, can be a str, list, or dict object
extends_lists: bool
Flag that determines whether output_field lists should be extended
overwrite_output_field: bool
Flag that determines whether the output_field should be overwritten

Returns
------
This method returns true if no conflicting fields were found during the process of the creation
of the dotted subfields. If conflicting fields were found False is returned.
bool
True if no conflicting fields were found during the process of the creation
of the dotted subfields, otherwise False.
"""

assert not (
extends_lists and overwrite_output_field
), "An output field can't be overwritten and extended at the same time"
output_field_path = [event, *get_dotted_field_list(output_field)]
target_key = output_field_path.pop()

if overwrite_output_field:
target_field = reduce(_add_and_overwrite_key, output_field_path)
target_field |= {target_key: content}
return True

if extends_lists and overwrite_output_field:
raise ValueError("An output field can't be overwritten and extended at the same time")
field_path = [event, *get_dotted_field_list(output_field)]
target_key = field_path.pop()
try:
target_field = reduce(_add_and_not_overwrite_key, output_field_path)
except KeyError:
return False

target_field_value = target_field.get(target_key)
if target_field_value is None:
target_field |= {target_key: content}
return True
if extends_lists:
if not isinstance(target_field_value, list):
return False
target_parent = reduce(_add_and_not_overwrite_key, field_path)
except KeyError as error:
if raise_on_failure:
raise raise_on_failure from error
return
if overwrite_output_field:
target_parent[target_key] = content
else:
existing_value = target_parent.get(target_key)
if existing_value is None:
target_parent[target_key] = content
if not extends_lists or not isinstance(existing_value, list):
if raise_on_failure:
raise raise_on_failure
return
if isinstance(content, list):
target_field |= {target_key: [*target_field_value, *content]}
target_parent[target_key].extend(content)
else:
target_field_value.append(content)
return True
return False
target_parent[target_key].append(content)


def _get_slice_arg(slice_item):
99 changes: 29 additions & 70 deletions tests/unit/util/test_helper_add_field.py
Original file line number Diff line number Diff line change
@@ -2,25 +2,21 @@
# pylint: disable=missing-docstring
import pytest

from logprep.abc.exceptions import LogprepException
from logprep.util.helper import add_field_to


class TestHelperAddField:
def test_add_str_content_as_new_root_field(self):
document = {"source": {"ip": "8.8.8.8"}}
expected_document = {"source": {"ip": "8.8.8.8"}, "field": "content"}

add_was_successful = add_field_to(document, "field", "content")

assert add_was_successful, "Found duplicate even though there shouldn't be one"
add_field_to(document, "field", "content")
assert document == expected_document

def test_add_str_content_as_completely_new_dotted_subfield(self):
document = {"source": {"ip": "8.8.8.8"}}
expected_document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": "content"}}

add_was_successful = add_field_to(document, "sub.field", "content")
assert add_was_successful, "Found duplicate even though there shouldn't be one"
add_field_to(document, "sub.field", "content")
assert document == expected_document

def test_add_str_content_as_partially_new_dotted_subfield(self):
@@ -30,41 +26,31 @@ def test_add_str_content_as_partially_new_dotted_subfield(self):
"sub": {"field": "content", "other_field": "other_content"},
}

add_was_successful = add_field_to(document, "sub.field", "content")

assert add_was_successful, "Found duplicate even though there shouldn't be one"
add_field_to(document, "sub.field", "content")
assert document == expected_document

def test_provoke_str_duplicate_in_root_field(self):
document = {"source": {"ip": "8.8.8.8"}, "field": "exists already"}

add_was_successful = add_field_to(document, "field", "content")

assert not add_was_successful, "Found no duplicate even though there should be one"
error = LogprepException("test error")
with pytest.raises(LogprepException, match=r"test error"):
add_field_to(document, "field", "content", raise_on_failure=error)

def test_provoke_str_duplicate_in_dotted_subfield(self):
document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": "exists already"}}

add_was_successful = add_field_to(document, "sub.field", "content")

assert not add_was_successful, "Found no duplicate even though there should be one"
error = LogprepException("test error")
with pytest.raises(LogprepException, match=r"test error"):
add_field_to(document, "sub.field", "content", raise_on_failure=error)

def test_add_dict_content_as_new_root_field(self):
document = {"source": {"ip": "8.8.8.8"}}
expected_document = {"source": {"ip": "8.8.8.8"}, "field": {"dict": "content"}}

add_was_successful = add_field_to(document, "field", {"dict": "content"})

assert add_was_successful, "Found duplicate even though there shouldn't be one"
add_field_to(document, "field", {"dict": "content"})
assert document == expected_document

def test_add_dict_content_as_completely_new_dotted_subfield(self):
document = {"source": {"ip": "8.8.8.8"}}
expected_document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": {"dict": "content"}}}

add_was_successful = add_field_to(document, "sub.field", {"dict": "content"})

assert add_was_successful, "Found duplicate even though there shouldn't be one"
add_field_to(document, "sub.field", {"dict": "content"})
assert document == expected_document

def test_add_dict_content_as_partially_new_dotted_subfield(self):
@@ -73,72 +59,47 @@ def test_add_dict_content_as_partially_new_dotted_subfield(self):
"source": {"ip": "8.8.8.8"},
"sub": {"field": {"dict": "content"}, "other_field": "other_content"},
}

add_was_successful = add_field_to(document, "sub.field", {"dict": "content"})

assert add_was_successful, "Found duplicate even though there shouldn't be one"
add_field_to(document, "sub.field", {"dict": "content"})
assert document == expected_document

def test_provoke_dict_duplicate_in_root_field(self):
document = {"source": {"ip": "8.8.8.8"}, "field": {"already_existing": "dict"}}

add_was_successful = add_field_to(document, "field", {"dict": "content"})

assert not add_was_successful, "Found no duplicate even though there should be one"
error = LogprepException("test error")
with pytest.raises(LogprepException, match=r"test error"):
add_field_to(document, "field", {"dict": "content"}, raise_on_failure=error)

def test_provoke_dict_duplicate_in_dotted_subfield(self):
document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": {"already_existing": "dict"}}}

add_was_successful = add_field_to(document, "sub.field", {"dict": "content"})

assert not add_was_successful, "Found no duplicate even though there should be one"
error = LogprepException("test error")
with pytest.raises(LogprepException, match=r"test error"):
add_field_to(document, "sub.field", {"dict": "content"}, raise_on_failure=error)

def test_add_field_to_overwrites_output_field_in_root_level(self):
document = {"some": "field", "output_field": "has already content"}

add_was_successful = add_field_to(
document, "output_field", {"dict": "content"}, overwrite_output_field=True
)

assert add_was_successful, "Output field was overwritten"
add_field_to(document, "output_field", {"dict": "content"}, overwrite_output_field=True)
assert document.get("output_field") == {"dict": "content"}

def test_add_field_to_overwrites_output_field_in_nested_level(self):
document = {"some": "field", "nested": {"output": {"field": "has already content"}}}

add_was_successful = add_field_to(
add_field_to(
document, "nested.output.field", {"dict": "content"}, overwrite_output_field=True
)

assert add_was_successful, "Output field was overwritten"
assert document.get("nested", {}).get("output", {}).get("field") == {"dict": "content"}

def test_add_field_to_extends_list_when_only_given_a_string(self):
document = {"some": "field", "some_list": ["with a value"]}

add_was_successful = add_field_to(document, "some_list", "new value", extends_lists=True)

assert add_was_successful, "Output field was overwritten"
add_field_to(document, "some_list", "new value", extends_lists=True)
assert document.get("some_list") == ["with a value", "new value"]

def test_add_field_to_extends_list_when_given_a_list(self):
document = {"some": "field", "some_list": ["with a value"]}

add_was_successful = add_field_to(
document, "some_list", ["first", "second"], extends_lists=True
)

assert add_was_successful, "Output field was overwritten"
add_field_to(document, "some_list", ["first", "second"], extends_lists=True)
assert document.get("some_list") == ["with a value", "first", "second"]

def test_add_field_to_raises_if_list_should_be_extended_and_overwritten_at_the_same_time(self):
document = {"some": "field", "some_list": ["with a value"]}

with pytest.raises(
AssertionError,
match=r"An output field can't be overwritten and " r"extended at the same time",
):
_ = add_field_to(
with pytest.raises(ValueError, match=r"can't be overwritten and extended at the same time"):
add_field_to(
document,
"some_list",
["first", "second"],
@@ -149,8 +110,9 @@ def test_add_field_to_raises_if_list_should_be_extended_and_overwritten_at_the_s
def test_returns_false_if_dotted_field_value_key_exists(self):
document = {"user": "Franz"}
content = ["user_inlist"]
add_was_successful = add_field_to(document, "user.in_list", content)
assert not add_was_successful
error = LogprepException("test error")
with pytest.raises(LogprepException, match=r"test error"):
add_field_to(document, "user.in_list", content, raise_on_failure=error)

def test_add_list_with_nested_keys(self):
testdict = {
@@ -163,8 +125,5 @@ def test_add_list_with_nested_keys(self):
}
}
}
add_was_successful = add_field_to(
testdict, "key1.key2.key3.key4.key5.list", ["content"], extends_lists=True
)
assert add_was_successful
add_field_to(testdict, "key1.key2.key3.key4.key5.list", ["content"], extends_lists=True)
assert testdict == expected