update add_field_to function for improved error handling

- Add a `raise_on_failure` parameter: an exception instance that `add_field_to` raises when the content cannot be added, instead of returning `False`.
- Refactor the function for clarity and consistency.
- Update the unit tests to reflect the new exception handling logic.
fkie-cad · ekneg54 · Nov 14, 2024 · Nov 6, 2024 · Nov 7, 2024 · Nov 7, 2024
commit 012dd6eebc9406fa344db94ca887b3daf5c5cd60
diff --git a/logprep/processor/base/exceptions.py b/logprep/processor/base/exceptions.py
@@ -72,8 +72,10 @@ class ProcessingWarning(Warning):
 
     def __init__(self, message: str, rule: "Rule", event: dict, tags: List[str] = None):
         self.tags = tags if tags else []
-        rule.metrics.number_of_warnings += 1
-        message = f"{message}, {rule.id=}, {rule.description=}, {event=}"
+        if rule:
+            rule.metrics.number_of_warnings += 1
+            message += f", {rule.id=}, {rule.description=}"
+        message += f", {event=}"
         super().__init__(f"{self.__class__.__name__}: {message}")
 
 

diff --git a/logprep/processor/grokker/processor.py b/logprep/processor/grokker/processor.py
@@ -82,9 +82,9 @@ def _apply_rules(self, event: dict, rule: GrokkerRule):
             except TimeoutError as error:
                 self._handle_missing_fields(event, rule, rule.actions.keys(), source_values)
                 raise ProcessingError(
-                    self,
                     f"Grok pattern timeout for source field: '{dotted_field}' in rule '{rule}', "
                     f"the grok pattern might be too complex.",
+                    rule,
                 ) from error
             if result is None or result == {}:
                 continue

diff --git a/logprep/util/helper.py b/logprep/util/helper.py
@@ -57,58 +57,73 @@ def _add_and_not_overwrite_key(sub_dict, key):
     return sub_dict.get(key)
 
 
-def add_field_to(event, output_field, content, extends_lists=False, overwrite_output_field=False):
+def add_field_to(
+    event,
+    output_field,
+    content,
+    extends_lists=False,
+    overwrite_output_field=False,
+    raise_on_failure=None,
+):
     """
-    Add content to an output_field in the given event. Output_field can be a dotted subfield.
-    In case of missing fields all intermediate fields will be created.
+    Add content to the output_field in the given event. Output_field can be a dotted subfield.
+    In case of missing fields, all intermediate fields will be created.
     Parameters
     ----------
     event: dict
         Original log-event that logprep is currently processing
     output_field: str
         Dotted subfield string indicating the target of the output value, e.g. destination.ip
     content: str, float, int, list, dict
-        Value that should be written into the output_field, can be a str, list or dict object
+        Value that should be written into the output_field, can be a str, list, or dict object
     extends_lists: bool
         Flag that determines whether output_field lists should be extended
     overwrite_output_field: bool
         Flag that determines whether the output_field should be overwritten
-
+    raise_on_failure: Exception, optional
+        Exception instance that is raised if the content can't be added because of a
+        conflicting field. If not given, the function returns silently without adding
+        the content.
     Returns
     ------
-    This method returns true if no conflicting fields were found during the process of the creation
-    of the dotted subfields. If conflicting fields were found False is returned.
+    None
+        The event is modified in place.
+    Raises
+    ------
+    ValueError
+        If extends_lists and overwrite_output_field are both set.
+    Exception
+        The instance given as raise_on_failure, if the content can't be added.
     """
-
-    assert not (
-        extends_lists and overwrite_output_field
-    ), "An output field can't be overwritten and extended at the same time"
-    output_field_path = [event, *get_dotted_field_list(output_field)]
-    target_key = output_field_path.pop()
-
-    if overwrite_output_field:
-        target_field = reduce(_add_and_overwrite_key, output_field_path)
-        target_field |= {target_key: content}
-        return True
-
+    if extends_lists and overwrite_output_field:
+        raise ValueError("An output field can't be overwritten and extended at the same time")
+    field_path = [event, *get_dotted_field_list(output_field)]
+    target_key = field_path.pop()
+    if overwrite_output_field:
+        # Keep the pre-refactoring behaviour: resolve the parent with the
+        # overwriting reducer, as the overwrite path did before this change.
+        target_parent = reduce(_add_and_overwrite_key, field_path)
+        target_parent[target_key] = content
+        return
     try:
-        target_field = reduce(_add_and_not_overwrite_key, output_field_path)
-    except KeyError:
-        return False
-
-    target_field_value = target_field.get(target_key)
-    if target_field_value is None:
-        target_field |= {target_key: content}
-        return True
-    if extends_lists:
-        if not isinstance(target_field_value, list):
-            return False
-        if isinstance(content, list):
-            target_field |= {target_key: [*target_field_value, *content]}
-        else:
-            target_field_value.append(content)
-        return True
-    return False
+        target_parent = reduce(_add_and_not_overwrite_key, field_path)
+    except KeyError as error:
+        if raise_on_failure is not None:
+            raise raise_on_failure from error
+        return
+    existing_value = target_parent.get(target_key)
+    if existing_value is None:
+        target_parent[target_key] = content
+        # Successful add: return here so the conflict branch below is not reached.
+        return
+    if not extends_lists or not isinstance(existing_value, list):
+        if raise_on_failure is not None:
+            raise raise_on_failure
+        return
+    if isinstance(content, list):
+        existing_value.extend(content)
+    else:
+        existing_value.append(content)
 
 
 def _get_slice_arg(slice_item):

diff --git a/tests/unit/util/test_helper_add_field.py b/tests/unit/util/test_helper_add_field.py
@@ -2,25 +2,21 @@
 # pylint: disable=missing-docstring
 import pytest
 
+from logprep.abc.exceptions import LogprepException
 from logprep.util.helper import add_field_to
 
 
 class TestHelperAddField:
     def test_add_str_content_as_new_root_field(self):
         document = {"source": {"ip": "8.8.8.8"}}
         expected_document = {"source": {"ip": "8.8.8.8"}, "field": "content"}
-
-        add_was_successful = add_field_to(document, "field", "content")
-
-        assert add_was_successful, "Found duplicate even though there shouldn't be one"
+        add_field_to(document, "field", "content")
         assert document == expected_document
 
     def test_add_str_content_as_completely_new_dotted_subfield(self):
         document = {"source": {"ip": "8.8.8.8"}}
         expected_document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": "content"}}
-
-        add_was_successful = add_field_to(document, "sub.field", "content")
-        assert add_was_successful, "Found duplicate even though there shouldn't be one"
+        add_field_to(document, "sub.field", "content")
         assert document == expected_document
 
     def test_add_str_content_as_partially_new_dotted_subfield(self):
@@ -30,41 +26,31 @@ def test_add_str_content_as_partially_new_dotted_subfield(self):
             "sub": {"field": "content", "other_field": "other_content"},
         }
 
-        add_was_successful = add_field_to(document, "sub.field", "content")
-
-        assert add_was_successful, "Found duplicate even though there shouldn't be one"
+        add_field_to(document, "sub.field", "content")
         assert document == expected_document
 
     def test_provoke_str_duplicate_in_root_field(self):
         document = {"source": {"ip": "8.8.8.8"}, "field": "exists already"}
-
-        add_was_successful = add_field_to(document, "field", "content")
-
-        assert not add_was_successful, "Found no duplicate even though there should be one"
+        error = LogprepException("test error")
+        with pytest.raises(LogprepException, match=r"test error"):
+            add_field_to(document, "field", "content", raise_on_failure=error)
 
     def test_provoke_str_duplicate_in_dotted_subfield(self):
         document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": "exists already"}}
-
-        add_was_successful = add_field_to(document, "sub.field", "content")
-
-        assert not add_was_successful, "Found no duplicate even though there should be one"
+        error = LogprepException("test error")
+        with pytest.raises(LogprepException, match=r"test error"):
+            add_field_to(document, "sub.field", "content", raise_on_failure=error)
 
     def test_add_dict_content_as_new_root_field(self):
         document = {"source": {"ip": "8.8.8.8"}}
         expected_document = {"source": {"ip": "8.8.8.8"}, "field": {"dict": "content"}}
-
-        add_was_successful = add_field_to(document, "field", {"dict": "content"})
-
-        assert add_was_successful, "Found duplicate even though there shouldn't be one"
+        add_field_to(document, "field", {"dict": "content"})
         assert document == expected_document
 
     def test_add_dict_content_as_completely_new_dotted_subfield(self):
         document = {"source": {"ip": "8.8.8.8"}}
         expected_document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": {"dict": "content"}}}
-
-        add_was_successful = add_field_to(document, "sub.field", {"dict": "content"})
-
-        assert add_was_successful, "Found duplicate even though there shouldn't be one"
+        add_field_to(document, "sub.field", {"dict": "content"})
         assert document == expected_document
 
     def test_add_dict_content_as_partially_new_dotted_subfield(self):
@@ -73,72 +59,47 @@ def test_add_dict_content_as_partially_new_dotted_subfield(self):
             "source": {"ip": "8.8.8.8"},
             "sub": {"field": {"dict": "content"}, "other_field": "other_content"},
         }
-
-        add_was_successful = add_field_to(document, "sub.field", {"dict": "content"})
-
-        assert add_was_successful, "Found duplicate even though there shouldn't be one"
+        add_field_to(document, "sub.field", {"dict": "content"})
         assert document == expected_document
 
     def test_provoke_dict_duplicate_in_root_field(self):
         document = {"source": {"ip": "8.8.8.8"}, "field": {"already_existing": "dict"}}
-
-        add_was_successful = add_field_to(document, "field", {"dict": "content"})
-
-        assert not add_was_successful, "Found no duplicate even though there should be one"
+        error = LogprepException("test error")
+        with pytest.raises(LogprepException, match=r"test error"):
+            add_field_to(document, "field", {"dict": "content"}, raise_on_failure=error)
 
     def test_provoke_dict_duplicate_in_dotted_subfield(self):
         document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": {"already_existing": "dict"}}}
-
-        add_was_successful = add_field_to(document, "sub.field", {"dict": "content"})
-
-        assert not add_was_successful, "Found no duplicate even though there should be one"
+        error = LogprepException("test error")
+        with pytest.raises(LogprepException, match=r"test error"):
+            add_field_to(document, "sub.field", {"dict": "content"}, raise_on_failure=error)
 
     def test_add_field_to_overwrites_output_field_in_root_level(self):
         document = {"some": "field", "output_field": "has already content"}
-
-        add_was_successful = add_field_to(
-            document, "output_field", {"dict": "content"}, overwrite_output_field=True
-        )
-
-        assert add_was_successful, "Output field was overwritten"
+        add_field_to(document, "output_field", {"dict": "content"}, overwrite_output_field=True)
         assert document.get("output_field") == {"dict": "content"}
 
     def test_add_field_to_overwrites_output_field_in_nested_level(self):
         document = {"some": "field", "nested": {"output": {"field": "has already content"}}}
-
-        add_was_successful = add_field_to(
+        add_field_to(
             document, "nested.output.field", {"dict": "content"}, overwrite_output_field=True
         )
-
-        assert add_was_successful, "Output field was overwritten"
         assert document.get("nested", {}).get("output", {}).get("field") == {"dict": "content"}
 
     def test_add_field_to_extends_list_when_only_given_a_string(self):
         document = {"some": "field", "some_list": ["with a value"]}
-
-        add_was_successful = add_field_to(document, "some_list", "new value", extends_lists=True)
-
-        assert add_was_successful, "Output field was overwritten"
+        add_field_to(document, "some_list", "new value", extends_lists=True)
         assert document.get("some_list") == ["with a value", "new value"]
 
     def test_add_field_to_extends_list_when_given_a_list(self):
         document = {"some": "field", "some_list": ["with a value"]}
-
-        add_was_successful = add_field_to(
-            document, "some_list", ["first", "second"], extends_lists=True
-        )
-
-        assert add_was_successful, "Output field was overwritten"
+        add_field_to(document, "some_list", ["first", "second"], extends_lists=True)
         assert document.get("some_list") == ["with a value", "first", "second"]
 
     def test_add_field_to_raises_if_list_should_be_extended_and_overwritten_at_the_same_time(self):
         document = {"some": "field", "some_list": ["with a value"]}
-
-        with pytest.raises(
-            AssertionError,
-            match=r"An output field can't be overwritten and " r"extended at the same time",
-        ):
-            _ = add_field_to(
+        with pytest.raises(ValueError, match=r"can't be overwritten and extended at the same time"):
+            add_field_to(
                 document,
                 "some_list",
                 ["first", "second"],
@@ -149,8 +110,9 @@ def test_add_field_to_raises_if_list_should_be_extended_and_overwritten_at_the_s
     def test_returns_false_if_dotted_field_value_key_exists(self):
         document = {"user": "Franz"}
         content = ["user_inlist"]
-        add_was_successful = add_field_to(document, "user.in_list", content)
-        assert not add_was_successful
+        error = LogprepException("test error")
+        with pytest.raises(LogprepException, match=r"test error"):
+            add_field_to(document, "user.in_list", content, raise_on_failure=error)
 
     def test_add_list_with_nested_keys(self):
         testdict = {
@@ -163,8 +125,5 @@ def test_add_list_with_nested_keys(self):
                 }
             }
         }
-        add_was_successful = add_field_to(
-            testdict, "key1.key2.key3.key4.key5.list", ["content"], extends_lists=True
-        )
-        assert add_was_successful
+        add_field_to(testdict, "key1.key2.key3.key4.key5.list", ["content"], extends_lists=True)
         assert testdict == expected