From 205045e323603ab53a06e6558aa5e5cada7a35c5 Mon Sep 17 00:00:00 2001 From: dtrai2 <95028228+dtrai2@users.noreply.github.com> Date: Thu, 14 Nov 2024 08:57:00 +0100 Subject: [PATCH] update add_field_to function for improved error handling (#696) --- CHANGELOG.md | 9 + logprep/abc/input.py | 71 ++++---- logprep/abc/processor.py | 20 +- logprep/metrics/metrics.py | 10 +- logprep/processor/base/exceptions.py | 11 +- logprep/processor/clusterer/processor.py | 9 +- logprep/processor/dissector/processor.py | 16 +- .../domain_label_extractor/processor.py | 28 ++- .../processor/domain_resolver/processor.py | 10 +- logprep/processor/field_manager/processor.py | 50 ++--- logprep/processor/generic_adder/processor.py | 49 ++--- .../processor/generic_resolver/processor.py | 98 ++++------ logprep/processor/geoip_enricher/processor.py | 29 ++- logprep/processor/grokker/processor.py | 28 +-- .../processor/hyperscan_resolver/processor.py | 51 ++---- logprep/processor/labeler/processor.py | 42 +---- logprep/processor/labeler/rule.py | 4 + .../processor/list_comparison/processor.py | 18 +- logprep/processor/pre_detector/processor.py | 34 ++-- logprep/processor/pseudonymizer/processor.py | 6 +- logprep/processor/requester/processor.py | 45 +++-- .../selective_extractor/processor.py | 5 +- .../processor/string_splitter/processor.py | 2 +- logprep/processor/string_splitter/rule.py | 10 +- .../processor/template_replacer/processor.py | 20 +- logprep/util/helper.py | 172 +++++++++++++----- .../rules/generic/rule_with_custom_tests.yml | 3 +- .../rules/specific/rule_with_custom_tests.yml | 3 +- .../rules/specific/auto_test_mismatch.json | 3 +- .../rules/specific/auto_test_no_test_.json | 3 +- .../dropper/rules/generic/drop_field.json | 3 +- .../dropper/rules/specific/drop_field.json | 3 +- .../generic/auto_test_labeling_match.json | 3 +- .../auto_test_labeling_match_existing.json | 3 +- .../auto_test_pre_detector_mismatch.json | 4 +- .../auto_test_pre_detector_no_test_.json | 4 +- .../rules/generic/template_replacer.json | 6 +- .../rules/specific/template_replacer.json | 6 +- tests/unit/connector/base.py | 18 +- tests/unit/processor/labeler/test_labeler.py | 8 + .../processor/requester/test_requester.py | 21 +++ .../string_splitter/test_string_splitter.py | 2 +- tests/unit/processor/test_process.py | 2 +- tests/unit/util/test_auto_rule_tester.py | 2 + tests/unit/util/test_helper_add_field.py | 158 ++++++++-------- 45 files changed, 563 insertions(+), 539 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6402c1ab8..00bdda7da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,20 @@ ## next release ### Breaking + +* `CriticalInputError` is raised when the input preprocessor values can't be set, this was so far only true + for the hmac preprocessor, but is now also applied for all other preprocessors. +* fix `delimiter` typo in `StringSplitterRule` configuration + ### Features ### Improvements * replace `BaseException` with `Exception` for custom errors * refactor `generic_resolver` to validate rules on startup instead of application of each rule +* rewrite the helper method `add_field_to` such that it always raises an `FieldExistsWarning` instead of return a bool. +* add new helper method `add_fields_to` to directly add multiple fields to one event +* refactored some processors to make use of the new helper methods + ### Bugfix diff --git a/logprep/abc/input.py b/logprep/abc/input.py index b63b6d820..396d995c0 100644 --- a/logprep/abc/input.py +++ b/logprep/abc/input.py @@ -17,7 +17,8 @@ from logprep.abc.connector import Connector from logprep.abc.exceptions import LogprepException from logprep.metrics.metrics import Metric -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.processor.base.exceptions import FieldExistsWarning +from logprep.util.helper import add_fields_to, get_dotted_field_value from logprep.util.time import UTC, TimeParser from logprep.util.validators import dict_structure_validator @@ -280,16 +281,19 @@ def get_next(self, timeout: float) -> dict | None: self.metrics.number_of_processed_events += 1 if not isinstance(event, dict): raise CriticalInputError(self, "not a dict", event) - if self._add_hmac: - event = self._add_hmac_to(event, raw_event) - if self._add_version_info: - self._add_version_information_to_event(event) - if self._add_log_arrival_time_information: - self._add_arrival_time_information_to_event(event) - if self._add_log_arrival_timedelta_information: - self._add_arrival_timedelta_information_to_event(event) - if self._add_env_enrichment: - self._add_env_enrichment_to_event(event) + try: + if self._add_hmac: + event = self._add_hmac_to(event, raw_event) + if self._add_version_info: + self._add_version_information_to_event(event) + if self._add_log_arrival_time_information: + self._add_arrival_time_information_to_event(event) + if self._add_log_arrival_timedelta_information: + self._add_arrival_timedelta_information_to_event(event) + if self._add_env_enrichment: + self._add_env_enrichment_to_event(event) + except FieldExistsWarning as error: + raise CriticalInputError(self, error.args[0], event) from error return event def batch_finished_callback(self): @@ -300,13 +304,19 @@ def _add_env_enrichment_to_event(self, event: dict): enrichments = self._config.preprocessing.get("enrich_by_env_variables") if not enrichments: return - for target_field, variable_name in enrichments.items(): - add_field_to(event, target_field, os.environ.get(variable_name, "")) + fields = { + target: os.environ.get(variable_name, "") + for target, variable_name in enrichments.items() + } + add_fields_to(event, fields) def _add_arrival_time_information_to_event(self, event: dict): - now = TimeParser.now() - target_field = self._config.preprocessing.get("log_arrival_time_target_field") - add_field_to(event, target_field, now.isoformat()) + new_field = { + self._config.preprocessing.get( + "log_arrival_time_target_field" + ): TimeParser.now().isoformat() + } + add_fields_to(event, new_field) def _add_arrival_timedelta_information_to_event(self, event: dict): log_arrival_timedelta_config = self._config.preprocessing.get("log_arrival_timedelta") @@ -322,16 +332,16 @@ def _add_arrival_timedelta_information_to_event(self, event: dict): TimeParser.from_string(log_arrival_time).astimezone(UTC) - TimeParser.from_string(time_reference).astimezone(UTC) ).total_seconds() - add_field_to(event, target_field, delta_time_sec) + add_fields_to(event, fields={target_field: delta_time_sec}) def _add_version_information_to_event(self, event: dict): """Add the version information to the event""" target_field = self._config.preprocessing.get("version_info_target_field") # pylint: disable=protected-access - add_field_to(event, target_field, self._config._version_information) + add_fields_to(event, fields={target_field: self._config._version_information}) # pylint: enable=protected-access - def _add_hmac_to(self, event_dict, raw_event) -> Tuple[dict, str]: + def _add_hmac_to(self, event_dict, raw_event) -> dict: """ Calculates an HMAC (Hash-based message authentication code) based on a given target field and adds it to the given event. If the target field has the value '' the full raw @@ -357,7 +367,7 @@ def _add_hmac_to(self, event_dict, raw_event) -> Tuple[dict, str]: ------ CriticalInputError If the hmac could not be added to the event because the desired output field already - exists or cant't be found. + exists or can't be found. """ hmac_options = self._config.preprocessing.get("hmac", {}) hmac_target_field_name = hmac_options.get("target") @@ -381,18 +391,11 @@ def _add_hmac_to(self, event_dict, raw_event) -> Tuple[dict, str]: digestmod=hashlib.sha256, ).hexdigest() compressed = zlib.compress(received_orig_message, level=-1) - hmac_output = {"hmac": hmac, "compressed_base64": base64.b64encode(compressed).decode()} - add_was_successful = add_field_to( - event_dict, - hmac_options.get("output_field"), - hmac_output, - ) - if not add_was_successful: - raise CriticalInputError( - self, - f"Couldn't add the hmac to the input event as the desired " - f"output field '{hmac_options.get('output_field')}' already " - f"exist.", - event_dict, - ) + new_field = { + hmac_options.get("output_field"): { + "hmac": hmac, + "compressed_base64": base64.b64encode(compressed).decode(), + } + } + add_fields_to(event_dict, new_field) return event_dict diff --git a/logprep/abc/processor.py b/logprep/abc/processor.py index c7c4510cc..802ba31c3 100644 --- a/logprep/abc/processor.py +++ b/logprep/abc/processor.py @@ -12,7 +12,6 @@ from logprep.framework.rule_tree.rule_tree import RuleTree, RuleTreeType from logprep.metrics.metrics import Metric from logprep.processor.base.exceptions import ( - FieldExistsWarning, ProcessingCriticalError, ProcessingError, ProcessingWarning, @@ -20,7 +19,7 @@ from logprep.util import getter from logprep.util.helper import ( add_and_overwrite, - add_field_to, + add_fields_to, get_dotted_field_value, pop_dotted_field_value, ) @@ -357,13 +356,15 @@ def _handle_warning_error(self, event, rule, error, failure_tags=None): if failure_tags is None: failure_tags = rule.failure_tags if tags is None: - add_and_overwrite(event, "tags", sorted(list({*failure_tags}))) + new_field = {"tags": sorted(list({*failure_tags}))} else: - add_and_overwrite(event, "tags", sorted(list({*tags, *failure_tags}))) + new_field = {"tags": sorted(list({*tags, *failure_tags}))} + add_and_overwrite(event, new_field, rule) if isinstance(error, ProcessingWarning): if error.tags: tags = tags if tags else [] - add_and_overwrite(event, "tags", sorted(list({*error.tags, *tags, *failure_tags}))) + new_field = {"tags": sorted(list({*error.tags, *tags, *failure_tags}))} + add_and_overwrite(event, new_field, rule) self.result.warnings.append(error) else: self.result.warnings.append(ProcessingWarning(str(error), rule, event)) @@ -381,15 +382,12 @@ def _has_missing_values(self, event, rule, source_field_dict): return False def _write_target_field(self, event: dict, rule: "Rule", result: any) -> None: - add_successful = add_field_to( + add_fields_to( event, - output_field=rule.target_field, - content=result, + fields={rule.target_field: result}, extends_lists=rule.extend_target_list, - overwrite_output_field=rule.overwrite_target, + overwrite_target_field=rule.overwrite_target, ) - if not add_successful: - raise FieldExistsWarning(rule, event, [rule.target_field]) def setup(self): super().setup() diff --git a/logprep/metrics/metrics.py b/logprep/metrics/metrics.py index caf12bcf7..c89c6621c 100644 --- a/logprep/metrics/metrics.py +++ b/logprep/metrics/metrics.py @@ -124,7 +124,7 @@ from attrs import define, field, validators from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram -from logprep.util.helper import add_field_to +from logprep.util.helper import add_fields_to @define(kw_only=True, slots=False) @@ -222,12 +222,14 @@ def inner(self, *args, **kwargs): # nosemgrep if hasattr(self, "rule_type"): event = args[0] if event: - add_field_to(event, f"processing_times.{self.rule_type}", duration) + add_fields_to( + event, fields={f"processing_times.{self.rule_type}": duration} + ) if hasattr(self, "_logprep_config"): # attribute of the Pipeline class event = args[0] if event: - add_field_to(event, "processing_times.pipeline", duration) - add_field_to(event, "processing_times.hostname", gethostname()) + add_fields_to(event, fields={"processing_times.pipeline": duration}) + add_fields_to(event, fields={"processing_times.hostname": gethostname()}) return result return inner diff --git a/logprep/processor/base/exceptions.py b/logprep/processor/base/exceptions.py index 9f020b3c6..caaa40f68 100644 --- a/logprep/processor/base/exceptions.py +++ b/logprep/processor/base/exceptions.py @@ -70,17 +70,20 @@ def __init__(self, message: str, rule: "Rule"): class ProcessingWarning(Warning): """A warning occurred - log the warning, but continue processing the event.""" - def __init__(self, message: str, rule: "Rule", event: dict, tags: List[str] = None): + def __init__(self, message: str, rule: "Rule | None", event: dict, tags: List[str] = None): self.tags = tags if tags else [] - rule.metrics.number_of_warnings += 1 - message = f"{message}, {rule.id=}, {rule.description=}, {event=}" + if rule: + rule.metrics.number_of_warnings += 1 + message += f", {rule.id=}, {rule.description=}" + message += f", {event=}" super().__init__(f"{self.__class__.__name__}: {message}") class FieldExistsWarning(ProcessingWarning): """Raised if field already exists.""" - def __init__(self, rule: "Rule", event: dict, skipped_fields: List[str]): + def __init__(self, rule: "Rule | None", event: dict, skipped_fields: List[str]): + self.skipped_fields = skipped_fields message = ( "The following fields could not be written, because " "one or more subfields existed and could not be extended: " diff --git a/logprep/processor/clusterer/processor.py b/logprep/processor/clusterer/processor.py index 914a8c1a5..1bc375c83 100644 --- a/logprep/processor/clusterer/processor.py +++ b/logprep/processor/clusterer/processor.py @@ -53,7 +53,7 @@ SignaturePhaseStreaming, ) from logprep.processor.field_manager.processor import FieldManager -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_value class Clusterer(FieldManager): @@ -138,12 +138,11 @@ def _cluster(self, event: dict, rule: ClustererRule): ) else: cluster_signature = cluster_signature_based_on_message - add_field_to( + add_fields_to( event, - self._config.output_field_name, - cluster_signature, + fields={self._config.output_field_name: cluster_signature}, extends_lists=rule.extend_target_list, - overwrite_output_field=rule.overwrite_target, + overwrite_target_field=rule.overwrite_target, ) self._last_non_extracted_signature = sig_text diff --git a/logprep/processor/dissector/processor.py b/logprep/processor/dissector/processor.py index 1da24a239..3cc3bc34a 100644 --- a/logprep/processor/dissector/processor.py +++ b/logprep/processor/dissector/processor.py @@ -28,11 +28,14 @@ .. automodule:: logprep.processor.dissector.rule """ -from typing import Callable, List, Tuple +from typing import TYPE_CHECKING, Callable, List, Tuple from logprep.processor.dissector.rule import DissectorRule from logprep.processor.field_manager.processor import FieldManager -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_value + +if TYPE_CHECKING: + from logprep.processor.base.rule import Rule class Dissector(FieldManager): @@ -51,7 +54,7 @@ def _apply_mapping(self, event, rule): for action, *args, _ in action_mappings_sorted_by_position: action(*args) - def _get_mappings(self, event, rule) -> List[Tuple[Callable, dict, str, str, str, int]]: + def _get_mappings(self, event, rule) -> List[Tuple[Callable, dict, dict, str, "Rule", int]]: current_field = None target_field_mapping = {} for rule_action in rule.actions: @@ -84,12 +87,15 @@ def _get_mappings(self, event, rule) -> List[Tuple[Callable, dict, str, str, str target_field = target_field_mapping.get(target_field.lstrip("&")) if strip_char: content = content.strip(strip_char) - yield rule_action, event, target_field, content, separator, position + field = {target_field: content} + yield rule_action, event, field, separator, rule, position def _apply_convert_datatype(self, event, rule): for target_field, converter in rule.convert_actions: try: target_value = converter(get_dotted_field_value(event, target_field)) - add_field_to(event, target_field, target_value, overwrite_output_field=True) + add_fields_to( + event, {target_field: target_value}, rule, overwrite_target_field=True + ) except ValueError as error: self._handle_warning_error(event, rule, error) diff --git a/logprep/processor/domain_label_extractor/processor.py b/logprep/processor/domain_label_extractor/processor.py index afb3e84bc..5edca61d4 100644 --- a/logprep/processor/domain_label_extractor/processor.py +++ b/logprep/processor/domain_label_extractor/processor.py @@ -46,11 +46,10 @@ from filelock import FileLock from tldextract import TLDExtract -from logprep.processor.base.exceptions import FieldExistsWarning from logprep.processor.domain_label_extractor.rule import DomainLabelExtractorRule from logprep.processor.field_manager.processor import FieldManager from logprep.util.getter import GetterFactory -from logprep.util.helper import add_and_overwrite, add_field_to, get_dotted_field_value +from logprep.util.helper import add_and_overwrite, add_fields_to, get_dotted_field_value from logprep.util.validators import list_of_urls_validator logger = logging.getLogger("DomainLabelExtractor") @@ -131,27 +130,24 @@ def _apply_rules(self, event, rule: DomainLabelExtractorRule): if self._is_valid_ip(domain): tagging_field.append(f"ip_in_{rule.source_fields[0].replace('.', '_')}") - add_and_overwrite(event, self._config.tagging_field_name, tagging_field) + add_and_overwrite( + event, fields={self._config.tagging_field_name: tagging_field}, rule=rule + ) return labels = self._tld_extractor(domain) if labels.suffix != "": - labels_dict = { - "registered_domain": labels.domain + "." + labels.suffix, - "top_level_domain": labels.suffix, - "subdomain": labels.subdomain, + fields = { + f"{rule.target_field}.registered_domain": f"{labels.domain}.{labels.suffix}", + f"{rule.target_field}.top_level_domain": labels.suffix, + f"{rule.target_field}.subdomain": labels.subdomain, } - for label, value in labels_dict.items(): - output_field = f"{rule.target_field}.{label}" - add_successful = add_field_to( - event, output_field, value, overwrite_output_field=rule.overwrite_target - ) - - if not add_successful: - raise FieldExistsWarning(rule, event, [output_field]) + add_fields_to(event, fields, rule, overwrite_target_field=rule.overwrite_target) else: tagging_field.append(f"invalid_domain_in_{rule.source_fields[0].replace('.', '_')}") - add_and_overwrite(event, self._config.tagging_field_name, tagging_field) + add_and_overwrite( + event, fields={self._config.tagging_field_name: tagging_field}, rule=rule + ) @staticmethod def _is_valid_ip(domain): diff --git a/logprep/processor/domain_resolver/processor.py b/logprep/processor/domain_resolver/processor.py index 5f3231d2b..3b5655651 100644 --- a/logprep/processor/domain_resolver/processor.py +++ b/logprep/processor/domain_resolver/processor.py @@ -53,7 +53,7 @@ from logprep.util.cache import Cache from logprep.util.getter import GetterFactory from logprep.util.hasher import SHA256Hasher -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_value from logprep.util.validators import list_of_urls_validator logger = logging.getLogger("DomainResolver") @@ -222,7 +222,9 @@ def _resolve_ip(self, domain, hash_string=None): def _store_debug_infos(self, event, requires_storing): event_dbg = { - "obtained_from_cache": not requires_storing, - "cache_size": len(self._domain_ip_map.keys()), + "resolved_ip_debug": { + "obtained_from_cache": not requires_storing, + "cache_size": len(self._domain_ip_map.keys()), + } } - add_field_to(event, "resolved_ip_debug", event_dbg, overwrite_output_field=True) + add_fields_to(event, event_dbg, overwrite_target_field=True) diff --git a/logprep/processor/field_manager/processor.py b/logprep/processor/field_manager/processor.py index 09eeeab6f..422dccb0f 100644 --- a/logprep/processor/field_manager/processor.py +++ b/logprep/processor/field_manager/processor.py @@ -29,15 +29,13 @@ .. automodule:: logprep.processor.field_manager.rule """ -import itertools from collections import namedtuple from logprep.abc.processor import Processor -from logprep.processor.base.exceptions import FieldExistsWarning from logprep.processor.field_manager.rule import FieldManagerRule from logprep.util.helper import ( add_and_overwrite, - add_field_to, + add_fields_to, get_dotted_field_value, pop_dotted_field_value, ) @@ -72,35 +70,24 @@ def _apply_single_target_processing(self, event, rule, rule_args): self._write_to_single_target(args, extend_target_list, overwrite_target, rule) def _apply_mapping(self, event, rule, rule_args): - source_fields, _, mapping, _, _ = rule_args + source_fields, _, mapping, extend_target_list, overwrite_target = rule_args source_fields, targets = list(zip(*mapping.items())) source_field_values = self._get_field_values(event, mapping.keys()) self._handle_missing_fields(event, rule, source_fields, source_field_values) if not any(source_field_values): return source_field_values, targets = self._filter_missing_fields(source_field_values, targets) - self._write_to_multiple_targets(event, targets, source_field_values, rule, rule_args) + add_fields_to( + event, + dict(zip(targets, source_field_values)), + rule, + extend_target_list, + overwrite_target, + ) if rule.delete_source_fields: for dotted_field in source_fields: pop_dotted_field_value(event, dotted_field) - def _write_to_multiple_targets(self, event, target_fields, field_values, rule, rule_args): - _, _, _, extend_target_list, overwrite_target = rule_args - results = map( - add_field_to, - itertools.repeat(event, len(target_fields)), - target_fields, - field_values, - itertools.repeat(extend_target_list, len(target_fields)), - itertools.repeat(overwrite_target, len(target_fields)), - ) - if not all(results): - unsuccessful_indices = [i for i, x in enumerate(results) if not x] - unsuccessful_targets = [ - x for i, x in enumerate(target_fields) if i in unsuccessful_indices - ] - raise FieldExistsWarning(rule, event, unsuccessful_targets) - def _write_to_single_target(self, args, extend_target_list, overwrite_target, rule): event, target_field, source_fields_values = args target_field_value = get_dotted_field_value(event, target_field) @@ -122,7 +109,7 @@ def _write_to_single_target(self, args, extend_target_list, overwrite_target, ru case State( extend=True, overwrite=True, single_source_element=False, target_is_list=False ): - add_and_overwrite(event, target_field, source_fields_values) + add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) return case State( @@ -134,16 +121,16 @@ def _write_to_single_target(self, args, extend_target_list, overwrite_target, ru ): flattened_source_fields = self._overwrite_from_source_values(source_fields_values) source_fields_values = [*flattened_source_fields] - add_and_overwrite(event, target_field, source_fields_values) + add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) return case State(extend=True, overwrite=False, target_is_list=False, target_is_none=True): - add_and_overwrite(event, target_field, source_fields_values) + add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) return case State(extend=True, overwrite=False, target_is_list=False): source_fields_values = [target_field_value, *source_fields_values] - add_and_overwrite(event, target_field, source_fields_values) + add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) return case State( @@ -151,21 +138,18 @@ def _write_to_single_target(self, args, extend_target_list, overwrite_target, ru ): flattened_source_fields = self._overwrite_from_source_values(source_fields_values) source_fields_values = [*target_field_value, *flattened_source_fields] - add_and_overwrite(event, target_field, source_fields_values) + add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) return case State(overwrite=True, extend=True): flattened_source_fields = self._overwrite_from_source_values(source_fields_values) source_fields_values = [*flattened_source_fields] - add_and_overwrite(event, target_field, source_fields_values) + add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) return case _: - success = add_field_to( - event, target_field, source_fields_values, state.extend, state.overwrite - ) - if not success: - raise FieldExistsWarning(rule, event, [target_field]) + field = {target_field: source_fields_values} + add_fields_to(event, field, rule, state.extend, state.overwrite) def _overwrite_from_source_values(self, source_fields_values): duplicates = [] diff --git a/logprep/processor/generic_adder/processor.py b/logprep/processor/generic_adder/processor.py index 974c01040..2e9677894 100644 --- a/logprep/processor/generic_adder/processor.py +++ b/logprep/processor/generic_adder/processor.py @@ -46,10 +46,9 @@ from logprep.abc.processor import Processor from logprep.factory_error import InvalidConfigurationError -from logprep.processor.base.exceptions import FieldExistsWarning from logprep.processor.generic_adder.mysql_connector import MySQLConnector from logprep.processor.generic_adder.rule import GenericAdderRule -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_value def sql_config_validator(_, attribute, value): @@ -225,48 +224,32 @@ def _apply_rules(self, event: dict, rule: GenericAdderRule): FieldExistsWarning Raises if an addition would overwrite an existing field or value. """ - conflicting_fields = [] - + items_to_add = rule.add use_db = rule.db_target and self._db_table if use_db: self._update_db_table() items_to_add = self._get_items_to_add_from_db(event, rule) - else: - items_to_add = rule.add.items() - - # Add the items to the event - for dotted_field, value in items_to_add: - add_successful = add_field_to( - event, - output_field=dotted_field, - content=value, - extends_lists=rule.extend_target_list, - overwrite_output_field=rule.overwrite_target, - ) - if not add_successful: - conflicting_fields.append(dotted_field) - - if conflicting_fields: - raise FieldExistsWarning(rule, event, conflicting_fields) + if items_to_add: + add_fields_to(event, items_to_add, rule, rule.extend_target_list, rule.overwrite_target) - def _get_items_to_add_from_db(self, event: dict, rule: GenericAdderRule) -> list: + def _get_items_to_add_from_db(self, event: dict, rule: GenericAdderRule) -> dict | None: """Get the sub part of the value from the event using a regex pattern""" - items_to_add = [] if not rule.db_pattern: - return items_to_add - + return value_to_check_in_db = get_dotted_field_value(event, rule.db_target) match_with_value_in_db = rule.db_pattern.match(value_to_check_in_db) if match_with_value_in_db: # Get values to add from db table using the sub part value_to_map = match_with_value_in_db.group(1).upper() add_from_db = self._db_table.get(value_to_map, []) - if rule.db_destination_prefix: - for idx, _ in enumerate(add_from_db): - if not add_from_db[idx][0].startswith(rule.db_destination_prefix): - add_from_db[idx][0] = f"{rule.db_destination_prefix}.{add_from_db[idx][0]}" - - for item in add_from_db: - items_to_add.append(item) - return items_to_add + add_from_db = [ + (self._add_prefix_if_not_present(key, rule), value) + for key, value in add_from_db + ] + return dict(add_from_db) + + def _add_prefix_if_not_present(self, key: str, rule: "GenericAdderRule") -> str: + if not key.startswith(rule.db_destination_prefix): + return f"{rule.db_destination_prefix}.{key}" + return key diff --git a/logprep/processor/generic_resolver/processor.py b/logprep/processor/generic_resolver/processor.py index b2ca66c26..e386deda2 100644 --- a/logprep/processor/generic_resolver/processor.py +++ b/logprep/processor/generic_resolver/processor.py @@ -26,12 +26,11 @@ """ import re -from typing import Union from logprep.processor.base.exceptions import FieldExistsWarning from logprep.processor.field_manager.processor import FieldManager from logprep.processor.generic_resolver.rule import GenericResolverRule -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_value class GenericResolver(FieldManager): @@ -41,67 +40,46 @@ class GenericResolver(FieldManager): def _apply_rules(self, event, rule): """Apply the given rule to the current event""" + source_field_values = [ + get_dotted_field_value(event, source_field) + for source_field in rule.field_mapping.keys() + ] + self._handle_missing_fields(event, rule, rule.field_mapping.keys(), source_field_values) conflicting_fields = [] - - source_values = [] for source_field, target_field in rule.field_mapping.items(): - source_value = get_dotted_field_value(event, source_field) - source_values.append(source_value) - if source_value is None: + source_field_value = get_dotted_field_value(event, source_field) + if source_field_value is None: continue - - # FILE - if rule.resolve_from_file: - pattern = f'^{rule.resolve_from_file["pattern"]}$' - replacements = rule.resolve_from_file["additions"] - matches = re.match(pattern, source_value) - if matches: - dest_val = replacements.get(matches.group("mapping")) - if dest_val: - success = self._add_uniquely_to_list(event, rule, target_field, dest_val) - if not success: - conflicting_fields.append(target_field) - - # LIST - for pattern, dest_val in rule.resolve_list.items(): - if re.search(pattern, source_value): - success = add_field_to( - event, - target_field, - dest_val, - extends_lists=rule.extend_target_list, - overwrite_output_field=rule.overwrite_target, - ) - if not success: - conflicting_fields.append(target_field) - break - self._handle_missing_fields(event, rule, rule.field_mapping.keys(), source_values) + content = self._find_content_of_first_matching_pattern(rule, source_field_value) + if not content: + continue + current_content = get_dotted_field_value(event, target_field) + if isinstance(current_content, list) and content in current_content: + continue + if rule.extend_target_list and current_content is None: + content = [content] + try: + add_fields_to( + event, + fields={target_field: content}, + rule=rule, + extends_lists=rule.extend_target_list, + overwrite_target_field=rule.overwrite_target, + ) + except FieldExistsWarning as error: + conflicting_fields.extend(error.skipped_fields) if conflicting_fields: raise FieldExistsWarning(rule, event, conflicting_fields) - @staticmethod - def _add_uniquely_to_list( - event: dict, - rule: GenericResolverRule, - target: str, - content: Union[str, float, int, list, dict], - ) -> bool: - """Extend list if content is not already in the list""" - add_success = True - target_val = get_dotted_field_value(event, target) - target_is_list = isinstance(target_val, list) - if rule.extend_target_list and not target_is_list: - empty_list = [] - add_success &= add_field_to( - event, - target, - empty_list, - overwrite_output_field=rule.overwrite_target, - ) - if add_success: - target_is_list = True - target_val = empty_list - if target_is_list and content in target_val: - return add_success - add_success = add_field_to(event, target, content, extends_lists=rule.extend_target_list) - return add_success + def _find_content_of_first_matching_pattern(self, rule, source_field_value): + if rule.resolve_from_file: + pattern = f'^{rule.resolve_from_file["pattern"]}$' + replacements = rule.resolve_from_file["additions"] + matches = re.match(pattern, source_field_value) + if matches: + content = replacements.get(matches.group("mapping")) + if content: + return content + for pattern, content in rule.resolve_list.items(): + if re.search(pattern, source_field_value): + return content diff --git a/logprep/processor/geoip_enricher/processor.py b/logprep/processor/geoip_enricher/processor.py index bdf099d96..9a917f3fa 100644 --- a/logprep/processor/geoip_enricher/processor.py +++ b/logprep/processor/geoip_enricher/processor.py @@ -38,11 +38,10 @@ from geoip2 import database from geoip2.errors import AddressNotFoundError -from logprep.processor.base.exceptions import FieldExistsWarning from logprep.processor.field_manager.processor import FieldManager from logprep.processor.geoip_enricher.rule import GEOIP_DATA_STUBS, GeoipEnricherRule from logprep.util.getter import GetterFactory -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_value logger = logging.getLogger("GeoipEnricher") @@ -129,18 +128,14 @@ def _apply_rules(self, event, rule): geoip_data = self._try_getting_geoip_data(ip_string) if not geoip_data: return - for target_subfield, value in geoip_data.items(): - if value is None: - continue - full_output_field = f"{rule.target_field}.{target_subfield}" - if target_subfield in rule.customize_target_subfields: - full_output_field = rule.customize_target_subfields.get(target_subfield) - adding_was_successful = add_field_to( - event=event, - output_field=full_output_field, - content=value, - extends_lists=False, - overwrite_output_field=rule.overwrite_target, - ) - if not adding_was_successful: - raise FieldExistsWarning(rule, event, [full_output_field]) + fields = { + rule.customize_target_subfields.get(target, f"{rule.target_field}.{target}"): value + for target, value in geoip_data.items() + } + add_fields_to( + event, + fields, + rule=rule, + extends_lists=False, + overwrite_target_field=rule.overwrite_target, + ) diff --git a/logprep/processor/grokker/processor.py b/logprep/processor/grokker/processor.py index c0dae52be..654a1f8f1 100644 --- a/logprep/processor/grokker/processor.py +++ b/logprep/processor/grokker/processor.py @@ -38,15 +38,11 @@ from attrs import define, field, validators -from logprep.processor.base.exceptions import ( - FieldExistsWarning, - ProcessingError, - ProcessingWarning, -) +from logprep.processor.base.exceptions import ProcessingError, ProcessingWarning from logprep.processor.field_manager.processor import FieldManager from logprep.processor.grokker.rule import GrokkerRule from logprep.util.getter import GetterFactory -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_value logger = logging.getLogger("Grokker") @@ -69,7 +65,6 @@ class Config(FieldManager.Config): """ def _apply_rules(self, event: dict, rule: GrokkerRule): - conflicting_fields = [] matches = [] source_values = [] for dotted_field, grok in rule.actions.items(): @@ -82,25 +77,22 @@ def _apply_rules(self, event: dict, rule: GrokkerRule): except TimeoutError as error: self._handle_missing_fields(event, rule, rule.actions.keys(), source_values) raise ProcessingError( - self, f"Grok pattern timeout for source field: '{dotted_field}' in rule '{rule}', " f"the grok pattern might be too complex.", + rule, ) from error if result is None or result == {}: continue matches.append(True) - for dotted_field, value in result.items(): - if value is None: - continue - success = add_field_to( - event, dotted_field, value, rule.extend_target_list, rule.overwrite_target - ) - if not success: - conflicting_fields.append(dotted_field) + add_fields_to( + event, + result, + rule=rule, + extends_lists=rule.extend_target_list, + overwrite_target_field=rule.overwrite_target, + ) if self._handle_missing_fields(event, rule, rule.actions.keys(), source_values): return - if conflicting_fields: - raise FieldExistsWarning(rule, event, conflicting_fields) if not matches: raise ProcessingWarning("no grok pattern matched", rule, event) diff --git a/logprep/processor/hyperscan_resolver/processor.py b/logprep/processor/hyperscan_resolver/processor.py index d942d1ee2..e19a32e4e 100644 --- a/logprep/processor/hyperscan_resolver/processor.py +++ b/logprep/processor/hyperscan_resolver/processor.py @@ -33,17 +33,17 @@ import errno from os import makedirs, path -from typing import Any, Dict, Tuple, Union +from typing import Any, Dict, Tuple from attr import define, field from logprep.processor.base.exceptions import ( FieldExistsWarning, - SkipImportError, ProcessingCriticalError, + SkipImportError, ) from logprep.processor.field_manager.processor import FieldManager -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_value from logprep.util.validators import directory_validator # pylint: disable=no-name-in-module @@ -113,40 +113,25 @@ def _apply_rules(self, event: dict, rule: HyperscanResolverRule): if matches: dest_val = pattern_id_to_dest_val_map[matches[matches.index(min(matches))]] if dest_val: - add_success = self._add_uniquely_to_list(event, rule, resolve_target, dest_val) - if not add_success: - conflicting_fields.append(resolve_target) + current_content = get_dotted_field_value(event, resolve_target) + if isinstance(current_content, list) and dest_val in current_content: + continue + if rule.extend_target_list and current_content is None: + dest_val = [dest_val] + try: + add_fields_to( + event, + fields={resolve_target: dest_val}, + rule=rule, + extends_lists=rule.extend_target_list, + overwrite_target_field=rule.overwrite_target, + ) + except FieldExistsWarning as error: + conflicting_fields.extend(error.skipped_fields) self._handle_missing_fields(event, rule, rule.field_mapping.keys(), source_values) if conflicting_fields: raise FieldExistsWarning(rule, event, conflicting_fields) - @staticmethod - def _add_uniquely_to_list( - event: dict, - rule: HyperscanResolverRule, - target: str, - content: Union[str, float, int, list, dict], - ) -> bool: - """Extend list if content is not already in the list""" - add_success = True - target_val = get_dotted_field_value(event, target) - target_is_list = isinstance(target_val, list) - if rule.extend_target_list and not target_is_list: - empty_list = [] - add_success &= add_field_to( - event, - target, - empty_list, - overwrite_output_field=rule.overwrite_target, - ) - if add_success: - target_is_list = True - target_val = empty_list - if target_is_list and content in target_val: - return add_success - add_success = add_field_to(event, target, content, extends_lists=rule.extend_target_list) - return add_success - @staticmethod def _match_with_hyperscan(hyperscan_db: Database, src_val: str) -> list: if not src_val: diff --git a/logprep/processor/labeler/processor.py b/logprep/processor/labeler/processor.py index 97fb7b21d..7ee44ed0d 100644 --- a/logprep/processor/labeler/processor.py +++ b/logprep/processor/labeler/processor.py @@ -33,7 +33,7 @@ from logprep.abc.processor import Processor from logprep.processor.labeler.labeling_schema import LabelingSchema from logprep.processor.labeler.rule import LabelerRule -from logprep.util.helper import add_field_to, get_dotted_field_value, add_and_overwrite +from logprep.util.helper import add_fields_to, get_dotted_field_value class Labeler(Processor): @@ -73,35 +73,11 @@ def setup(self): def _apply_rules(self, event, rule): """Applies the rule to the current event""" - self._add_label_fields(event, rule) - self._add_label_values(event, rule) - self._convert_label_categories_to_sorted_list(event) - - @staticmethod - def _add_label_fields(event: dict, rule: LabelerRule): - """Prepares the event by adding empty label fields""" - add_field_to(event, "label", {}) - for key in rule.label: - add_field_to(event, f"label.{key}", set()) - - @staticmethod - def _add_label_values(event: dict, rule: LabelerRule): - """Adds the labels from the rule to the event""" - for key in rule.label: - label_key = f"label.{key}" - label = get_dotted_field_value(event, label_key) - if not isinstance(label, set): - label = set(label) - add_and_overwrite(event, label_key, label) - label.update(rule.label[key]) - - @staticmethod - def _convert_label_categories_to_sorted_list(event: dict): - label = get_dotted_field_value(event, "label") - if label is None: - return - for category in label: - category_key = f"label.{category}" - category_value = get_dotted_field_value(event, category_key) - sorted_category = sorted(list(category_value)) - add_and_overwrite(event, category_key, sorted_category) + fields = {key: value for key, value in rule.prefixed_label.items()} + add_fields_to(event, fields, rule=rule, extends_lists=True) + # convert sets into sorted lists + fields = { + key: sorted(set(get_dotted_field_value(event, key))) + for key, _ in rule.prefixed_label.items() + } + add_fields_to(event, fields, rule=rule, overwrite_target_field=True) diff --git a/logprep/processor/labeler/rule.py b/logprep/processor/labeler/rule.py index 804e1f109..0b7f9ea90 100644 --- a/logprep/processor/labeler/rule.py +++ b/logprep/processor/labeler/rule.py @@ -60,6 +60,10 @@ def label(self) -> dict: # pylint: enable=C0111 + @property + def prefixed_label(self) -> dict: + return {f"label.{key}": value for key, value in self.label.items()} + def conforms_to_schema(self, schema: LabelingSchema) -> bool: """Check if labels are valid.""" return schema.validate_labels(self._config.label) diff --git a/logprep/processor/list_comparison/processor.py b/logprep/processor/list_comparison/processor.py index e3166ec2e..d2064362a 100644 --- a/logprep/processor/list_comparison/processor.py +++ b/logprep/processor/list_comparison/processor.py @@ -31,16 +31,8 @@ from attr import define, field, validators from logprep.abc.processor import Processor -from logprep.processor.base.exceptions import FieldExistsWarning from logprep.processor.list_comparison.rule import ListComparisonRule -from logprep.util.helper import add_field_to, get_dotted_field_value - - -class ListComparisonError(Exception): - """Base class for ListComparison related exceptions.""" - - def __init__(self, name: str, message: str): - super().__init__(f"ListComparison ({name}): {message}") +from logprep.util.helper import add_fields_to, get_dotted_field_value class ListComparison(Processor): @@ -79,14 +71,10 @@ def _apply_rules(self, event, rule): Currently applied list comparison rule. """ - comparison_result, comparison_key = self._list_comparison(rule, event) - if comparison_result is not None: - output_field = f"{ rule.target_field }.{ comparison_key }" - add_successful = add_field_to(event, output_field, comparison_result, True) - if not add_successful: - raise FieldExistsWarning(rule, event, [output_field]) + fields = {f"{rule.target_field}.{comparison_key}": comparison_result} + add_fields_to(event, fields, rule=rule, extends_lists=True) def _list_comparison(self, rule: ListComparisonRule, event: dict): """ diff --git a/logprep/processor/pre_detector/processor.py b/logprep/processor/pre_detector/processor.py index 9533cf1a1..ad10ea062 100644 --- a/logprep/processor/pre_detector/processor.py +++ b/logprep/processor/pre_detector/processor.py @@ -39,7 +39,7 @@ from logprep.processor.base.exceptions import ProcessingWarning from logprep.processor.pre_detector.ip_alerter import IPAlerter from logprep.processor.pre_detector.rule import PreDetectorRule -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_value from logprep.util.time import TimeParser, TimeParserException @@ -103,14 +103,11 @@ def normalize_timestamp(self, rule: PreDetectorRule, timestamp: str) -> str: parsed_datetime.astimezone(rule.target_timezone).isoformat().replace("+00:00", "Z") ) except TimeParserException as error: - error_message = "Could not parse timestamp" - raise ( - ProcessingWarning( - error_message, - rule, - self.result.event, - tags=["_pre_detector_timeparsing_failure"], - ) + raise ProcessingWarning( + "Could not parse timestamp", + rule, + self.result.event, + tags=["_pre_detector_timeparsing_failure"], ) from error def _apply_rules(self, event: dict, rule: PreDetectorRule): @@ -129,8 +126,7 @@ def _get_detection_result(self, event: dict, rule: PreDetectorRule): pre_detection_id = get_dotted_field_value(event, "pre_detection_id") if pre_detection_id is None: pre_detection_id = str(uuid4()) - add_field_to(event, "pre_detection_id", pre_detection_id) - + add_fields_to(event, {"pre_detection_id": pre_detection_id}, rule=rule) detection_result = self._generate_detection_result(pre_detection_id, event, rule) self.result.data.append((detection_result, self._config.outputs)) @@ -139,11 +135,13 @@ def _generate_detection_result( pre_detection_id: str, event: dict, rule: PreDetectorRule ) -> dict: detection_result = rule.detection_data - detection_result["rule_filter"] = rule.filter_str - detection_result["description"] = rule.description - detection_result["pre_detection_id"] = pre_detection_id - - host_name = get_dotted_field_value(event, "host.name") - if host_name is not None: - detection_result["host"] = {"name": host_name} + detection_result.update( + { + "rule_filter": rule.filter_str, + "description": rule.description, + "pre_detection_id": pre_detection_id, + } + ) + if host_name := get_dotted_field_value(event, "host.name"): + detection_result.update({"host": {"name": host_name}}) return detection_result diff --git a/logprep/processor/pseudonymizer/processor.py b/logprep/processor/pseudonymizer/processor.py index b4ec2159a..ff57c66e8 100644 --- a/logprep/processor/pseudonymizer/processor.py +++ b/logprep/processor/pseudonymizer/processor.py @@ -64,7 +64,7 @@ from logprep.processor.pseudonymizer.rule import PseudonymizerRule from logprep.util.getter import GetterFactory from logprep.util.hasher import SHA256Hasher -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_value from logprep.util.pseudo.encrypter import ( DualPKCS1HybridCTREncrypter, DualPKCS1HybridGCMEncrypter, @@ -264,7 +264,9 @@ def _apply_rules(self, event: dict, rule: PseudonymizerRule): ] else: field_value = self._pseudonymize_field(rule, dotted_field, regex, field_value) - _ = add_field_to(event, dotted_field, field_value, overwrite_output_field=True) + add_fields_to( + event, fields={dotted_field: field_value}, rule=rule, overwrite_target_field=True + ) if "@timestamp" in event: for pseudonym, _ in self.result.data: pseudonym["@timestamp"] = event["@timestamp"] diff --git a/logprep/processor/requester/processor.py b/logprep/processor/requester/processor.py index 6b863e045..aa2e6edea 100644 --- a/logprep/processor/requester/processor.py +++ b/logprep/processor/requester/processor.py @@ -44,11 +44,7 @@ from logprep.processor.base.exceptions import FieldExistsWarning from logprep.processor.field_manager.processor import FieldManager from logprep.processor.requester.rule import RequesterRule -from logprep.util.helper import ( - add_field_to, - get_dotted_field_value, - get_source_fields_dict, -) +from logprep.util.helper import add_fields_to, get_source_fields_dict TEMPLATE_KWARGS = ("url", "json", "data", "params") @@ -73,31 +69,32 @@ def _apply_rules(self, event, rule): def _handle_response(self, event, rule, response): conflicting_fields = [] if rule.target_field: - result = self._get_result(response) - successful = add_field_to( - event, - rule.target_field, - result, - rule.extend_target_list, - rule.overwrite_target, - ) - if not successful: - conflicting_fields.append(rule.target_field) + try: + add_fields_to( + event, + fields={rule.target_field: self._get_result(response)}, + rule=rule, + extends_lists=rule.extend_target_list, + overwrite_target_field=rule.overwrite_target, + ) + except FieldExistsWarning as error: + conflicting_fields.extend(error.skipped_fields) if rule.target_field_mapping: - result = self._get_result(response) - for source_field, target_field in rule.target_field_mapping.items(): - source_field_value = get_dotted_field_value(result, source_field) - successful = add_field_to( + source_fields = rule.target_field_mapping.keys() + contents = self._get_field_values(self._get_result(response), source_fields) + targets = rule.target_field_mapping.values() + try: + add_fields_to( event, - target_field, - source_field_value, + dict(zip(targets, contents)), + rule, rule.extend_target_list, rule.overwrite_target, ) - if not successful: - conflicting_fields.append(rule.target_field) + except FieldExistsWarning as error: + conflicting_fields.extend(error.skipped_fields) if conflicting_fields: - raise FieldExistsWarning(rule, event, [rule.target_field]) + raise FieldExistsWarning(rule, event, conflicting_fields) def _request(self, event, rule, kwargs): try: diff --git a/logprep/processor/selective_extractor/processor.py b/logprep/processor/selective_extractor/processor.py index 4656b5eb3..c0bcf2ddd 100644 --- a/logprep/processor/selective_extractor/processor.py +++ b/logprep/processor/selective_extractor/processor.py @@ -31,7 +31,7 @@ from logprep.processor.field_manager.processor import FieldManager from logprep.processor.selective_extractor.rule import SelectiveExtractorRule -from logprep.util.helper import add_field_to, get_source_fields_dict +from logprep.util.helper import add_fields_to, get_source_fields_dict class SelectiveExtractor(FieldManager): @@ -64,6 +64,5 @@ def _apply_rules(self, event: dict, rule: SelectiveExtractorRule): } if flattened_fields: filtered_event = {} - for field, content in flattened_fields.items(): - add_field_to(filtered_event, field, content) + add_fields_to(filtered_event, flattened_fields, rule) self.result.data.append((filtered_event, rule.outputs)) diff --git a/logprep/processor/string_splitter/processor.py b/logprep/processor/string_splitter/processor.py index f2b94e260..7d81a1d20 100644 --- a/logprep/processor/string_splitter/processor.py +++ b/logprep/processor/string_splitter/processor.py @@ -43,5 +43,5 @@ def _apply_rules(self, event: dict, rule: StringSplitterRule): self._handle_missing_fields(event, rule, rule.source_fields, [source_field_content]) if not isinstance(source_field_content, str): raise ProcessingWarning(f"source_field '{source_field}' is not a string", rule, event) - result = source_field_content.split(rule.delimeter) + result = source_field_content.split(rule.delimiter) self._write_target_field(event, rule, result) diff --git a/logprep/processor/string_splitter/rule.py b/logprep/processor/string_splitter/rule.py index e2ced13e4..b25897d90 100644 --- a/logprep/processor/string_splitter/rule.py +++ b/logprep/processor/string_splitter/rule.py @@ -61,12 +61,12 @@ class Config(FieldManagerRule.Config): validators.max_len(1), ], ) - delimeter: str = field(validator=validators.instance_of(str), default=" ") - """The delimeter for splitting. Defaults to whitespace""" + delimiter: str = field(validator=validators.instance_of(str), default=" ") + """The delimiter for splitting. Defaults to whitespace""" mapping: dict = field(default="", init=False, repr=False, eq=False) ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False) @property - def delimeter(self): - """returns the configured delimeter""" - return self._config.delimeter + def delimiter(self): + """returns the configured delimiter""" + return self._config.delimiter diff --git a/logprep/processor/template_replacer/processor.py b/logprep/processor/template_replacer/processor.py index 7a0502809..e5101a292 100644 --- a/logprep/processor/template_replacer/processor.py +++ b/logprep/processor/template_replacer/processor.py @@ -38,11 +38,10 @@ from attr import define, field, validators -from logprep.processor.base.exceptions import FieldExistsWarning from logprep.processor.field_manager.processor import FieldManager from logprep.processor.template_replacer.rule import TemplateReplacerRule from logprep.util.getter import GetterFactory -from logprep.util.helper import add_field_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_value class TemplateReplacerError(Exception): @@ -114,16 +113,13 @@ def _perform_replacement(self, event: dict, replacement: str, rule: TemplateRepl If target value isn't None, then it exists and its parents must be dicts. Therefore, they wouldn't be replaced, and we can overwrite the existing target field. """ - if get_dotted_field_value(event, self._target_field) is None: - add_successful = add_field_to( - event, - self._target_field, - replacement, - ) - if not add_successful: - raise FieldExistsWarning(rule, event, [self._target_field]) - else: - add_field_to(event, self._target_field, replacement, overwrite_output_field=True) + overwrite = get_dotted_field_value(event, self._target_field) is not None + add_fields_to( + event, + fields={self._target_field: replacement}, + rule=rule, + overwrite_target_field=overwrite, + ) def setup(self): super().setup() diff --git a/logprep/util/helper.py b/logprep/util/helper.py index e8e4fec45..a040ac112 100644 --- a/logprep/util/helper.py +++ b/logprep/util/helper.py @@ -1,5 +1,6 @@ """This module contains helper functions that are shared by different modules.""" +import itertools import re import sys from functools import lru_cache, partial, reduce @@ -10,9 +11,11 @@ from colorama import Back, Fore from colorama.ansi import AnsiBack, AnsiFore +from logprep.processor.base.exceptions import FieldExistsWarning from logprep.util.defaults import DEFAULT_CONFIG_LOCATION if TYPE_CHECKING: # pragma: no cover + from logprep.processor.base.rule import Rule from logprep.util.configuration import Configuration @@ -57,58 +60,132 @@ def _add_and_not_overwrite_key(sub_dict, key): return sub_dict.get(key) -def add_field_to(event, output_field, content, extends_lists=False, overwrite_output_field=False): +def _add_field_to( + event: dict, + field: tuple, + rule: "Rule", + extends_lists: bool = False, + overwrite_target_field: bool = False, +) -> None: """ - Add content to an output_field in the given event. Output_field can be a dotted subfield. - In case of missing fields all intermediate fields will be created. + Add content to the target_field in the given event. target_field can be a dotted subfield. + In case of missing fields, all intermediate fields will be created. Parameters ---------- event: dict Original log-event that logprep is currently processing - output_field: str - Dotted subfield string indicating the target of the output value, e.g. destination.ip - content: str, float, int, list, dict - Value that should be written into the output_field, can be a str, list or dict object + field: tuple + A key value pair describing the field that should be added. The key is the dotted subfield string indicating + the target. The value is the content that should be added to the named target. The content can be of type + str, float, int, list, dict. + rule: Rule + A rule that initiated the field addition, is used for proper error handling. extends_lists: bool - Flag that determines whether output_field lists should be extended - overwrite_output_field: bool - Flag that determines whether the output_field should be overwritten - - Returns + Flag that determines whether target_field lists should be extended + overwrite_target_field: bool + Flag that determines whether the target_field should be overwritten + Raises ------ - This method returns true if no conflicting fields were found during the process of the creation - of the dotted subfields. If conflicting fields were found False is returned. + ValueError + If both extends_lists and overwrite_target_field are set to True. + FieldExistsWarning + If the target_field already exists and overwrite_target_field is False, or if extends_lists is True but + the existing field is not a list. + """ + if extends_lists and overwrite_target_field: + raise ValueError("An output field can't be overwritten and extended at the same time") + target_field, content = field + field_path = [event, *get_dotted_field_list(target_field)] + target_key = field_path.pop() + + if overwrite_target_field: + target_parent = reduce(_add_and_overwrite_key, field_path) + target_parent[target_key] = content + return + try: + target_parent = reduce(_add_and_not_overwrite_key, field_path) + except KeyError as error: + raise FieldExistsWarning(rule, event, [target_field]) from error + existing_value = target_parent.get(target_key) + if existing_value is None: + target_parent[target_key] = content + return + if not extends_lists or not isinstance(existing_value, list): + raise FieldExistsWarning(rule, event, [target_field]) + if isinstance(content, list | set): + target_parent[target_key].extend(content) + else: + target_parent[target_key].append(content) + + +def _add_field_to_silent_fail(*args, **kwargs) -> None | str: """ + Adds a field to an object, ignoring the FieldExistsWarning if the field already exists. Is only needed in the + add_batch_to map function. Without this the map would terminate early. - assert not ( - extends_lists and overwrite_output_field - ), "An output field can't be overwritten and extended at the same time" - output_field_path = [event, *get_dotted_field_list(output_field)] - target_key = output_field_path.pop() + Parameters: + args: tuple + Positional arguments to pass to the add_field_to function. + kwargs: dict + Keyword arguments to pass to the add_field_to function. - if overwrite_output_field: - target_field = reduce(_add_and_overwrite_key, output_field_path) - target_field |= {target_key: content} - return True + Returns: + The field that was attempted to be added, if the field already exists. + Raises: + FieldExistsWarning: If the field already exists, but this warning is caught and ignored. + """ try: - target_field = reduce(_add_and_not_overwrite_key, output_field_path) - except KeyError: - return False - - target_field_value = target_field.get(target_key) - if target_field_value is None: - target_field |= {target_key: content} - return True - if extends_lists: - if not isinstance(target_field_value, list): - return False - if isinstance(content, list): - target_field |= {target_key: [*target_field_value, *content]} - else: - target_field_value.append(content) - return True - return False + _add_field_to(*args, **kwargs) + except FieldExistsWarning as error: + return error.skipped_fields[0] + + +def add_fields_to( + event: dict, + fields: dict, + rule: "Rule" = None, + extends_lists: bool = False, + overwrite_target_field: bool = False, +) -> None: + """ + Handles the batch addition operation while raising a FieldExistsWarning with all unsuccessful targets. + + Parameters: + event: dict + The event object to which fields are to be added. + fields: dict + A dicht with key value pairs describing the fields that should be added. The key is the dotted subfield + string indicating the target. The value is the content that should be added to the named target. The + content can be of type: str, float, int, list, dict. + rule: Rule + A rule that initiated the field addition, is used for proper error handling. + extends_lists: bool + A boolean indicating whether to extend lists if the target field already exists. + overwrite_target_field: bool + A boolean indicating whether to overwrite the target field if it already exists. + + Raises: + FieldExistsWarning: If there are targets to which the content could not be added due to field + existence restrictions. + """ + # filter out None values + fields = {key: value for key, value in fields.items() if value is not None} + number_fields = len(dict(fields)) + if number_fields == 1: + _add_field_to(event, list(fields.items())[0], rule, extends_lists, overwrite_target_field) + return + unsuccessful_targets = map( + _add_field_to_silent_fail, + itertools.repeat(event, number_fields), + fields.items(), + itertools.repeat(rule, number_fields), + itertools.repeat(extends_lists, number_fields), + itertools.repeat(overwrite_target_field, number_fields), + ) + unsuccessful_targets = [item for item in unsuccessful_targets if item is not None] + if unsuccessful_targets: + raise FieldExistsWarning(rule, event, unsuccessful_targets) def _get_slice_arg(slice_item): @@ -155,7 +232,7 @@ def get_dotted_field_value(event: dict, dotted_field: str) -> Optional[Union[dic @lru_cache(maxsize=100000) def get_dotted_field_list(dotted_field: str) -> list[str]: """make lookup of dotted field in the dotted_field_lookup_table and ensures - it is added if not found. Additionally the string will be interned for faster + it is added if not found. Additionally, the string will be interned for faster followup lookups. Parameters @@ -277,23 +354,24 @@ def snake_to_camel(snake: str) -> str: return camel -append_as_list = partial(add_field_to, extends_lists=True) +append_as_list = partial(add_fields_to, extends_lists=True) -def add_and_overwrite(event, target_field, content, *_): +def add_and_overwrite(event, fields, rule, *_): """wrapper for add_field_to""" - add_field_to(event, target_field, content, overwrite_output_field=True) + add_fields_to(event, fields, rule, overwrite_target_field=True) -def append(event, target_field, content, separator): +def append(event, field, separator, rule): """appends to event""" + target_field, content = list(field.items())[0] target_value = get_dotted_field_value(event, target_field) if not isinstance(target_value, list): target_value = "" if target_value is None else target_value target_value = f"{target_value}{separator}{content}" - add_and_overwrite(event, target_field, target_value) + add_and_overwrite(event, fields={target_field: target_value}, rule=rule) else: - append_as_list(event, target_field, content) + append_as_list(event, field) def get_source_fields_dict(event, rule): diff --git a/tests/testdata/auto_tests/clusterer/rules/generic/rule_with_custom_tests.yml b/tests/testdata/auto_tests/clusterer/rules/generic/rule_with_custom_tests.yml index d6e6c21a6..aa91c5ed4 100644 --- a/tests/testdata/auto_tests/clusterer/rules/generic/rule_with_custom_tests.yml +++ b/tests/testdata/auto_tests/clusterer/rules/generic/rule_with_custom_tests.yml @@ -1,9 +1,10 @@ filter: message clusterer: + id: clusterer-rule-2 source_fields: [message] pattern: '(bytes|Bytes|Byte)' repl: 'byte' description: '...' tests: raw: 'Byte is a Bytes is a bytes is a byte' - result: 'byte is a byte is a byte is a byte' \ No newline at end of file + result: 'byte is a byte is a byte is a byte' diff --git a/tests/testdata/auto_tests/clusterer/rules/specific/rule_with_custom_tests.yml b/tests/testdata/auto_tests/clusterer/rules/specific/rule_with_custom_tests.yml index d6e6c21a6..9e51adc01 100644 --- a/tests/testdata/auto_tests/clusterer/rules/specific/rule_with_custom_tests.yml +++ b/tests/testdata/auto_tests/clusterer/rules/specific/rule_with_custom_tests.yml @@ -1,9 +1,10 @@ filter: message clusterer: + id: clusterer-rule-1 source_fields: [message] pattern: '(bytes|Bytes|Byte)' repl: 'byte' description: '...' tests: raw: 'Byte is a Bytes is a bytes is a byte' - result: 'byte is a byte is a byte is a byte' \ No newline at end of file + result: 'byte is a byte is a byte is a byte' diff --git a/tests/testdata/auto_tests/dissector/rules/specific/auto_test_mismatch.json b/tests/testdata/auto_tests/dissector/rules/specific/auto_test_mismatch.json index a84bb8ffc..d566851b1 100644 --- a/tests/testdata/auto_tests/dissector/rules/specific/auto_test_mismatch.json +++ b/tests/testdata/auto_tests/dissector/rules/specific/auto_test_mismatch.json @@ -1,9 +1,10 @@ [{ "filter": "message", "dissector": { + "id": "dissector-1", "mapping": { "message": "%{source}-%{target}" } }, "description": "Test-rule with matching auto-test" -}] \ No newline at end of file +}] diff --git a/tests/testdata/auto_tests/dissector/rules/specific/auto_test_no_test_.json b/tests/testdata/auto_tests/dissector/rules/specific/auto_test_no_test_.json index 77537bb95..a99118ee8 100644 --- a/tests/testdata/auto_tests/dissector/rules/specific/auto_test_no_test_.json +++ b/tests/testdata/auto_tests/dissector/rules/specific/auto_test_no_test_.json @@ -1,9 +1,10 @@ [{ "filter": "message", "dissector": { + "id": "dissector-2", "mapping": { "message": "%{source} %{target}" } }, "description": "Test-rule with matching auto-test" -}] \ No newline at end of file +}] diff --git a/tests/testdata/auto_tests/dropper/rules/generic/drop_field.json b/tests/testdata/auto_tests/dropper/rules/generic/drop_field.json index 30ebc797a..884d8cd3e 100644 --- a/tests/testdata/auto_tests/dropper/rules/generic/drop_field.json +++ b/tests/testdata/auto_tests/dropper/rules/generic/drop_field.json @@ -2,9 +2,10 @@ { "filter": "drop_me", "dropper": { + "id": "dropper-1", "drop": [ "drop_me" ] } } -] \ No newline at end of file +] diff --git a/tests/testdata/auto_tests/dropper/rules/specific/drop_field.json b/tests/testdata/auto_tests/dropper/rules/specific/drop_field.json index 30ebc797a..6b561618a 100644 --- a/tests/testdata/auto_tests/dropper/rules/specific/drop_field.json +++ b/tests/testdata/auto_tests/dropper/rules/specific/drop_field.json @@ -2,9 +2,10 @@ { "filter": "drop_me", "dropper": { + "id": "dropper-2", "drop": [ "drop_me" ] } } -] \ No newline at end of file +] diff --git a/tests/testdata/auto_tests/labeler/rules/generic/auto_test_labeling_match.json b/tests/testdata/auto_tests/labeler/rules/generic/auto_test_labeling_match.json index 4162a3add..e90c0e9ed 100644 --- a/tests/testdata/auto_tests/labeler/rules/generic/auto_test_labeling_match.json +++ b/tests/testdata/auto_tests/labeler/rules/generic/auto_test_labeling_match.json @@ -2,6 +2,7 @@ { "filter": "some_field: (stop OR end)", "labeler": { + "id": "labeler-1", "label": { "action": [ "terminate" @@ -10,4 +11,4 @@ }, "description": "Test-rule with matching auto-test" } -] \ No newline at end of file +] diff --git a/tests/testdata/auto_tests/labeler/rules/generic/auto_test_labeling_match_existing.json b/tests/testdata/auto_tests/labeler/rules/generic/auto_test_labeling_match_existing.json index 4162a3add..7e5c50de4 100644 --- a/tests/testdata/auto_tests/labeler/rules/generic/auto_test_labeling_match_existing.json +++ b/tests/testdata/auto_tests/labeler/rules/generic/auto_test_labeling_match_existing.json @@ -2,6 +2,7 @@ { "filter": "some_field: (stop OR end)", "labeler": { + "id": "labeler-2", "label": { "action": [ "terminate" @@ -10,4 +11,4 @@ }, "description": "Test-rule with matching auto-test" } -] \ No newline at end of file +] diff --git a/tests/testdata/auto_tests/pre_detector/rules/specific/auto_test_pre_detector_mismatch.json b/tests/testdata/auto_tests/pre_detector/rules/specific/auto_test_pre_detector_mismatch.json index a081ea335..98cf64b99 100644 --- a/tests/testdata/auto_tests/pre_detector/rules/specific/auto_test_pre_detector_mismatch.json +++ b/tests/testdata/auto_tests/pre_detector/rules/specific/auto_test_pre_detector_mismatch.json @@ -1,7 +1,7 @@ [{ "filter": "some_field", "pre_detector": { - "id": "SOME_TEST_RULE_ID", + "id": "SOME_TEST_RULE_ID_1", "title": "SOME_TEST_RULE", "severity": "critical", "mitre": [], @@ -9,4 +9,4 @@ }, "sigma_fields": true, "description": "Test-rule with mismatching auto-test" -}] \ No newline at end of file +}] diff --git a/tests/testdata/auto_tests/pre_detector/rules/specific/auto_test_pre_detector_no_test_.json b/tests/testdata/auto_tests/pre_detector/rules/specific/auto_test_pre_detector_no_test_.json index ebc751cac..e9f441735 100644 --- a/tests/testdata/auto_tests/pre_detector/rules/specific/auto_test_pre_detector_no_test_.json +++ b/tests/testdata/auto_tests/pre_detector/rules/specific/auto_test_pre_detector_no_test_.json @@ -1,7 +1,7 @@ [{ "filter": "some_field", "pre_detector": { - "id": "SOME_TEST_RULE_ID", + "id": "SOME_TEST_RULE_ID_2", "title": "SOME_TEST_RULE", "severity": "critical", "mitre": [], @@ -9,4 +9,4 @@ }, "sigma_fields": true, "description": "Test-rule without auto-test" -}] \ No newline at end of file +}] diff --git a/tests/testdata/auto_tests/template_replacer/rules/generic/template_replacer.json b/tests/testdata/auto_tests/template_replacer/rules/generic/template_replacer.json index 051249872..a2895b171 100644 --- a/tests/testdata/auto_tests/template_replacer/rules/generic/template_replacer.json +++ b/tests/testdata/auto_tests/template_replacer/rules/generic/template_replacer.json @@ -1,5 +1,7 @@ [{ "filter": "winlog.provider_name: \"the provider\" AND winlog.event_id: 123", - "template_replacer": {}, + "template_replacer": { + "id": "template-replacer-1" + }, "description": "" -}] \ No newline at end of file +}] diff --git a/tests/testdata/auto_tests/template_replacer/rules/specific/template_replacer.json b/tests/testdata/auto_tests/template_replacer/rules/specific/template_replacer.json index 9723abc3e..1073e6624 100644 --- a/tests/testdata/auto_tests/template_replacer/rules/specific/template_replacer.json +++ b/tests/testdata/auto_tests/template_replacer/rules/specific/template_replacer.json @@ -1,7 +1,9 @@ [ { "filter": "winlog.provider_name: \"the provider\" AND winlog.event_id: 123", - "template_replacer": {}, + "template_replacer": { + "id": "template-replacer-2" + }, "description": "" } -] \ No newline at end of file +] diff --git a/tests/unit/connector/base.py b/tests/unit/connector/base.py index ec8906cb9..729aec312 100644 --- a/tests/unit/connector/base.py +++ b/tests/unit/connector/base.py @@ -266,8 +266,7 @@ def test_get_next_with_hmac_result_in_already_existing_subfield(self): connector._get_event = mock.MagicMock( return_value=(test_event.copy(), raw_encoded_test_event) ) - non_critical_error_msg = "Couldn't add the hmac to the input event as the desired output field 'message' already exist." - with pytest.raises(CriticalInputError, match=non_critical_error_msg) as error: + with pytest.raises(CriticalInputError, match="could not be written") as error: _ = connector.get_next(1) assert error.value.raw_input == {"message": {"with_subfield": "content"}} @@ -312,8 +311,9 @@ def test_pipeline_preprocessing_does_not_add_versions_if_target_field_exists_alr connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content", "version_info": "something random"} connector._get_event = mock.MagicMock(return_value=(test_event, None)) - result = connector.get_next(0.01) - assert result == {"any": "content", "version_info": "something random"} + with pytest.raises(CriticalInputError, match="could not be written") as error: + _ = connector.get_next(0.01) + assert error.value.raw_input == {"any": "content", "version_info": "something random"} def test_pipeline_preprocessing_only_version_information(self): preprocessing_config = { @@ -326,8 +326,9 @@ def test_pipeline_preprocessing_only_version_information(self): connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content", "version_info": "something random"} connector._get_event = mock.MagicMock(return_value=(test_event, None)) - result = connector.get_next(0.01) - assert result == {"any": "content", "version_info": "something random"} + with pytest.raises(CriticalInputError, match="could not be written") as error: + _ = connector.get_next(0.01) + assert error.value.raw_input == {"any": "content", "version_info": "something random"} def test_get_raw_event_is_callable(self): # should be overwritten for special implementation @@ -377,8 +378,9 @@ def test_pipeline_preprocessing_does_not_add_log_arrival_time_if_target_field_ex connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content", "arrival_time": "does not matter"} connector._get_event = mock.MagicMock(return_value=(test_event, None)) - result = connector.get_next(0.01) - assert result == {"any": "content", "arrival_time": "does not matter"} + with pytest.raises(CriticalInputError, match="could not be written") as error: + _ = connector.get_next(0.01) + assert error.value.raw_input == {"any": "content", "arrival_time": "does not matter"} def test_pipeline_preprocessing_adds_timestamp_delta_if_configured(self): preprocessing_config = { diff --git a/tests/unit/processor/labeler/test_labeler.py b/tests/unit/processor/labeler/test_labeler.py index c6085d7bc..cae9105b5 100644 --- a/tests/unit/processor/labeler/test_labeler.py +++ b/tests/unit/processor/labeler/test_labeler.py @@ -257,3 +257,11 @@ def test_create_loads_the_specified_labeling_schema(self): labeler = Factory.create({"test instance": config}) assert labeler._schema == expected_schema + + def test_extend_list_of_existing_labels(self): + rule = {"filter": "applyrule", "labeler": {"label": {"reporter": ["windows", "foo"]}}} + document = {"applyrule": "yes", "label": {"reporter": ["windows"]}} + expected = {"applyrule": "yes", "label": {"reporter": ["foo", "windows"]}} + self._load_specific_rule(rule) + self.object.process(document) + assert document == expected diff --git a/tests/unit/processor/requester/test_requester.py b/tests/unit/processor/requester/test_requester.py index f9ba5d1f9..17decf03a 100644 --- a/tests/unit/processor/requester/test_requester.py +++ b/tests/unit/processor/requester/test_requester.py @@ -171,6 +171,27 @@ "status": 200, }, ), + ( + "use target_field and target_field_mapping at the same time, with error in target_field", + { + "filter": "message", + "requester": { + "url": "http://mock-mock/", + "method": "GET", + "target_field": "message", # will fail as it is already present + "target_field_mapping": {"key1.key2.key3": "result.custom"}, + }, + }, + {"message": "the message"}, + {"message": "the message", "result": {"custom": "value"}, "tags": ["_requester_failure"]}, + { + "method": "GET", + "url": "http://mock-mock/", + "json": {"key1": {"key2": {"key3": "value"}}}, + "content_type": "text/plain", + "status": 200, + }, + ), ( "parses json result with simple target field mapping and overwrite target", { diff --git a/tests/unit/processor/string_splitter/test_string_splitter.py b/tests/unit/processor/string_splitter/test_string_splitter.py index 3db351526..692a8c649 100644 --- a/tests/unit/processor/string_splitter/test_string_splitter.py +++ b/tests/unit/processor/string_splitter/test_string_splitter.py @@ -22,7 +22,7 @@ "string_splitter": { "source_fields": ["message"], "target_field": "result", - "delimeter": ", ", + "delimiter": ", ", }, }, {"message": "this, is, the, message"}, diff --git a/tests/unit/processor/test_process.py b/tests/unit/processor/test_process.py index 5d985c99b..704622751 100644 --- a/tests/unit/processor/test_process.py +++ b/tests/unit/processor/test_process.py @@ -79,7 +79,7 @@ def test_apply_processor_multiple_times_until_no_new_rule_matches(self): "url": "url", } processor.process(event) - assert expected_event == event + assert event == expected_event def test_apply_processor_multiple_times_not_enabled(self): config = {"type": "dissector", "specific_rules": [], "generic_rules": []} diff --git a/tests/unit/util/test_auto_rule_tester.py b/tests/unit/util/test_auto_rule_tester.py index d66b1f3ac..6424a0557 100644 --- a/tests/unit/util/test_auto_rule_tester.py +++ b/tests/unit/util/test_auto_rule_tester.py @@ -7,6 +7,7 @@ import pytest from logprep.util.auto_rule_tester.auto_rule_tester import AutoRuleTester +from logprep.util.configuration import Configuration LOGGER = logging.getLogger() @@ -14,6 +15,7 @@ @pytest.fixture(name="auto_rule_tester") def fixture_auto_rule_tester(): config_path = "tests/testdata/config/config-auto-tests.yml" + Configuration.from_source(config_path)._verify() return AutoRuleTester(config_path) diff --git a/tests/unit/util/test_helper_add_field.py b/tests/unit/util/test_helper_add_field.py index 007069ed2..7e7731415 100644 --- a/tests/unit/util/test_helper_add_field.py +++ b/tests/unit/util/test_helper_add_field.py @@ -2,25 +2,21 @@ # pylint: disable=missing-docstring import pytest -from logprep.util.helper import add_field_to +from logprep.processor.base.exceptions import FieldExistsWarning +from logprep.util.helper import add_fields_to class TestHelperAddField: def test_add_str_content_as_new_root_field(self): document = {"source": {"ip": "8.8.8.8"}} expected_document = {"source": {"ip": "8.8.8.8"}, "field": "content"} - - add_was_successful = add_field_to(document, "field", "content") - - assert add_was_successful, "Found duplicate even though there shouldn't be one" + add_fields_to(document, {"field": "content"}) assert document == expected_document def test_add_str_content_as_completely_new_dotted_subfield(self): document = {"source": {"ip": "8.8.8.8"}} expected_document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": "content"}} - - add_was_successful = add_field_to(document, "sub.field", "content") - assert add_was_successful, "Found duplicate even though there shouldn't be one" + add_fields_to(document, {"sub.field": "content"}) assert document == expected_document def test_add_str_content_as_partially_new_dotted_subfield(self): @@ -30,41 +26,31 @@ def test_add_str_content_as_partially_new_dotted_subfield(self): "sub": {"field": "content", "other_field": "other_content"}, } - add_was_successful = add_field_to(document, "sub.field", "content") - - assert add_was_successful, "Found duplicate even though there shouldn't be one" + add_fields_to(document, {"sub.field": "content"}) assert document == expected_document def test_provoke_str_duplicate_in_root_field(self): document = {"source": {"ip": "8.8.8.8"}, "field": "exists already"} - - add_was_successful = add_field_to(document, "field", "content") - - assert not add_was_successful, "Found no duplicate even though there should be one" + with pytest.raises(FieldExistsWarning, match=r"could not be written"): + add_fields_to(document, {"field": "content"}) + assert document def test_provoke_str_duplicate_in_dotted_subfield(self): document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": "exists already"}} - - add_was_successful = add_field_to(document, "sub.field", "content") - - assert not add_was_successful, "Found no duplicate even though there should be one" + with pytest.raises(FieldExistsWarning, match=r"could not be written"): + add_fields_to(document, {"sub.field": "content"}) + assert document def test_add_dict_content_as_new_root_field(self): document = {"source": {"ip": "8.8.8.8"}} expected_document = {"source": {"ip": "8.8.8.8"}, "field": {"dict": "content"}} - - add_was_successful = add_field_to(document, "field", {"dict": "content"}) - - assert add_was_successful, "Found duplicate even though there shouldn't be one" + add_fields_to(document, {"field": {"dict": "content"}}) assert document == expected_document def test_add_dict_content_as_completely_new_dotted_subfield(self): document = {"source": {"ip": "8.8.8.8"}} expected_document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": {"dict": "content"}}} - - add_was_successful = add_field_to(document, "sub.field", {"dict": "content"}) - - assert add_was_successful, "Found duplicate even though there shouldn't be one" + add_fields_to(document, {"sub.field": {"dict": "content"}}) assert document == expected_document def test_add_dict_content_as_partially_new_dotted_subfield(self): @@ -73,84 +59,58 @@ def test_add_dict_content_as_partially_new_dotted_subfield(self): "source": {"ip": "8.8.8.8"}, "sub": {"field": {"dict": "content"}, "other_field": "other_content"}, } - - add_was_successful = add_field_to(document, "sub.field", {"dict": "content"}) - - assert add_was_successful, "Found duplicate even though there shouldn't be one" + add_fields_to(document, {"sub.field": {"dict": "content"}}) assert document == expected_document def test_provoke_dict_duplicate_in_root_field(self): document = {"source": {"ip": "8.8.8.8"}, "field": {"already_existing": "dict"}} - - add_was_successful = add_field_to(document, "field", {"dict": "content"}) - - assert not add_was_successful, "Found no duplicate even though there should be one" + with pytest.raises(FieldExistsWarning, match=r"could not be written"): + add_fields_to(document, {"field": {"dict": "content"}}) + assert document def test_provoke_dict_duplicate_in_dotted_subfield(self): document = {"source": {"ip": "8.8.8.8"}, "sub": {"field": {"already_existing": "dict"}}} - - add_was_successful = add_field_to(document, "sub.field", {"dict": "content"}) - - assert not add_was_successful, "Found no duplicate even though there should be one" + with pytest.raises(FieldExistsWarning, match=r"could not be written"): + add_fields_to(document, {"sub.field": {"dict": "content"}}) def test_add_field_to_overwrites_output_field_in_root_level(self): document = {"some": "field", "output_field": "has already content"} - - add_was_successful = add_field_to( - document, "output_field", {"dict": "content"}, overwrite_output_field=True - ) - - assert add_was_successful, "Output field was overwritten" + add_fields_to(document, {"output_field": {"dict": "content"}}, overwrite_target_field=True) assert document.get("output_field") == {"dict": "content"} def test_add_field_to_overwrites_output_field_in_nested_level(self): document = {"some": "field", "nested": {"output": {"field": "has already content"}}} - - add_was_successful = add_field_to( - document, "nested.output.field", {"dict": "content"}, overwrite_output_field=True + add_fields_to( + document, {"nested.output.field": {"dict": "content"}}, overwrite_target_field=True ) - - assert add_was_successful, "Output field was overwritten" assert document.get("nested", {}).get("output", {}).get("field") == {"dict": "content"} def test_add_field_to_extends_list_when_only_given_a_string(self): document = {"some": "field", "some_list": ["with a value"]} - - add_was_successful = add_field_to(document, "some_list", "new value", extends_lists=True) - - assert add_was_successful, "Output field was overwritten" + add_fields_to(document, {"some_list": "new value"}, extends_lists=True) assert document.get("some_list") == ["with a value", "new value"] def test_add_field_to_extends_list_when_given_a_list(self): document = {"some": "field", "some_list": ["with a value"]} - - add_was_successful = add_field_to( - document, "some_list", ["first", "second"], extends_lists=True - ) - - assert add_was_successful, "Output field was overwritten" + add_fields_to(document, {"some_list": ["first", "second"]}, extends_lists=True) assert document.get("some_list") == ["with a value", "first", "second"] def test_add_field_to_raises_if_list_should_be_extended_and_overwritten_at_the_same_time(self): document = {"some": "field", "some_list": ["with a value"]} - - with pytest.raises( - AssertionError, - match=r"An output field can't be overwritten and " r"extended at the same time", - ): - _ = add_field_to( + with pytest.raises(ValueError, match=r"can't be overwritten and extended at the same time"): + add_fields_to( document, - "some_list", - ["first", "second"], + {"some_list": ["first", "second"]}, extends_lists=True, - overwrite_output_field=True, + overwrite_target_field=True, ) + assert document def test_returns_false_if_dotted_field_value_key_exists(self): document = {"user": "Franz"} - content = ["user_inlist"] - add_was_successful = add_field_to(document, "user.in_list", content) - assert not add_was_successful + with pytest.raises(FieldExistsWarning, match=r"could not be written"): + add_fields_to(document, {"user.in_list": ["user_inlist"]}) + assert document def test_add_list_with_nested_keys(self): testdict = { @@ -163,8 +123,54 @@ def test_add_list_with_nested_keys(self): } } } - add_was_successful = add_field_to( - testdict, "key1.key2.key3.key4.key5.list", ["content"], extends_lists=True - ) - assert add_was_successful + add_fields_to(testdict, {"key1.key2.key3.key4.key5.list": ["content"]}, extends_lists=True) assert testdict == expected + + def test_add_field_to_adds_value_not_as_list(self): + # checks if a newly added field is added not as list, even when `extends_list` is True + document = {"some": "field"} + add_fields_to(document, {"new": "list"}, extends_lists=True) + assert document.get("new") == "list" + assert not isinstance(document.get("new"), list) + + def test_add_field_to_adds_multiple_fields(self): + document = {"some": "field"} + expected = { + "some": "field", + "new": "foo", + "new2": "bar", + } + add_fields_to(document, {"new": "foo", "new2": "bar"}) + assert document == expected + + def test_add_field_too_adds_multiple_fields_and_overwrites_one(self): + document = {"some": "field", "exists_already": "original content"} + expected = { + "some": "field", + "exists_already": {"updated": "content"}, + "new": "another content", + } + new_fields = {"exists_already": {"updated": "content"}, "new": "another content"} + add_fields_to(document, new_fields, overwrite_target_field=True) + assert document == expected + + def test_add_field_too_adds_multiple_fields_and_extends_one(self): + document = {"some": "field", "exists_already": ["original content"]} + expected = { + "some": "field", + "exists_already": ["original content", "extended content"], + "new": "another content", + } + new_fields = {"exists_already": ["extended content"], "new": "another content"} + add_fields_to(document, new_fields, extends_lists=True) + assert document == expected + + def test_add_field_adds_multiple_fields_and_raises_one_field_exists_warning(self): + document = {"some": "field", "exists_already": "original content"} + with pytest.raises(FieldExistsWarning, match=r"could not be written"): + add_fields_to(document, {"exists_already": "new content", "new": "another content"}) + assert document == { + "some": "field", + "exists_already": "original content", + "new": "another content", + }