From f81bc76fa606cb4f629172b39c066b8ea75264ed Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Tue, 17 Dec 2024 16:15:25 +0100 Subject: [PATCH] First positive tests of Lucene compliant filter --- logprep/filter/lucene_filter.py | 36 ++++++++++++++++++++++--- tests/unit/filter/test_lucene_filter.py | 24 +++++++++++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/logprep/filter/lucene_filter.py b/logprep/filter/lucene_filter.py index 37405eb4a..bc648ec25 100644 --- a/logprep/filter/lucene_filter.py +++ b/logprep/filter/lucene_filter.py @@ -315,6 +315,8 @@ def _collect_children(self, tree: luqum.tree) -> List[FilterExpression]: return expressions def _create_field(self, tree: luqum.tree) -> Optional[FilterExpression]: + # OK, I think this is the right place. I need to work out what each part here does, and then I can + # decide how to implement it. if isinstance(tree.expr, (Phrase, Word)): key = tree.name.replace("\\", "") key = key.split(".") @@ -324,6 +326,15 @@ def _create_field(self, tree: luqum.tree) -> Optional[FilterExpression]: value = self._strip_quote_from_string(tree.expr.value) value = self._remove_lucene_escaping(value) return self._get_filter_expression(key, value) + elif isinstance(tree.expr, Regex): + key = tree.name.replace("\\", "") + key = key.split(".") + if tree.expr.value == "null": + return Null(key) + + value = self._strip_quote_from_string(tree.expr.value) + value = self._remove_lucene_escaping(value) + return self._get_filter_expression_regex(key, value) return None def _get_filter_expression( @@ -339,6 +350,7 @@ def _get_filter_expression( if self._special_fields.items(): for sf_key, sf_value in self._special_fields.items(): if sf_value is True or dotted_field in sf_value: + # Todo: this has to be removed at the end of the ticket. if sf_key == "regex_fields": logger.warning( "[Deprecated]: regex_fields are no longer necessary. 
" @@ -347,12 +359,30 @@ def _get_filter_expression( return self._special_fields_map[sf_key](key, value) - if value.startswith("/") and value.endswith("/"): - value = value.strip("/") - return RegExFilterExpression(key, value) + #Todo: this has to be removed at the end of the ticket. + + # if hasattr(self._tree, 'expr') and isinstance(self._tree.expr, Regex): + # value = value.strip("/") + # return RegExFilterExpression(key, value) + # if value.startswith("/") and value.endswith("/"): + # value = value.strip("/") + # return RegExFilterExpression(key, value) return StringFilterExpression(key, value) + def _get_filter_expression_regex( + self, key: List[str], value + ) -> Union[RegExFilterExpression, StringFilterExpression]: + key_and_modifier = key[-1].split("|") + if len(key_and_modifier) == 2: + if key_and_modifier[-1] == "re": + return RegExFilterExpression(key[:-1] + key_and_modifier[:-1], value) + + + value = value.strip("/") + return RegExFilterExpression(key, value) + + @staticmethod def _create_value_expression(word: luqum.tree) -> Union[Exists, Always]: value = word.value.replace("\\", "") diff --git a/tests/unit/filter/test_lucene_filter.py b/tests/unit/filter/test_lucene_filter.py index 2873a7a8c..cd03b8792 100644 --- a/tests/unit/filter/test_lucene_filter.py +++ b/tests/unit/filter/test_lucene_filter.py @@ -482,3 +482,27 @@ def test_creates_lucene_compliance_filter_one_matching_one_missmatch_regex_key_o RegExFilterExpression(["regex_key_one"], ".*value.*"), StringFilterExpression(["key_two"], "value"), ) + + def test_new_lucene_compliance(self): + lucene_filter = LuceneFilter.create( + 'regex_key_one:".*value.*"' + ) + + assert lucene_filter == RegExFilterExpression(["regex_key_one"], ".*value.*") + + def test_new_lucene_compliance2(self): + lucene_filter = LuceneFilter.create( + 'regex_key_one:/.*value.*/' + ) + + assert lucene_filter == RegExFilterExpression(["regex_key_one"], ".*value.*") + + def test_new_lucene_compliance3(self): + lucene_filter = 
LuceneFilter.create( + 'regex_key_one:/.*value.*/ AND key_two: "/.*value.*/"', + ) + + assert lucene_filter == And(RegExFilterExpression(["regex_key_one"], ".*value.*"), + StringFilterExpression(["key_two"], "/.*value.*/")) + +