Skip to content

Commit

Permalink
First positive tests of Lucene compliant filter
Browse files Browse the repository at this point in the history
  • Loading branch information
MoessnerFabian(Group) committed Dec 17, 2024
1 parent 474d906 commit f81bc76
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 3 deletions.
36 changes: 33 additions & 3 deletions logprep/filter/lucene_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,8 @@ def _collect_children(self, tree: luqum.tree) -> List[FilterExpression]:
return expressions

def _create_field(self, tree: luqum.tree) -> Optional[FilterExpression]:
# OK, I think this is the right place. I now need to work out what each part here
# does, and then I can decide how to implement it.
if isinstance(tree.expr, (Phrase, Word)):
key = tree.name.replace("\\", "")
key = key.split(".")
Expand All @@ -324,6 +326,15 @@ def _create_field(self, tree: luqum.tree) -> Optional[FilterExpression]:
value = self._strip_quote_from_string(tree.expr.value)
value = self._remove_lucene_escaping(value)
return self._get_filter_expression(key, value)
elif isinstance(tree.expr, Regex):
key = tree.name.replace("\\", "")
key = key.split(".")
if tree.expr.value == "null":
return Null(key)

value = self._strip_quote_from_string(tree.expr.value)
value = self._remove_lucene_escaping(value)
return self._get_filter_expression_regex(key, value)
return None

def _get_filter_expression(
Expand All @@ -339,6 +350,7 @@ def _get_filter_expression(
if self._special_fields.items():
for sf_key, sf_value in self._special_fields.items():
if sf_value is True or dotted_field in sf_value:
# TODO: this has to be removed at the end of the ticket.
if sf_key == "regex_fields":
logger.warning(
"[Deprecated]: regex_fields are no longer necessary. "
Expand All @@ -347,12 +359,30 @@ def _get_filter_expression(

return self._special_fields_map[sf_key](key, value)

if value.startswith("/") and value.endswith("/"):
value = value.strip("/")
return RegExFilterExpression(key, value)
# TODO: this has to be removed at the end of the ticket.

# if hasattr(self._tree, 'expr') and isinstance(self._tree.expr, Regex):
# value = value.strip("/")
# return RegExFilterExpression(key, value)
# if value.startswith("/") and value.endswith("/"):
# value = value.strip("/")
# return RegExFilterExpression(key, value)

return StringFilterExpression(key, value)

def _get_filter_expression_regex(
    self, key: List[str], value
) -> Union[RegExFilterExpression, StringFilterExpression]:
    """Build the regex filter expression for a slash-delimited regex value.

    Parameters
    ----------
    key : List[str]
        Field path split into its segments. If the last segment has the form
        ``name|re``, the ``|re`` modifier is dropped from the resulting key.
    value : str
        Regex value, expected to be wrapped in its ``/`` delimiters.

    Returns
    -------
    RegExFilterExpression
        Expression for the (cleaned) key and the pattern between the delimiters.
    """
    # Remove exactly one delimiter pair. ``str.strip("/")`` would also remove
    # slashes that belong to the pattern itself (e.g. "//tmp//" -> "tmp").
    if len(value) >= 2 and value.startswith("/") and value.endswith("/"):
        value = value[1:-1]

    # A trailing "|re" modifier on the last key segment is redundant for a
    # regex value; drop it so the expression targets the plain field name.
    name_parts = key[-1].split("|")
    if len(name_parts) == 2 and name_parts[1] == "re":
        key = key[:-1] + name_parts[:1]

    return RegExFilterExpression(key, value)


@staticmethod
def _create_value_expression(word: luqum.tree) -> Union[Exists, Always]:
value = word.value.replace("\\", "")
Expand Down
24 changes: 24 additions & 0 deletions tests/unit/filter/test_lucene_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,3 +482,27 @@ def test_creates_lucene_compliance_filter_one_matching_one_missmatch_regex_key_o
RegExFilterExpression(["regex_key_one"], ".*value.*"),
StringFilterExpression(["key_two"], "value"),
)

def test_new_lucene_compliance(self):
    """A quoted value with regex metacharacters is expected to give a regex expression."""
    # NOTE(review): the value is a quoted phrase rather than a /regex/ literal —
    # confirm that a regex expression is really the intended result here.
    parsed = LuceneFilter.create('regex_key_one:".*value.*"')
    expected = RegExFilterExpression(["regex_key_one"], ".*value.*")

    assert parsed == expected

def test_new_lucene_compliance2(self):
    """A slash-delimited value is expected to give a regex expression without the slashes."""
    parsed = LuceneFilter.create("regex_key_one:/.*value.*/")
    expected = RegExFilterExpression(["regex_key_one"], ".*value.*")

    assert parsed == expected

def test_new_lucene_compliance3(self):
    """A regex literal and a quoted, slash-wrapped string are told apart in one query."""
    query = 'regex_key_one:/.*value.*/ AND key_two: "/.*value.*/"'
    parsed = LuceneFilter.create(query)

    # Only the unquoted /.../ term is a regex; the quoted term keeps its slashes
    # and is expected to be compared as a plain string.
    expected = And(
        RegExFilterExpression(["regex_key_one"], ".*value.*"),
        StringFilterExpression(["key_two"], "/.*value.*/"),
    )

    assert parsed == expected


0 comments on commit f81bc76

Please sign in to comment.