Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve rule loading times in rule tree #464

Merged
merged 9 commits into from
Nov 23, 2023
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@
- removed `max_caching_days` config option
- add `max_cached_pseudonymized_urls` config option which defaults to 1000
- add lru caching for peudonymizatin of urls
* improve loading times for the rule tree by optimizing the rule segmentation and sorting
* add support for python 3.12 and remove support for python 3.9

### Bugfix

* fix the rule tree parsing some rules incorrectly, potentially resulting in more matches

## v8.0.0
### Breaking
Expand All @@ -54,6 +56,7 @@

### Bugfix


## v7.0.0
### Breaking

Expand Down
2 changes: 0 additions & 2 deletions logprep/framework/rule_tree/rule_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
behavior, allowing a simpler construction of the rule tree.

"""

from typing import TYPE_CHECKING

from logprep.filter.expression.filter_expression import (
Expand Down Expand Up @@ -106,7 +105,6 @@ def parse_rule(self, rule: "Rule", priority_dict: dict) -> list:
RuleSorter.sort_rule_segments(dnf_rule_segments, priority_dict)
self._add_exists_filter(dnf_rule_segments)
self._rule_tagger.add(dnf_rule_segments)

return dnf_rule_segments

@staticmethod
Expand Down
38 changes: 21 additions & 17 deletions logprep/framework/rule_tree/rule_segmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,10 @@ def _segment_conjunctive_expression(expression: FilterExpression) -> list:
class CnfToDnfConverter:
"""Converts simplified rules from the conjunctive normal form to the disjunctive normal form"""

@staticmethod
def convert_cnf_to_dnf(cnf: list):
@classmethod
def convert_cnf_to_dnf(cls, cnf: list) -> list:
"""Convert rule from conjunctive normal form into disjunctive normal form.


This function handles the parsing of OR-subexpressions in AND-expression filters in a
recursive manner.
It converts an input in conjunctive normal form into the disjunctive normal form.
Expand All @@ -214,22 +213,27 @@ def convert_cnf_to_dnf(cnf: list):
result_list: list
Given input list with resolved OR-subexpressions.

Raises
------
RuleSegmenterException
Raises if converting the rule requires too much time, since the complexity of the
transformation to the disjunctive normal form is exponential.

"""
dnf = []

or_segment = CnfToDnfConverter._pop_disjunctive_segment(cnf)

CnfToDnfConverter._resolve_disjunctive_segment(or_segment, cnf, dnf)

for parsed_expression in dnf.copy():
for segment in parsed_expression:
if isinstance(segment, list):
if parsed_expression in dnf:
dnf.remove(parsed_expression)
resolved_expressions = CnfToDnfConverter.convert_cnf_to_dnf(parsed_expression)

for resolved_expression in resolved_expressions:
dnf.append(resolved_expression)
or_segments = CnfToDnfConverter._pop_disjunctive_segment(cnf)
CnfToDnfConverter._resolve_disjunctive_segment(or_segments, cnf, dnf)
dnf_len = range(len(dnf))
for idx in dnf_len:
parsed_expression = dnf[idx]
if any(isinstance(segment, list) for segment in parsed_expression):
if parsed_expression in dnf:
dnf[idx] = None
resolved_expressions = CnfToDnfConverter.convert_cnf_to_dnf(parsed_expression)

for resolved_expression in resolved_expressions:
dnf.append(resolved_expression)
dnf = [item for item in dnf if item is not None]
return dnf

@staticmethod
Expand Down
14 changes: 10 additions & 4 deletions logprep/framework/rule_tree/rule_sorter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,22 @@ def sort_rule_segments(parsed_rule_list: list, priority_dict: dict):
Dictionary with sorting priority information (key -> field name; value -> priority).

"""
sorting_keys = {}
for parsed_rule in parsed_rule_list:
parsed_rule.sort(
key=lambda expression: RuleSorter._get_sorting_key(expression, priority_dict)
)
for parsed_expression in parsed_rule:
expression_repr = repr(parsed_expression)
if expression_repr not in sorting_keys:
sorting_keys[expression_repr] = RuleSorter._get_sorting_key(
parsed_expression, priority_dict
)
for parsed_rule in parsed_rule_list:
parsed_rule.sort(key=lambda expression: sorting_keys.get(repr(expression)))

@staticmethod
def _get_sorting_key(
expression: FilterExpression, priority_dict: dict
) -> Union[dict, str, None]:
"""Get the sorting key for an expression with a priority dict..
"""Get the sorting key for an expression with a priority dict.

This function is used by the _sort_rule_segments() function in the sorting key.
It includes various cases to cover all the different expression classes. For every class it
Expand Down
4 changes: 2 additions & 2 deletions logprep/framework/rule_tree/rule_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ def add_rule(self, rule: "Rule", logger: Logger = None):
parsed_rule = self.rule_parser.parse_rule(rule, self.priority_dict)
except Exception as error: # pylint: disable=broad-except
logger.warning(
f'Error parsing rule "{rule.filter}": {type(error).__name__}: {error}.'
f"\nIgnore and continue with next rule."
f'Error parsing rule "{rule.file_name}.yml": {type(error).__name__}: {error}. '
f"Ignore and continue with next rule."
)
return
for rule_segment in parsed_rule:
Expand Down
53 changes: 44 additions & 9 deletions tests/unit/framework/rule_tree/test_rule_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# pylint: disable=missing-docstring
# pylint: disable=line-too-long
# pylint: disable=too-many-statements

import pytest

from logprep.filter.expression.filter_expression import StringFilterExpression, Not, Exists
Expand Down Expand Up @@ -439,10 +438,6 @@ class TestRuleParser:
[Exists(["A1"]), Exists(["B2"]), Exists(["C1"]), Exists(["D2"])],
[Exists(["A1"]), Exists(["B2"]), Exists(["C2"]), Exists(["D1"])],
[Exists(["A1"]), Exists(["B2"]), Exists(["C2"]), Exists(["D2"])],
[Exists(["A1"]), Exists(["C1"]), Exists(["D1"])],
[Exists(["A1"]), Exists(["C1"]), Exists(["D2"])],
[Exists(["A1"]), Exists(["C2"]), Exists(["D1"])],
[Exists(["A1"]), Exists(["C2"]), Exists(["D2"])],
[Exists(["A2"]), Exists(["B1"]), Exists(["C1"]), Exists(["D1"])],
[Exists(["A2"]), Exists(["B1"]), Exists(["C1"]), Exists(["D2"])],
[Exists(["A2"]), Exists(["B1"]), Exists(["C2"]), Exists(["D1"])],
Expand All @@ -451,10 +446,50 @@ class TestRuleParser:
[Exists(["A2"]), Exists(["B2"]), Exists(["C1"]), Exists(["D2"])],
[Exists(["A2"]), Exists(["B2"]), Exists(["C2"]), Exists(["D1"])],
[Exists(["A2"]), Exists(["B2"]), Exists(["C2"]), Exists(["D2"])],
[Exists(["A2"]), Exists(["C1"]), Exists(["D1"])],
[Exists(["A2"]), Exists(["C1"]), Exists(["D2"])],
[Exists(["A2"]), Exists(["C2"]), Exists(["D1"])],
[Exists(["A2"]), Exists(["C2"]), Exists(["D2"])],
],
),
(
PreDetectorRule._create_from_dict(
{
"filter": "(A1 OR A2) AND (B1 OR (C1 AND C2))",
"pre_detector": {
"id": 1,
"title": "1",
"severity": "0",
"case_condition": "directly",
"mitre": [],
},
}
),
{},
{},
[
[Exists(["A1"]), Exists(["B1"])],
[Exists(["A1"]), Exists(["C1"]), Exists(["C2"])],
[Exists(["A2"]), Exists(["B1"])],
[Exists(["A2"]), Exists(["C1"]), Exists(["C2"])],
],
),
(
PreDetectorRule._create_from_dict(
{
"filter": "((A1 OR A2) AND (B1 OR B2)) AND C1",
"pre_detector": {
"id": 1,
"title": "1",
"severity": "0",
"case_condition": "directly",
"mitre": [],
},
}
),
{},
{},
[
[Exists(["A1"]), Exists(["B1"]), Exists(["C1"])],
[Exists(["A1"]), Exists(["B2"]), Exists(["C1"])],
[Exists(["A2"]), Exists(["B1"]), Exists(["C1"])],
[Exists(["A2"]), Exists(["B2"]), Exists(["C1"])],
],
),
(
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/framework/rule_tree/test_rule_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ def test_add_rule_fails(self, rule_dict):
):
rule_tree.add_rule(rule, logger=mocked_logger)
expected_call = mock.call.warning(
'Error parsing rule "winlog:"123"": Exception: mocked error.'
"\nIgnore and continue with next rule."
'Error parsing rule "None.yml": Exception: mocked error. '
"Ignore and continue with next rule."
)
assert expected_call in mocked_logger.mock_calls

Expand Down
Loading