diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8ceca2252..c232795bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,8 +1,11 @@ name: CI +# only run tests for pull requests cause no file has to be changed without review +# open -> open the pull request +# synchronize -> push to branch of pull request on: - push: - branches: [ 'main', 'master', 'dev-*', 'fix-*' ] + pull_request: + types: [opened, synchronize] jobs: build-pex: @@ -58,25 +61,65 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.6 - uses: actions/setup-python@v2 - with: - python-version: 3.6 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements_dev.txt - - - name: Peroform unit tests - env: - PYTEST_ADDOPTS: "--color=yes" - run: | - pytest -vv tests/unit - - - name: Peroform acceptance tests - env: - PYTEST_ADDOPTS: "--color=yes" - run: | - pytest -vv tests/acceptance \ No newline at end of file + - uses: actions/checkout@v2 + - name: Set up Python 3.6 + uses: actions/setup-python@v2 + with: + python-version: 3.6 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements_dev.txt + + - name: Perform unit tests + env: + PYTEST_ADDOPTS: "--color=yes" + run: | + pytest -vv tests/unit + + - name: Perform acceptance tests + env: + PYTEST_ADDOPTS: "--color=yes" + run: | + pytest -vv tests/acceptance + + code-quality: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Python 3.6 + uses: actions/setup-python@v2 + with: + python-version: 3.6 + + - name: Get changed python files + id: changed-files + uses: tj-actions/changed-files@v18.7 + with: + files: | + **/*.py + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements_dev.txt + + - name: check black formating + run: | + pip install black + black --check --diff --config ./pyproject.toml . 
+ + - name: lint changed and added files + run: | + pylint --rcfile=.pylintrc --fail-under 9.5 ${{ steps.changed-files.outputs.all_changed_files }} + + - name: Run tests and collect coverage + run: pytest tests --cov=logprep --cov-report=xml + + - name: Upload coverage reports to Codecov with GitHub Action + uses: codecov/codecov-action@v2 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 000000000..95c87dfb1 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,123 @@ +name: CI + +# ensures that main has our basic test and code quality +on: + push: + branches: [main] + +jobs: + build-pex: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.6 + uses: actions/setup-python@v2 + with: + python-version: 3.6 + + - uses: actions/cache@v2 + id: cache + with: + path: | + /opt/hostedtoolcache/Python/3.6.15/x64/lib/python3.6/site-packages/ + /opt/hostedtoolcache/Python/3.6.15/x64/bin/ansible + /opt/hostedtoolcache/Python/3.6.15/x64/bin/virtualenv + /opt/hostedtoolcache/Python/3.6.15/x64/bin/pex + key: ${{ hashFiles('setup.py') }}-${{ hashFiles('requirements_dev.txt') }} + + - name: Install dependencies + if: steps.cache.outputs.cache-hit != 'true' + run: | + python -m pip install --upgrade pip + pip install ansible + pip install virtualenv + pip install wheel + pip install pex + + - name: Repack confluent-kafka wheel + run: | + rm -rf tmp_pip_cache && + mkdir tmp_pip_cache && + cd tmp_pip_cache && + python -m pip download $(cat ../requirements.txt | grep confluent-kafka) && + unzip * && + rm *.whl && + python -m wheel pack . + + - name: Build Pex File + run: | + pex . -r requirements.txt -o ./logprep.pex -c logprep --pex-root=tmp_pip_cache + + - name: Upload PEX + uses: actions/upload-artifact@v2 + with: + name: Logprep + path: logprep.pex + + test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.6 + uses: actions/setup-python@v2 + with: + python-version: 3.6 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements_dev.txt + + - name: Perform unit tests + env: + PYTEST_ADDOPTS: "--color=yes" + run: | + pytest -vv tests/unit + + - name: Perform acceptance tests + env: + PYTEST_ADDOPTS: "--color=yes" + run: | + pytest -vv tests/acceptance + + code-quality: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Python 3.6 + uses: actions/setup-python@v2 + with: + python-version: 3.6 + + - name: Get changed python files + id: changed-files + uses: tj-actions/changed-files@v18.7 + with: + files: | + **/*.py + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements_dev.txt + + - name: check black formating + run: | + pip install black + black --check --diff --config ./pyproject.toml . 
+ + - name: lint changed and added files + run: | + pylint --rcfile=.pylintrc --fail-under 9.5 ${{ steps.changed-files.outputs.all_changed_files }} + + - name: Run tests and collect coverage + run: pytest tests --cov=logprep --cov-report=xml + + - name: Upload coverage reports to Codecov with GitHub Action + uses: codecov/codecov-action@v2 diff --git a/README.md b/README.md index dd9f8cc3f..09521a6cb 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,10 @@ ![GitHub Workflow Status (branch)](https://img.shields.io/github/workflow/status/fkie-cad/logprep/CI/main) [![Documentation Status](https://readthedocs.org/projects/logprep/badge/?version=latest)](http://logprep.readthedocs.io/?badge=latest) ![GitHub contributors](https://img.shields.io/github/contributors/fkie-cad/Logprep) -![GitHub Repo stars](https://img.shields.io/github/stars/fkie-cad/logprep?style=social) + + Coverage + +![GitHub Repo stars](https://img.shields.io/github/stars/fkie-cad/logprep?style=social) ## Introduction diff --git a/logprep/processor/generic_resolver/factory.py b/logprep/processor/generic_resolver/factory.py index 0efc985e7..77f3b962a 100644 --- a/logprep/processor/generic_resolver/factory.py +++ b/logprep/processor/generic_resolver/factory.py @@ -13,11 +13,9 @@ def create(name: str, configuration: dict, logger) -> GenericResolver: GenericResolverFactory._check_configuration(configuration) generic_resolver = GenericResolver( - name, - configuration['specific_rules'], - configuration['generic_rules'], - configuration.get("tree_config"), - logger + name=name, + configuration=configuration, + logger=logger, ) return generic_resolver diff --git a/logprep/processor/generic_resolver/processor.py b/logprep/processor/generic_resolver/processor.py index 076e249e7..0edf06470 100644 --- a/logprep/processor/generic_resolver/processor.py +++ b/logprep/processor/generic_resolver/processor.py @@ -28,8 +28,7 @@ class DuplicationError(GenericResolverError): def __init__(self, name: str, skipped_fields: List[str]): message = ( - "The following fields already existed and " - "were not overwritten by the Normalizer: " + "The following fields already existed and were not overwritten by the Normalizer: " ) message += " ".join(skipped_fields) @@ -42,11 +41,12 @@ class GenericResolver(RuleBasedProcessor): def __init__( self, name: str, - specific_rules_dirs: list, - generic_rules_dirs: list, - tree_config: str, + configuration: dict, logger: Logger, ): + specific_rules_dirs = configuration.get("specific_rules") + generic_rules_dirs = configuration.get("generic_rules") + tree_config = configuration.get("tree_config") super().__init__(name, tree_config, logger) self.ps = ProcessorStats() @@ -88,9 +88,9 @@ def add_rules_from_directory( ) self.ps.setup_rules( - [None] * self._generic_tree.rule_counter - + [None] * self._specific_tree.rule_counter + [None] * self._generic_tree.rule_counter + [None] * self._specific_tree.rule_counter ) + # pylint: enable=arguments-differ def describe(self) -> str: @@ -122,30 +122,7 @@ def _apply_rules(self, event, rule): """Apply the given rule to the current event""" conflicting_fields = [] - if rule.resolve_from_file: - if rule.resolve_from_file["path"] not in self._replacements_from_file: - try: - with open(rule.resolve_from_file["path"], "r") as add_file: - add_dict = yaml.load(add_file) - if isinstance(add_dict, dict) and all( - isinstance(value, str) for value in add_dict.values() - ): - self._replacements_from_file[ - rule.resolve_from_file["path"] - ] = add_dict - else: - raise GenericResolverError( - 
self._name, - f"Additions file " - f'\'{rule.resolve_from_file["path"]}\'' - f" must be a dictionary with string values!", - ) - except FileNotFoundError as error: - raise GenericResolverError( - self._name, - f'Additions file \'{rule.resolve_from_file["path"]}' - f"' not found!", - ) from error + self.ensure_rules_from_file(rule) for resolve_source, resolve_target in rule.field_mapping.items(): keys = resolve_target.split(".") @@ -153,58 +130,78 @@ def _apply_rules(self, event, rule): if rule.resolve_from_file and src_val: pattern = f'^{rule.resolve_from_file["pattern"]}$' - replacements = self._replacements_from_file[ - rule.resolve_from_file["path"] - ] + replacements = self._replacements_from_file[rule.resolve_from_file["path"]] matches = re.match(pattern, src_val) if matches: - try: - dest_val = replacements.get(matches.group("mapping")) - except IndexError as error: + mapping = matches.group("mapping") if "mapping" in matches.groupdict() else None + if mapping is None: raise GenericResolverError( self._name, "Mapping group is missing in mapping file pattern!", - ) from error + ) + dest_val = replacements.get(mapping) if dest_val: - dict_ = event for idx, key in enumerate(keys): - if key not in dict_: + if key not in event: if idx == len(keys) - 1: if rule.append_to_list: - dict_[key] = dict_.get("key", []) - if dest_val not in dict_[key]: - dict_[key].append(dest_val) + event[key] = event.get("key", []) + if dest_val not in event[key]: + event[key].append(dest_val) else: - dict_[key] = dest_val + event[key] = dest_val break - dict_[key] = dict() - if isinstance(dict_[key], dict): - dict_ = dict_[key] + event[key] = {} + if isinstance(event[key], dict): + event = event[key] else: - if rule.append_to_list and isinstance(dict_[key], list): - if dest_val not in dict_[key]: - dict_[key].append(dest_val) + if rule.append_to_list and isinstance(event[key], list): + if dest_val not in event[key]: + event[key].append(dest_val) else: conflicting_fields.append(keys[idx]) for pattern, dest_val in rule.resolve_list.items(): if src_val and re.search(pattern, src_val): - dict_ = event for idx, key in enumerate(keys): - if key not in dict_: + if key not in event: if idx == len(keys) - 1: if rule.append_to_list: - dict_[key] = dict_.get("key", []) - dict_[key].append(dest_val) + event[key] = event.get("key", []) + event[key].append(dest_val) else: - dict_[key] = dest_val + event[key] = dest_val break - dict_[key] = dict() - if isinstance(dict_[key], dict): - dict_ = dict_[key] + event[key] = {} + if isinstance(event[key], dict): + event = event[key] else: conflicting_fields.append(keys[idx]) break if conflicting_fields: raise DuplicationError(self._name, conflicting_fields) + + def ensure_rules_from_file(self, rule): + """loads rules from file""" + if rule.resolve_from_file: + if rule.resolve_from_file["path"] not in self._replacements_from_file: + try: + with open(rule.resolve_from_file["path"], "r", encoding="utf8") as add_file: + add_dict = yaml.load(add_file) + if isinstance(add_dict, dict) and all( + isinstance(value, str) for value in add_dict.values() + ): + self._replacements_from_file[rule.resolve_from_file["path"]] = add_dict + else: + raise GenericResolverError( + self._name, + f"Additions file " + f'\'{rule.resolve_from_file["path"]}\'' + f" must be a dictionary with string values!", + ) + except FileNotFoundError as error: + raise GenericResolverError( + self._name, + f'Additions file \'{rule.resolve_from_file["path"]}' f"' not found!", + ) from error diff --git 
a/logprep/processor/geoip_enricher/factory.py b/logprep/processor/geoip_enricher/factory.py index 425e90223..19734073d 100644 --- a/logprep/processor/geoip_enricher/factory.py +++ b/logprep/processor/geoip_enricher/factory.py @@ -14,9 +14,7 @@ def create(name: str, configuration: dict, logger) -> GeoIPEnricher: """Create a generic resolver.""" GeoIPEnricherFactory._check_configuration(configuration) - geoip_enricher = GeoIPEnricher( - name=name, configuration=configuration, logger=logger - ) + geoip_enricher = GeoIPEnricher(name=name, configuration=configuration, logger=logger) return geoip_enricher diff --git a/logprep/processor/list_comparison/processor.py b/logprep/processor/list_comparison/processor.py index a78addb3f..8bb1cf275 100644 --- a/logprep/processor/list_comparison/processor.py +++ b/logprep/processor/list_comparison/processor.py @@ -1,21 +1,15 @@ """ -This module contains functionality for checking if values exist or not exist in file lists. This processor implements +This module contains functionality for checking if values exist or not exist in +file lists. This processor implements black- and whitelisting capabilities. """ -from logging import Logger, DEBUG -from logprep.framework.rule_tree.rule_tree import RuleTree -from logprep.framework import rule_tree +from logging import DEBUG, Logger from multiprocessing import current_process -from os import walk -from os.path import isdir, realpath, join from time import time from typing import List, Optional -from logprep.processor.base.exceptions import ( - NotARulesDirectoryError, - InvalidRuleDefinitionError, - InvalidRuleFileError, -) +from logprep.framework.rule_tree.rule_tree import RuleTree +from logprep.processor.base.exceptions import InvalidRuleDefinitionError, InvalidRuleFileError from logprep.processor.base.processor import RuleBasedProcessor from logprep.processor.list_comparison.rule import ListComparisonRule from logprep.util.helper import add_field_to @@ -97,8 +91,7 @@ def add_rules_from_directory( f"({current_process().name})" ) self.ps.setup_rules( - [None] * self._generic_tree.rule_counter - + [None] * self._specific_tree.rule_counter + [None] * self._generic_tree.rule_counter + [None] * self._specific_tree.rule_counter ) # pylint: enable=arguments-differ @@ -116,9 +109,7 @@ def _load_rules_from_file(self, list_comparison_path: str): try: return ListComparisonRule.create_rules_from_file(list_comparison_path) except InvalidRuleDefinitionError as error: - raise InvalidRuleFileError( - self._name, list_comparison_path, str(error) - ) from error + raise InvalidRuleFileError(self._name, list_comparison_path, str(error)) from error def describe(self) -> str: """Return name of given processor instance.""" @@ -141,14 +132,14 @@ def process(self, event: dict): for rule in self._generic_tree.get_matching_rules(event): begin = time() self._apply_rules(event, rule) - processing_time = float("{:.10f}".format(time() - begin)) + processing_time = time() - begin idx = self._generic_tree.get_rule_id(rule) self.ps.update_per_rule(idx, processing_time) for rule in self._specific_tree.get_matching_rules(event): begin = time() self._apply_rules(event, rule) - processing_time = float("{:.10f}".format(time() - begin)) + processing_time = time() - begin idx = self._specific_tree.get_rule_id(rule) self.ps.update_per_rule(idx, processing_time) @@ -181,8 +172,8 @@ def _apply_rules(self, event, rule): def _list_comparison(self, rule: ListComparisonRule, event: dict): """ Check if field value violates block or allow list. 
- Returns the result of the comparison (res_key), as well as a dictionary containing the result (key) - and a list of filenames pertaining to said result (value). + Returns the result of the comparison (res_key), as well as a dictionary + containing the result (key) and a list of filenames pertaining to said result (value). """ # get value that should be checked in the lists @@ -195,7 +186,6 @@ def _list_comparison(self, rule: ListComparisonRule, event: dict): list_matches.append(compare_list) # if matching list was found return it, otherwise return all list names - if len(list_matches) > 0: - return list_matches, "in_list" - elif len(list_matches) == 0: + if len(list_matches) == 0: return list(rule.compare_sets.keys()), "not_in_list" + return list_matches, "in_list" diff --git a/logprep/processor/list_comparison/rule.py b/logprep/processor/list_comparison/rule.py index a3df6a352..f96bfb697 100644 --- a/logprep/processor/list_comparison/rule.py +++ b/logprep/processor/list_comparison/rule.py @@ -5,12 +5,10 @@ import os.path from typing import Optional - -from json import load from ruamel.yaml import YAML from logprep.filter.expression.filter_expression import FilterExpression -from logprep.processor.base.rule import Rule, InvalidRuleDefinitionError +from logprep.processor.base.rule import InvalidRuleDefinitionError, Rule yaml = YAML(typ="safe", pure=True) @@ -56,45 +54,42 @@ def __init__(self, filter_rule: FilterExpression, list_comparison_cfg: dict): self._check_field = list_comparison_cfg["check_field"] self._list_comparison_output_field = list_comparison_cfg["output_field"] - self._compare_sets = dict() + self._compare_sets = {} self._config = list_comparison_cfg self.init_list_comparison(self._config.get("list_search_base_path")) def init_list_comparison(self, list_search_base_path: Optional[str]): + """init method for list_comparision""" for key in self._config.keys(): if key.endswith("_paths"): file_paths = self._config[key] for list_path in file_paths: if list_search_base_path is not None and not os.path.isabs(list_path): list_path = os.path.join(list_search_base_path, list_path) - with open(list_path, "r") as f: - compare_elements = f.read().splitlines() + with open(list_path, "r", encoding="utf8") as list_file: + compare_elements = list_file.read().splitlines() file_elem_tuples = [ - elem - for elem in compare_elements - if not elem.startswith("#") + elem for elem in compare_elements if not elem.startswith("#") ] file_name = os.path.basename(list_path) self._compare_sets[file_name] = set(file_elem_tuples) def __eq__(self, other: "ListComparisonRule") -> bool: - return (other.filter == self._filter) and ( - self._check_field == other.check_field - ) + return (other.filter == self._filter) and (self._check_field == other.check_field) def __hash__(self) -> int: return hash(repr(self)) @property - def compare_sets(self) -> dict: + def compare_sets(self) -> dict: # pylint: disable=missing-docstring return self._compare_sets @property - def check_field(self) -> str: + def check_field(self) -> str: # pylint: disable=missing-docstring return self._check_field @property - def list_comparison_output_field(self) -> str: + def list_comparison_output_field(self) -> str: # pylint: disable=missing-docstring return self._list_comparison_output_field @staticmethod @@ -108,7 +103,8 @@ def _create_from_dict(rule: dict) -> "ListComparisonRule": @staticmethod def _check_if_valid(rule: dict): """ - Check validity of a given rule file in relation to the processor configuration in the given pipeline. 
+ Check validity of a given rule file in relation to the processor configuration + in the given pipeline. Parameters ---------- @@ -130,8 +126,8 @@ def _check_if_valid(rule: dict): <= 4 ): raise InvalidListComparisonDefinition( - f"Allowed config fields are: {', '.join(ListComparisonRule.allowed_cfg_fields)}, and of them" - f" only one path field should be present." + f"Allowed config fields are: {', '.join(ListComparisonRule.allowed_cfg_fields)}, " + f"and of them only one path field should be present." ) # check if config contains unknown fields @@ -151,15 +147,13 @@ def _check_if_valid(rule: dict): if key in ["list_file_paths"]: if len(list_comparison_cfg[key]) == 0: raise InvalidListComparisonDefinition( - f"The rule should have at least one list configured" + "The rule should have at least one list configured" ) # iterate over all given files for path in list_comparison_cfg[key]: if not isinstance(path, str) and not os.path.isfile(path): - raise InvalidListComparisonDefinition( - f"{path} is not a existing file." - ) + raise InvalidListComparisonDefinition(f"{path} is not a existing file.") if key == "check_field": if not isinstance(list_comparison_cfg[key], str): diff --git a/tests/acceptance/test_wineventlog_processing.py b/tests/acceptance/test_wineventlog_processing.py index dcdf50c5e..84d9d5bc0 100644 --- a/tests/acceptance/test_wineventlog_processing.py +++ b/tests/acceptance/test_wineventlog_processing.py @@ -1,15 +1,19 @@ -#!/usr/bin/python3 -import pytest +# pylint: disable=missing-docstring +import logging +import os +import pytest from logprep.util.json_handling import dump_config_as_file, parse_jsonl -from tests.acceptance.util import * +from tests.acceptance.util import get_difference, get_test_output, store_latest_test_output -basicConfig(level=DEBUG, format="%(asctime)-15s %(name)-5s %(levelname)-8s: %(message)s") -logger = getLogger("Logprep-Test") +logging.basicConfig( + level=logging.DEBUG, format="%(asctime)-15s %(name)-5s %(levelname)-8s: %(message)s" +) +logger = logging.getLogger("Logprep-Test") -@pytest.fixture -def config_template(): +@pytest.fixture(name="config_template") +def fixture_config_template(): config_yml = { "process_count": 1, "print_processed_period": 600, @@ -58,8 +62,12 @@ def config_template(): ), ], ) -def test_events_labeled_correctly(tmp_path, config_template, specific_rules, generic_rules, schema, expected_output): - expected_output_path = path.join("tests/testdata/acceptance/expected_result", expected_output) +def test_events_labeled_correctly( + tmp_path, config_template, specific_rules, generic_rules, schema, expected_output +): # pylint: disable=too-many-arguments + expected_output_path = os.path.join( + "tests/testdata/acceptance/expected_result", expected_output + ) set_config(config_template, specific_rules, generic_rules, schema) config_path = str(tmp_path / "generated_config.yml") @@ -74,14 +82,14 @@ def test_events_labeled_correctly(tmp_path, config_template, specific_rules, gen assert ( result["difference"][0] == result["difference"][1] - ), "Missmatch in event at line {}!".format(result["event_line_no"]) + ), f"Missmatch in event at line {result['event_line_no']}!" 
def set_config(config_template, specific_rules, generic_rules, schema): - config_template["pipeline"][0]["labelername"]["schema"] = path.join("tests/testdata", schema) + config_template["pipeline"][0]["labelername"]["schema"] = os.path.join("tests/testdata", schema) config_template["pipeline"][0]["labelername"]["specific_rules"] = [ - path.join("tests/testdata", rule) for rule in specific_rules + os.path.join("tests/testdata", rule) for rule in specific_rules ] config_template["pipeline"][0]["labelername"]["generic_rules"] = [ - path.join("tests/testdata", rule) for rule in generic_rules + os.path.join("tests/testdata", rule) for rule in generic_rules ] diff --git a/tests/unit/processor/list_comparison/test_list_comparison.py b/tests/unit/processor/list_comparison/test_list_comparison.py index 133135752..b9791c295 100644 --- a/tests/unit/processor/list_comparison/test_list_comparison.py +++ b/tests/unit/processor/list_comparison/test_list_comparison.py @@ -1,15 +1,9 @@ -from logging import getLogger - +# pylint: disable=missing-docstring +# pylint: disable=protected-access import pytest -from tests.unit.processor.base import BaseProcessorTestCase - -from logprep.processor.list_comparison.processor import DuplicationError - -pytest.importorskip("logprep.processor.list_comparison") - from logprep.processor.list_comparison.factory import ListComparisonFactory - -logger = getLogger() +from logprep.processor.list_comparison.processor import DuplicationError +from tests.unit.processor.base import BaseProcessorTestCase class TestListComparison(BaseProcessorTestCase): @@ -18,7 +12,7 @@ class TestListComparison(BaseProcessorTestCase): "specific_rules": ["tests/testdata/unit/list_comparison/rules/specific"], "generic_rules": ["tests/testdata/unit/list_comparison/rules/generic"], "tree_config": "tests/testdata/unit/shared_data/tree_config.json", - "list_search_base_path": "./" + "list_search_base_path": "./", } factory = ListComparisonFactory @@ -39,7 +33,7 @@ def test_element_in_list(self): assert self.object._event.get("user_results") is not None assert isinstance(self.object._event.get("user_results"), dict) - assert document.get('user_results').get('in_list') is not None + assert document.get("user_results").get("in_list") is not None assert document.get("user_results").get("not_in_list") is None def test_element_not_in_list(self): @@ -49,7 +43,7 @@ def test_element_not_in_list(self): self.object.process(document) - assert document.get("user_results", {}).get("not_in_list") is not [] + assert len(document.get("user_results", {}).get("not_in_list")) == 1 assert document.get("user_results", {}).get("in_list") is None def test_element_in_two_lists(self): @@ -59,21 +53,22 @@ def test_element_in_two_lists(self): self.object.process(document) - assert len(document.get("user_results", {}).get("not_in_list")) is 1 + assert len(document.get("user_results", {}).get("not_in_list")) == 1 assert document.get("user_results", {}).get("in_list") is None - assert len(document.get("user_and_system_results", {}).get("in_list")) is 2 + assert len(document.get("user_and_system_results", {}).get("in_list")) == 2 assert document.get("user_and_system_results", {}).get("not_in_list") is None def test_element_not_in_two_lists(self): - # Tests if the system Gamma does not appear in two lists, and username Mark is also not in list + # Tests if the system Gamma does not appear in two lists, + # and username Mark is also not in list assert self.object.ps.processed_count == 0 document = {"user": "Mark", "system": "Gamma"} 
self.object.process(document) - assert len(document.get("user_and_system_results", {}).get("not_in_list")) is 2 + assert len(document.get("user_and_system_results", {}).get("not_in_list")) == 2 assert document.get("user_and_system_results", {}).get("in_list") is None - assert len(document.get("user_results", {}).get("not_in_list")) is 1 + assert len(document.get("user_results", {}).get("not_in_list")) == 1 assert document.get("user_results", {}).get("in_list") is None def test_two_lists_with_one_matched(self): @@ -82,10 +77,10 @@ def test_two_lists_with_one_matched(self): self.object.process(document) - assert document.get("user_results", {}).get("not_in_list") is not [] + assert len(document.get("user_results", {}).get("not_in_list")) != 0 assert document.get("user_results", {}).get("in_list") is None assert document.get("user_and_system_results", {}).get("not_in_list") is None - assert document.get("user_and_system_results", {}).get("in_list") is not [] + assert len(document.get("user_and_system_results", {}).get("in_list")) != 0 def test_dotted_output_field(self): # tests if outputting list_comparison results to dotted fields works @@ -94,13 +89,8 @@ def test_dotted_output_field(self): self.object.process(document) - assert ( - document.get("dotted", {}).get("user_results", {}).get("not_in_list") - is None - ) - assert ( - document.get("dotted", {}).get("user_results", {}).get("in_list") is not [] - ) + assert document.get("dotted", {}).get("user_results", {}).get("not_in_list") is None + assert len(document.get("dotted", {}).get("user_results", {}).get("in_list")) != 0 def test_deep_dotted_output_field(self): # tests if outputting list_comparison results to dotted fields works @@ -117,14 +107,6 @@ def test_deep_dotted_output_field(self): .get("not_in_list") is None ) - assert ( - document.get("more", {}) - .get("than", {}) - .get("dotted", {}) - .get("user_results", {}) - .get("not_in_list") - is not [] - ) def test_extend_dotted_output_field(self): # tests if list_comparison properly extends lists already present in output fields. @@ -137,13 +119,8 @@ def test_extend_dotted_output_field(self): self.object.process(document) - assert ( - document.get("dotted", {}).get("user_results", {}).get("not_in_list") - is None - ) - assert ( - len(document.get("dotted", {}).get("user_results", {}).get("in_list")) == 2 - ) + assert document.get("dotted", {}).get("user_results", {}).get("not_in_list") is None + assert len(document.get("dotted", {}).get("user_results", {}).get("in_list")) == 2 def test_dotted_parent_field_exists_but_subfield_doesnt(self): # tests if list_comparison properly extends lists already present in output fields. 
@@ -156,20 +133,10 @@ def test_dotted_parent_field_exists_but_subfield_doesnt(self): self.object.process(document) + assert document.get("dotted", {}).get("user_results", {}).get("not_in_list") is None + assert len(document.get("dotted", {}).get("user_results", {}).get("in_list")) == 1 assert ( - document.get("dotted", {}).get("user_results", {}).get("not_in_list") - is None - ) - assert ( - len(document.get("dotted", {}).get("user_results", {}).get("in_list")) == 1 - ) - assert ( - len( - document.get("dotted", {}) - .get("preexistent_output_field", {}) - .get("in_list") - ) - == 1 + len(document.get("dotted", {}).get("preexistent_output_field", {}).get("in_list")) == 1 ) def test_dotted_wrong_type(self): @@ -196,7 +163,7 @@ def test_check_in_dotted_subfield(self): self.object.process(document) - assert len(document.get("channel_results", {}).get("not_in_list")) is 2 + assert len(document.get("channel_results", {}).get("not_in_list")) == 2 assert document.get("channel_results", {}).get("in_list") is None def test_ignore_comment_in_list(self): @@ -207,5 +174,5 @@ def test_ignore_comment_in_list(self): self.object.process(document) - assert len(document.get("user_results", {}).get("not_in_list")) is 1 + assert len(document.get("user_results", {}).get("not_in_list")) == 1 assert document.get("user_results", {}).get("in_list") is None diff --git a/tests/unit/processor/list_comparison/test_list_comparison_rule.py b/tests/unit/processor/list_comparison/test_list_comparison_rule.py index 2d91d96ff..edafa0cee 100644 --- a/tests/unit/processor/list_comparison/test_list_comparison_rule.py +++ b/tests/unit/processor/list_comparison/test_list_comparison_rule.py @@ -1,23 +1,21 @@ -from logprep.filter.lucene_filter import LuceneFilter +# pylint: disable=missing-docstring +# pylint: disable=protected-access +# pylint: disable=no-self-use +from unittest import mock import pytest - -pytest.importorskip("logprep.processor.list_comparison") - -from unittest import mock +from logprep.filter.lucene_filter import LuceneFilter from logprep.processor.list_comparison.rule import ListComparisonRule -@pytest.fixture() -def specific_rule_definition(): +@pytest.fixture(name="specific_rule_definition") +def fixture_specific_rule_definition(): return { "filter": "user", "list_comparison": { "check_field": "user", "output_field": "user_results", - "list_file_paths": [ - "tests/testdata/unit/list_comparison/lists/user_list.txt" - ], + "list_file_paths": ["tests/testdata/unit/list_comparison/lists/user_list.txt"], }, "description": "", } @@ -61,20 +59,18 @@ def test_rules_are_not_equal(self, specific_rule_definition): assert rule_diff_check_field != rule_diff_filter def test_compare_set_not_empty_for_valid_rule_def(self, specific_rule_definition): - self.rule = ListComparisonRule( + rule = ListComparisonRule( LuceneFilter.create(specific_rule_definition["filter"]), specific_rule_definition["list_comparison"], ) - assert self.rule._compare_sets is not None - assert isinstance(self.rule._compare_sets, dict) - assert len(self.rule._compare_sets.keys()) > 0 + assert rule._compare_sets is not None + assert isinstance(rule._compare_sets, dict) + assert len(rule._compare_sets.keys()) > 0 - @mock.patch( - "logprep.processor.list_comparison.rule.ListComparisonRule.init_list_comparison" - ) + @mock.patch("logprep.processor.list_comparison.rule.ListComparisonRule.init_list_comparison") def test_init_compare_sets_is_called(self, mock_method, specific_rule_definition): - rule = ListComparisonRule( + _ = ListComparisonRule( 
LuceneFilter.create(specific_rule_definition["filter"]), specific_rule_definition["list_comparison"], )
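The updated tests in this patch replace identity comparisons such as "is 1" and "is not []" with length and equality checks. As a minimal illustration of why the old assertions were unreliable (the variable names below are made up for the example, not taken from the test suite):

    # Sketch: identity vs. equality, as in the rewritten assertions.
    not_in_list = ["user_list.txt"]

    print(not_in_list is not [])    # True for ANY list: the literal [] is always a fresh object
    print(len(not_in_list) is 1)    # True only because CPython happens to cache small ints;
                                    # Python 3.8+ emits a SyntaxWarning for "is" with a literal

    print(len(not_in_list) == 1)    # the value check the updated tests now perform
    print(not_in_list != [])        # likewise for (non-)emptiness

"is" answers "are these the same object?", while "==" answers "do these compare equal?", which is what the assertions are actually meant to verify.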