Skip to content

Commit

Permalink
fix domain_label_extractor to deal with frozen config
Browse files Browse the repository at this point in the history
  • Loading branch information
ekneg54 committed Aug 31, 2024
1 parent 20860af commit ea8df41
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
3 changes: 1 addition & 2 deletions logprep/processor/domain_label_extractor/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
from logprep.processor.domain_label_extractor.rule import DomainLabelExtractorRule
from logprep.processor.field_manager.processor import FieldManager
from logprep.util.getter import GetterFactory
-from logprep.util.helper import add_field_to, get_dotted_field_value, add_and_overwrite
+from logprep.util.helper import add_and_overwrite, add_field_to, get_dotted_field_value
from logprep.util.validators import list_of_urls_validator

logger = logging.getLogger("DomainLabelExtractor")
Expand Down Expand Up @@ -101,7 +101,6 @@ def setup(self):
list_path.touch()
list_path.write_bytes(GetterFactory.from_string(tld_list).get_raw())
downloaded_tld_lists_paths.append(f"file://{str(list_path.absolute())}")
-self._config.tld_lists = downloaded_tld_lists_paths
logger.debug("finished tldlists download...")

def _apply_rules(self, event, rule: DomainLabelExtractorRule):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pylint: disable=protected-access
# pylint: disable=missing-docstring

+import copy
import hashlib
import os
import shutil
Expand Down Expand Up @@ -347,7 +348,9 @@ def test_setup_downloads_tld_lists_to_separate_process_file(self):
tld_list_content = tld_list_path.read_bytes()
expected_checksum = hashlib.md5(tld_list_content).hexdigest() # nosemgrep
responses.add(responses.GET, tld_list, tld_list_content)
-self.object._config.tld_lists = [tld_list]
+config = copy.deepcopy(self.CONFIG)
+config["tld_lists"] = [tld_list]
+self.object = Factory.create({"domain_label_extractor": config})
self.object.setup()
logprep_tmp_dir = Path(tempfile.gettempdir()) / "logprep"
downloaded_file = logprep_tmp_dir / f"{self.object.name}-tldlist-0.dat"
Expand All @@ -370,7 +373,9 @@ def test_setup_doesnt_overwrite_already_existing_tld_list_file(self):
pre_existing_content = "file exists already"
tld_temp_file.touch()
tld_temp_file.write_bytes(pre_existing_content.encode("utf8"))
-self.object._config.tld_lists = [tld_list]
+config = copy.deepcopy(self.CONFIG)
+config["tld_lists"] = [tld_list]
+self.object = Factory.create({"domain_label_extractor": config})
self.object.setup()
assert tld_temp_file.exists()
assert tld_temp_file.read_bytes().decode("utf8") == pre_existing_content
Expand Down

0 comments on commit ea8df41

Please sign in to comment.