Skip to content

Commit

Permalink
fix bug following new tldextract feature
Browse files Browse the repository at this point in the history
  • Loading branch information
ekneg54 committed Sep 26, 2023
1 parent a830845 commit 03b8ccd
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions logprep/processor/pseudonymizer/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
import re
from functools import cached_property
from logging import Logger
-from typing import Any, List, Optional, Tuple, Union, Pattern
+from typing import Any, List, Optional, Pattern, Tuple, Union
from urllib.parse import parse_qs

from attr import define, field, validators
Expand Down Expand Up @@ -328,7 +328,9 @@ def _parse_url_parts(self, tld_extractor: TLDExtract, url_str: str) -> dict:
parts["domain"] = url.domain
parts["subdomain"] = url.subdomain
parts["suffix"] = url.suffix
-url_list = ".".join(list(url))
+url_list = list(url)
+url_list.pop()
+url_list = ".".join(url_list)
parts["path"] = self._find_first(
rf"(?:^[a-z0-9]+\:\/\/)?{url_list}(?:\:\d+)?([^#^\?]*).*", url_str
)
Expand Down

0 comments on commit 03b8ccd

Please sign in to comment.