Skip to content

Commit

Permalink
Fix subdomain import for subdomains whose suffix is longer than 4 characters
Browse files Browse the repository at this point in the history
  • Loading branch information
yogeshojha committed Jul 30, 2024
1 parent 9550c0a commit 75d55dc
Showing 1 changed file with 23 additions and 2 deletions.
25 changes: 23 additions & 2 deletions web/reNgine/common_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,8 +430,29 @@ def get_domain_from_subdomain(subdomain):
Returns:
str: Domain name.
"""
ext = tldextract.extract(subdomain)
return '.'.join(ext[1:3])
# ext = tldextract.extract(subdomain)
# return '.'.join(ext[1:3])

if not validators.domain(subdomain):
return None

# Use tldextract to parse the subdomain
extracted = tldextract.extract(subdomain)

# If tldextract recognized the TLD, then domain + suffix is the final result
if extracted.suffix:
domain = f"{extracted.domain}.{extracted.suffix}"
else:
# Fallback for TLDs that tldextract does not recognize, e.g. .clouds or .local
parts = subdomain.split('.')
if len(parts) >= 2:
domain = '.'.join(parts[-2:])
else:
return None

# Validate the domain before returning
return domain if validators.domain(domain) else None



def sanitize_url(http_url):
Expand Down

0 comments on commit 75d55dc

Please sign in to comment.