Skip to content

Commit

Permalink
Detecting duplicate with parent has been moved to scanning stage to speed up report generation (#65)
Browse files Browse the repository at this point in the history
  • Loading branch information
kazet authored Jan 3, 2024
1 parent 5e3ddcc commit de4b019
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 18 deletions.
19 changes: 1 addition & 18 deletions autoreporter_addons/ssl_checks/reporter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from difflib import SequenceMatcher
from pathlib import Path
from typing import Any, Callable, Dict, List
from urllib.parse import urlparse
Expand All @@ -14,7 +13,7 @@
from artemis.reporting.base.report_type import ReportType
from artemis.reporting.base.reporter import Reporter
from artemis.reporting.base.templating import ReportEmailTemplateFragment
from artemis.reporting.utils import cached_get, get_top_level_target
from artemis.reporting.utils import get_top_level_target
from bs4 import BeautifulSoup

from extra_modules_config import ExtraModulesConfig
Expand Down Expand Up @@ -61,22 +60,6 @@ def create_reports(task_result: Dict[str, Any], language: Language) -> List[Repo
if not isinstance(result, dict):
return []

try:
response = cached_get(f"https://{domain}")
parent_response = cached_get(f"https://{'.'.join(domain_parts[1:])}")
if SequenceMatcher(None, response.content, parent_response.content).quick_ratio() >= 0.8:
# Do not report misconfigurations if a domain has identical content to a parent domain - e.g.
# if we have mail.domain.com with identical content to domain.com, we assume that it's domain.com
# which is actually used, and therefore don't report subdomains.
return []
except Exception:
logger.warning(
f"Unable to check whether domain {domain} has similar content to parent domain. Artemis SSL check "
"module tries to reduce the number of false positives by skipping reports where domain has similar "
"content to parent domain, as there are cases where e.g. mail.example.com serves the same content "
"as example.com. If this fails, two similar reports may get sent."
)

if "response_status_code" in result and "response_content_prefix" in result:
response_status_code = result["response_status_code"]
response_content_prefix = result["response_content_prefix"]
Expand Down
23 changes: 23 additions & 0 deletions karton_ssl_checks/karton_ssl_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import datetime
import subprocess
import urllib.parse
from difflib import SequenceMatcher
from typing import Any, Dict, List

import requests
Expand Down Expand Up @@ -49,6 +50,28 @@ def run(self, current_task: Task) -> None:
self.db.save_task_result(task=current_task, status=TaskStatus.OK)
return

try:
response = http_requests.get(f"https://{domain}")
parent_domain = ".".join(domain_parts[1:])
parent_response = http_requests.get(f"https://{parent_domain}")
if SequenceMatcher(None, response.content, parent_response.content).quick_ratio() >= 0.8:
# Do not report misconfigurations if a domain has identical content to a parent domain - e.g.
# if we have mail.domain.com with identical content to domain.com, we assume that it's domain.com
# which is actually used, and therefore don't report subdomains.
self.db.save_task_result(
task=current_task,
status=TaskStatus.OK,
status_reason=f"Detected that {domain} has similar content to {parent_domain}, not scanning to avoid duplicate reports",
)
return
except Exception:
self.log.exception(
f"Unable to check whether domain {domain} has similar content to parent domain. Artemis SSL check "
"module tries to reduce the number of false positives by skipping scanning domains when domain has "
"similar content to parent domain, as there are cases where e.g. mail.example.com serves the same "
"content as example.com.",
)

messages = []
result: Dict[str, Any] = {}

Expand Down

0 comments on commit de4b019

Please sign in to comment.