Skip to content
This repository has been archived by the owner on Jan 6, 2022. It is now read-only.

Commit

Permalink
onion-location: factor meta tag retrieval into function
Browse files Browse the repository at this point in the history
  • Loading branch information
redshiftzero committed Sep 17, 2020
1 parent 263d272 commit 56dc2b5
Showing 1 changed file with 36 additions and 21 deletions.
57 changes: 36 additions & 21 deletions sites/management/commands/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

TIMEOUT_REQUESTS = 5


def pshtt(domain):
pshtt_cmd = ['pshtt', '--json', '--timeout', '5', domain]

Expand All @@ -31,14 +32,36 @@ def pshtt(domain):
return pshtt_results, stdout, stderr


def is_onion_loc_in_meta_tag(url: str) -> Optional[bool]:
    """
    Fetch ``url`` and report whether the returned page advertises an onion
    service through a meta tag of the form:
    <meta http-equiv="onion-location" content="http://myonion.onion">

    Returns True when at least one such tag with a ``content`` attribute is
    found, False when none is found, and None when the request or the HTML
    parsing raised an error.
    """
    try:
        response = requests.get(url, timeout=TIMEOUT_REQUESTS)
        document = html.fromstring(response.content)
        onion_locations = document.xpath(
            '//meta[@http-equiv="onion-location"]/@content'
        )
    except (etree.ParserError, requests.exceptions.RequestException) as err:
        # The page could not be requested or parsed: log the error and
        # report an unknown result rather than aborting.
        logger.error(err)
        return None

    return len(onion_locations) >= 1


def is_onion_available(pshtt_results) -> Optional[bool]:
"""
For HTTPS sites, we inspect the headers to see if the
Onion-Location header is present, indicating that the
site is available as an onion service.
For HTTPS sites, we see if an Onion-Location is provided, indicating that
the site is available as an onion service.
"""
onion_available = False

# First we see if the header is provided.
for key in ["https", "httpswww"]:
try:
headers = pshtt_results["endpoints"][key]["headers"]
Expand All @@ -51,23 +74,14 @@ def is_onion_available(pshtt_results) -> Optional[bool]:
# If the header is not provided, it's possible the news organization
# has included it in the HTML of the page in a meta tag using the `http-equiv`
# attribute.
if not onion_available:
for key in ["https", "httpswww"]:
try:
r = requests.get(pshtt_results["endpoints"][key]["url"],
timeout=TIMEOUT_REQUESTS)

tree = html.fromstring(r.content)
matching_meta_tags = tree.xpath('//meta[@http-equiv="onion-location"]/@content')
if len(matching_meta_tags) >= 1:
onion_available = True
return onion_available
except KeyError:
pass
except (etree.ParserError, requests.exceptions.RequestException) as e:
onion_available = None
# Error when requesting or parsing the page content, we log and continue on.
logger.error(e)
for key in ["https", "httpswww"]:
try:
url = pshtt_results["endpoints"][key]["url"]
onion_available = is_onion_loc_in_meta_tag(url)
if onion_available is not None:
return onion_available
except KeyError:
pass

return onion_available

Expand All @@ -78,7 +92,6 @@ def scan(site):

scan = Scan(
site=site,
onion_available=is_onion_available(results),

live=results['Live'],

Expand All @@ -92,6 +105,8 @@ def scan(site):
hsts_preload_ready=results['HSTS Preload Ready'],
hsts_preloaded=results['HSTS Preloaded'],

onion_available=is_onion_available(results),

pshtt_stdout=stdout,
pshtt_stderr=stderr,
)
Expand Down

0 comments on commit 56dc2b5

Please sign in to comment.