Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix missing error page and improper redirects #303

Merged
merged 4 commits into from
Jul 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 8 additions & 13 deletions snare/cloner.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,14 @@ def get_headers(response):
"x-cache",
]

content_type = None
headers = []
for key, value in response.headers.items():
if key.lower() not in ignored_headers_lowercase:
if key.lower() == "content-type":
content_type = value
elif key.lower() not in ignored_headers_lowercase:
headers.append({key: value})
return headers
return [headers, content_type]

async def process_link(self, url, level, check_host=False):
try:
Expand Down Expand Up @@ -177,6 +180,7 @@ async def get_body(self, driver):
self.logger.debug("Cloned file: %s", file_name)
self.meta[file_name]["hash"] = hash_name
self.meta[file_name]["headers"] = headers
self.meta[file_name]["content_type"] = content_type

if content_type == "text/html":
soup = await self.replace_links(data, level)
Expand Down Expand Up @@ -218,7 +222,7 @@ async def fetch_data(self, session, current_url, level, try_count):
redirect_url = None
try:
response = await session.get(current_url, headers={"Accept": "text/html"}, timeout=10.0)
headers = self.get_headers(response)
headers, _ = self.get_headers(response)
content_type = response.content_type
response_url = yarl.URL(response.url)
if response_url.with_scheme("http") != current_url.with_scheme("http"):
Expand All @@ -233,14 +237,6 @@ async def fetch_data(self, session, current_url, level, try_count):


class HeadlessCloner(BaseCloner):
@staticmethod
def get_content_type(headers):
for header in headers:
for key, val in header.items():
if key.lower() == "content-type":
return val.split(";")[0]
return None

async def fetch_data(self, browser, current_url, level, try_count):
data = None
headers = []
Expand All @@ -250,8 +246,7 @@ async def fetch_data(self, browser, current_url, level, try_count):
try:
page = await browser.newPage()
response = await page.goto(str(current_url))
headers = self.get_headers(response)
content_type = self.get_content_type(headers)
headers, content_type = self.get_headers(response)
response_url = yarl.URL(response.url)
if response_url.with_scheme("http") != current_url.with_scheme("http"):
redirect_url = response_url
Expand Down
18 changes: 10 additions & 8 deletions snare/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ async def submit_slurp(self, data):

async def handle_request(self, request):
self.logger.info("Request path: {0}".format(request.path_qs))
if self.meta[request.path_qs].get("redirect"):
raise web.HTTPFound(self.meta[request.path_qs]["redirect"])
data = self.tanner_handler.create_data(request, 200)
if request.method == "POST":
post_data = await request.post()
Expand Down Expand Up @@ -71,18 +69,22 @@ async def handle_request(self, request):
if previous_sess_uuid is None or not previous_sess_uuid.strip() or previous_sess_uuid != cur_sess_id:
headers.add("Set-Cookie", "sess_uuid=" + cur_sess_id)

if status_code == 404:
raise web.HTTPNotFound(headers=headers)

return web.Response(body=content, status=status_code, headers=headers)
Comment on lines +72 to 75
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A dedicated check for status code 404 was added here: returning `web.Response` with `status_code` set to 404 did not work as intended, so the handler raises `web.HTTPNotFound` (carrying the same headers) instead.


async def start(self):
app = web.Application()
app.add_routes([web.route("*", "/{tail:.*}", self.handle_request)])
aiohttp_jinja2.setup(app, loader=jinja2.FileSystemLoader(self.dir))
middleware = SnareMiddleware(
error_404=self.meta["/status_404"].get("hash"),
headers=self.meta["/status_404"].get("headers", []),
server_header=self.run_args.server_header,
)
middleware.setup_middlewares(app)
if self.meta.get("/status_404"):
middleware = SnareMiddleware(
error_404=self.meta["/status_404"].get("hash"),
headers=self.meta["/status_404"].get("headers", []),
server_header=self.run_args.server_header,
)
middleware.setup_middlewares(app)

self.runner = web.AppRunner(app)
await self.runner.setup()
Expand Down
6 changes: 6 additions & 0 deletions snare/tanner_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import multidict
import json
import logging

import aiohttp
from aiohttp import web

from urllib.parse import unquote
from bs4 import BeautifulSoup
Expand Down Expand Up @@ -103,6 +105,8 @@ async def parse_tanner_response(self, requested_name, detection):
requested_name = self.run_args.index_page
requested_name = unquote(requested_name)
try:
if self.meta.get(requested_name) and self.meta[requested_name].get("redirect"):
raise web.HTTPFound(self.meta[requested_name]["redirect"])
file_name = self.meta[requested_name]["hash"]
for header in self.meta[requested_name].get("headers", []):
for key, value in header.items():
Expand All @@ -117,6 +121,8 @@ async def parse_tanner_response(self, requested_name, detection):
break

if not file_name:
if self.meta.get("/status_404") and self.meta["/status_404"].get("redirect"):
raise web.HTTPFound(self.meta["/status_404"]["redirect"])
status_code = 404
else:
path = os.path.join(self.dir, file_name)
Expand Down