
Fix/redirect (#104)
* Update txextra to fix issue in redirect handling

* Ignore private redirects

* Pass the extra argument in the correct place

* Add support for loading headers from http_request_headers dictionary

* Stringify http_request_headers to make twisted happy

* Add extra argument to web_connectivity test helper call
hellais authored Feb 2, 2017
1 parent 68153b8 commit 6f92669
Showing 4 changed files with 117 additions and 17 deletions.
49 changes: 49 additions & 0 deletions oonib/common/ip_utils.py
@@ -0,0 +1,49 @@
from ipaddr import IPv4Address, IPv6Address
from ipaddr import AddressValueError


def in_private_ip_space(address):
    ip_address = IPv4Address(address)
    return any(
        [ip_address.is_private, ip_address.is_loopback]
    )

def is_public_ipv4_address(address):
    try:
        return not in_private_ip_space(address)
    except AddressValueError:
        return False

def is_private_ipv4_address(address):
    try:
        return in_private_ip_space(address)
    except AddressValueError:
        return False


def is_private_address(address, only_loopback=False):
    """
    Checks to see if an IP address is in private IP space or if the
    hostname is either localhost or *.local.
    :param address: an IP address or a hostname
    :param only_loopback: will only check if the IP address is either
        127.0.0.1/8 or ::1 in ipv6
    :return: True if the IP address or host is in private space
    """
    try:
        ip_address = IPv4Address(address)
    except AddressValueError:
        try:
            ip_address = IPv6Address(address)
        except AddressValueError:
            if address == "localhost":
                return True
            elif address.endswith(".local"):
                return True
            return False

    candidates = [ip_address.is_loopback]
    if not only_loopback:
        candidates.append(ip_address.is_private)
    return any(candidates)
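
For reference, a quick usage sketch of the new helpers (not part of the commit; it only exercises the functions added above):

    from oonib.common.ip_utils import is_private_address, is_public_ipv4_address

    is_private_address("127.0.0.1")                      # True: loopback
    is_private_address("10.0.0.1")                       # True: RFC 1918 private space
    is_private_address("10.0.0.1", only_loopback=True)   # False: private but not loopback
    is_private_address("example.local")                  # True: *.local hostname
    is_private_address("localhost")                      # True
    is_public_ipv4_address("8.8.8.8")                    # True
    is_public_ipv4_address("not an ip")                  # False: AddressValueError is swallowed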
28 changes: 27 additions & 1 deletion oonib/common/txextra.py
@@ -19,6 +19,8 @@

from twisted.python import log

from .ip_utils import is_private_address

class TrueHeaders(Headers):
    def __init__(self, rawHeaders=None):
        self._rawHeaders = dict()
@@ -35,6 +37,12 @@ def setRawHeaders(self, name, values):
        self._rawHeaders[name.lower()]['name'] = name
        self._rawHeaders[name.lower()]['values'] = values

    def copy(self):
        rawHeaders = {}
        for k, v in self.getAllRawHeaders():
            rawHeaders[k] = v
        return self.__class__(rawHeaders)

    def getAllRawHeaders(self):
        for _, v in self._rawHeaders.iteritems():
            yield v['name'], v['values']
@@ -168,6 +176,10 @@ class FixedRedirectAgent(BrowserLikeRedirectAgent):
    This is a redirect agent with this patch manually applied:
    https://twistedmatrix.com/trac/ticket/8265
    """
    def __init__(self, agent, redirectLimit=20, ignorePrivateRedirects=False):
        self.ignorePrivateRedirects = ignorePrivateRedirects
        BrowserLikeRedirectAgent.__init__(self, agent, redirectLimit)

    def _handleRedirect(self, response, method, uri, headers, redirectCount):
        """
        Handle a redirect response, checking the number of redirects already
@@ -191,12 +203,26 @@ def _handleRedirect(self, response, method, uri, headers, redirectCount):
            response.request.absoluteURI,
            locationHeaders[0]
        )
        if getattr(client, 'URI', None):
            uri = client.URI.fromBytes(location)
        else:
            # Backward compatibility with twisted 14.0.2
            uri = client._URI.fromBytes(location)
        if self.ignorePrivateRedirects and is_private_address(uri.host,
                                                              only_loopback=True):
            return response

        deferred = self._agent.request(method, location, headers)

        def _chainResponse(newResponse):
            if isinstance(newResponse, Failure):
                # This is needed to write the response even in case of failure
                newResponse.previousResponse = response
                newResponse.requestLocation = location
                return newResponse
            newResponse.setPreviousResponse(response)
            return newResponse

-        deferred.addCallback(_chainResponse)
+        deferred.addBoth(_chainResponse)
        return deferred.addCallback(
            self._handleResponse, method, uri, headers, redirectCount + 1)
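
For reference, a minimal sketch of how the patched agent is meant to be wired (it mirrors the test-helper change below; TrueHeadersAgent is assumed to be the header-preserving agent already defined in this module):

    from twisted.internet import reactor
    from twisted.web.client import ContentDecoderAgent, GzipDecoder

    from oonib.common.txextra import (TrueHeaders, TrueHeadersAgent,
                                      FixedRedirectAgent)

    agent = ContentDecoderAgent(
        FixedRedirectAgent(TrueHeadersAgent(reactor), ignorePrivateRedirects=True),
        [('gzip', GzipDecoder)]
    )
    # With ignorePrivateRedirects=True, a 3xx Location pointing at localhost,
    # 127.0.0.1/::1 or a *.local name is returned as-is instead of being followed.
    d = agent.request('GET', 'http://example.com/',
                      TrueHeaders({'User-Agent': ['ooniprobe']}))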
55 changes: 40 additions & 15 deletions oonib/testhelpers/http_helpers.py
@@ -309,8 +309,9 @@ def lookup(self, response_type, key):
        defer.returnValue(value)

    @defer.inlineCallbacks
-    def http_request(self, url, include_http_responses=False):
-        cached_value = yield self.lookup('http_request', url)
+    def http_request(self, url, http_request_headers, include_http_responses=False):
+        key = url + json.dumps(http_request_headers)
+        cached_value = yield self.lookup('http_request', key)
        if cached_value is not None:
            if include_http_responses is not True:
                cached_value.pop('responses', None)
@@ -324,15 +325,16 @@ def http_request(self, url, include_http_responses=False):
        }

        agent = ContentDecoderAgent(
-            FixedRedirectAgent(TrueHeadersAgent(reactor)),
-            [('gzip', GzipDecoder)]
+            FixedRedirectAgent(TrueHeadersAgent(reactor),
+                               ignorePrivateRedirects=True),
+            [('gzip', GzipDecoder)]
        )
        try:
            retries = 0
            while True:
                try:
                    response = yield agent.request('GET', url,
-                                                   TrueHeaders(REQUEST_HEADERS))
+                                                   TrueHeaders(http_request_headers))
                    headers = {}
                    for name, value in response.headers.getAllRawHeaders():
                        headers[name] = unicode(value[0], errors='ignore')
@@ -364,11 +366,13 @@ def http_request(self, url, include_http_responses=False):
            page_info['failure'] = 'connection_refused_error'
        except ConnectError:
            page_info['failure'] = 'connect_error'
-        except:
+        except Exception as exc:
            # XXX map more failures
            page_info['failure'] = 'unknown_error'
            log.err("Unknown error occurred")
            log.exception(exc)

-        yield self.cache_value('http_request', url, page_info)
+        yield self.cache_value('http_request', key, page_info)
        if include_http_responses is not True:
            page_info.pop('responses', None)
        defer.returnValue(page_info)
@@ -451,10 +455,14 @@ class WebConnectivity(OONIBHandler):
    @defer.inlineCallbacks
    def control_measurement(self, http_url, socket_list,
                            include_http_responses,
-                            invalid_sockets):
+                            invalid_sockets,
+                            http_request_headers=None):
+        if http_request_headers is None:
+            http_request_headers = {}

        hostname = urlparse(http_url).netloc
        dl = [
-            web_connectivity_cache.http_request(http_url, include_http_responses),
+            web_connectivity_cache.http_request(http_url, http_request_headers, include_http_responses),
            web_connectivity_cache.dns_consistency(hostname)
        ]
        for socket in socket_list:
@@ -477,12 +485,20 @@ def control_measurement(self, http_url, socket_list,
        })

    def validate_request(self, request):
        allowed_headers = ['user-agent', 'accept', 'accept-language']
        required_keys = ['http_request', 'tcp_connect']
        for rk in required_keys:
            if rk not in request.keys():
                raise HTTPError(400, "Missing %s" % rk)
        if not HTTP_REQUEST_REGEXP.match(request['http_request']):
            raise HTTPError(400, "Invalid http_request URL")

        http_request_headers = request.get('http_request_headers', {})
        for k, v in http_request_headers.iteritems():
            if k.lower() not in allowed_headers:
                raise HTTPError(400, "Invalid header %s" % k)
            if not isinstance(v, list):
                raise HTTPError(400, "Headers must be a list")
        # We don't need to check the tcp_connect field because we strip it in
        # the post already.

@@ -508,13 +524,22 @@ def post(self):
            request['tcp_connect'] = tcp_connect

            self.validate_request(request)
-            include_http_responses = request.get("include_http_responses",
-                                                 False)
+            include_http_responses = request.get(
+                "include_http_responses",
+                False
+            )

            # We convert headers to str so twisted is happy (unicode triggers
            # errors)
            http_request_headers = {}
            for k, v in request.get('http_request_headers', {}).iteritems():
                http_request_headers[str(k)] = map(str, v)
            self.control_measurement(
-                str(request['http_request']),
-                request['tcp_connect'],
-                include_http_responses,
-                invalid_sockets
+                http_url=str(request['http_request']),
+                include_http_responses=include_http_responses,
+                http_request_headers=http_request_headers,
+                socket_list=request['tcp_connect'],
+                invalid_sockets=invalid_sockets
            )
        except HTTPError:
            raise
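
For reference, a hypothetical control request body that the updated handler accepts: only user-agent, accept and accept-language headers pass validation, and every header value must be a list.

    control_request = {
        "http_request": "http://example.com/",
        "tcp_connect": ["93.184.216.34:80"],
        "http_request_headers": {
            "User-Agent": ["Mozilla/5.0"],
            "Accept-Language": ["en-US,en;q=0.8"]
        },
        "include_http_responses": False
    }
    # post() converts the header names and values to str before calling
    # control_measurement(), since json.loads() returns unicode and unicode
    # header values upset twisted.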
2 changes: 1 addition & 1 deletion oonib/tests/test_web_connectivity.py
@@ -14,7 +14,7 @@ def tearDown(self):
    @defer.inlineCallbacks
    def test_http_request(self):
        value = yield self.web_connectivity_cache.http_request(
-            'https://www.google.com/humans.txt')
+            'https://www.google.com/humans.txt', {})
        self.assertEqual(
            value['body_length'], 286
        )
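
A hypothetical variation on the test above, exercising the new header argument from inside the same inlineCallbacks test (header values must be lists of strings):

    value = yield self.web_connectivity_cache.http_request(
        'https://www.google.com/humans.txt',
        {'User-Agent': ['Mozilla/5.0']})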
