Skip to content

Commit

Permalink
Noticed that for PHP it is not possible to send generic payloads. So …
Browse files Browse the repository at this point in the history
…created a new grep plugin that will

detect when the application is sending serialized objects and warn the user to perform manual analysis.

This is related to: Insecure deserialization audit plugin #16280

Created the grep plugin and its unit tests.

Also moved the base64 functions from the audit.deserialization plugin to the utils lib
and improved the regular expression they were using.
  • Loading branch information
Andres Riancho committed Mar 6, 2018
1 parent cccc850 commit 23f77b5
Show file tree
Hide file tree
Showing 4 changed files with 453 additions and 30 deletions.
65 changes: 65 additions & 0 deletions w3af/core/data/misc/base64_nopadding.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,30 @@
"""
base64_nopadding.py
Copyright 2018 Andres Riancho
This file is part of w3af, http://w3af.org/ .
w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.
w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
"""
import re
import base64
import binascii


# Strict matcher for padded base64: zero or more complete four character groups
# followed by one last group which is either complete or padded with "=".
BASE64_RE = re.compile(
    '^(?:[a-zA-Z0-9+/]{4})*'
    '(?:[a-zA-Z0-9+/]{2}==|[a-zA-Z0-9+/]{3}=|[a-zA-Z0-9+/]{4})$'
)


def decode_base64(data):
Expand All @@ -11,3 +37,42 @@ def decode_base64(data):
if missing_padding != 0:
data += b'=' * (4 - missing_padding)
return base64.decodestring(data)


def is_base64(data):
    """
    Tell whether a string looks like base64 encoded data.

    Deciding this is hard: simply decoding with base64.b64decode yields a lot
    of false positives because it successfully decodes strings containing
    characters outside of the base64 RFC. The real work is delegated to
    maybe_decode_base64(); only its boolean verdict is kept here.

    :param data: A string we saw in the web application
    :return: True if data is a base64 encoded string
    """
    return maybe_decode_base64(data)[0]


def maybe_decode_base64(data):
    """
    Decode a string only when it really looks like base64.

    Deciding this is hard: simply decoding with base64.b64decode yields a lot
    of false positives because it successfully decodes strings containing
    characters outside of the base64 RFC. Three filters are applied in order:
    a minimum length, the strict BASE64_RE pattern and the decoding itself.

    :param data: A string we saw in the web application
    :return: A tuple containing True and the decoded string if the data was a
             base64 encoded string. A tuple containing False and None if the
             data wasn't a base64 encoded string.
    """
    not_base64 = (False, None)

    # Only long strings are considered, short ones are rejected right away
    if len(data) < 16:
        return not_base64

    if BASE64_RE.match(data) is None:
        return not_base64

    try:
        return True, decode_base64(data)
    except binascii.Error:
        # Passed the regular expression but the decoder refused it
        return not_base64
31 changes: 1 addition & 30 deletions w3af/plugins/audit/deserialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,23 @@
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
"""
import re
import os
import json
import base64
import binascii

import w3af.core.data.constants.severity as severity

from w3af import ROOT_PATH
from w3af.core.controllers.delay_detection.exact_delay_controller import ExactDelayController
from w3af.core.controllers.delay_detection.exact_delay import ExactDelay
from w3af.core.controllers.plugins.audit_plugin import AuditPlugin
from w3af.core.data.misc.base64_nopadding import decode_base64
from w3af.core.data.misc.base64_nopadding import is_base64
from w3af.core.data.fuzzer.fuzzer import create_mutants
from w3af.core.data.dc.generic.form import Form
from w3af.core.data.kb.vuln import Vuln
from w3af.core.data.parsers.utils.form_constants import INPUT_TYPE_FILE, INPUT_TYPE_HIDDEN


# Strict matcher for padded base64. The base64 alphabet contains both upper
# and lower case letters, so the character classes must accept a-z as well as
# A-Z; an upper-case-only class rejects almost every real base64 string.
BASE64_RE = re.compile('^(?:[a-zA-Z0-9+/]{4})*(?:[a-zA-Z0-9+/]{2}==|[a-zA-Z0-9+/]{3}=|[a-zA-Z0-9+/]{4})$')


class deserialization(AuditPlugin):
"""
Identify deserialization vulnerabilities.
Expand Down Expand Up @@ -293,27 +288,3 @@ def is_pickled_data(data):
:return: True if the data looks like a python pickle
"""
return data.endswith('\n.')


def is_base64(data):
    """
    Tell whether a string looks like base64 encoded data.

    Deciding this is hard: simply decoding with base64.b64decode yields a lot
    of false positives because it successfully decodes strings containing
    characters outside of the base64 RFC.

    :param data: A string we saw in the web application
    :return: True if data is a base64 encoded string
    """
    # At least for this plugin we want long base64 strings, and they also
    # need to pass the strict regular expression before we try to decode
    if len(data) < 16 or not BASE64_RE.match(data):
        return False

    try:
        decode_base64(data)
    except binascii.Error:
        return False
    else:
        return True
206 changes: 206 additions & 0 deletions w3af/plugins/grep/serialized_object.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
"""
serialized_object.py
Copyright 2018 Andres Riancho
This file is part of w3af, http://w3af.org/ .
w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.
w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
"""
import re
import zlib
import itertools

from collections import deque

import w3af.core.data.constants.severity as severity

from w3af.core.controllers.plugins.grep_plugin import GrepPlugin
from w3af.core.data.misc.encoding import smart_str_ignore
from w3af.core.data.misc.base64_nopadding import maybe_decode_base64
from w3af.core.data.dc.cookie import Cookie
from w3af.core.data.kb.vuln import Vuln
from w3af.core.data.kb.info_set import InfoSet
from w3af.core.data.dc.factory import dc_from_hdrs_post


class serialized_object(GrepPlugin):
    """
    Find serialized objects sent by the Web application

    :author: Andres Riancho ([email protected])
    """

    # Maximum number of parameter value hashes remembered by the plugin
    CACHE_MAX_SIZE = 100

    # Maps a programming language name to the regular expressions which
    # match the beginning of an object serialized with that language
    SERIALIZED_OBJECT_RE = {
        'PHP': [
            re.compile(r'^(a|O):\d{1,3}:({[sai]|")'),
        ]
    }

    def __init__(self):
        GrepPlugin.__init__(self)

        # Bounded FIFO of recently seen parameter value hashes. The deque
        # discards the oldest entry by itself once maxlen is reached.
        self._cache = deque(maxlen=self.CACHE_MAX_SIZE)

    def grep(self, request, response):
        """
        Plugin entry point. Search for serialized objects in the parameters
        sent in the HTTP request.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None, results are saved to the kb.
        """
        for parameter_name, parameter_value in self._get_all_parameters(request):

            # Performance enhancement
            if self._should_skip_analysis(parameter_value):
                continue

            for language, regular_expressions in self.SERIALIZED_OBJECT_RE.iteritems():
                for serialized_object_re in regular_expressions:

                    self._analyze_param(request,
                                        response,
                                        parameter_name,
                                        parameter_value,
                                        language,
                                        serialized_object_re)

    def _should_skip_analysis(self, parameter_value):
        """
        This method was introduced to improve the overall performance of the
        serialized_object plugin. It tries to prevent the multiple regular
        expressions from being applied against strings which we know (with
        a good degree of certainty) that will not be a serialized object.

        This method also has a small cache to prevent analyzing the same
        parameter value multiple times. The cache stores adler32 checksums
        instead of the values, so two different values with the same checksum
        are treated as the same value.

        :param parameter_value: The parameter value to inspect
        :return: True if we should skip analysis phase for this parameter
        """
        if len(parameter_value) <= 16:
            # Really short strings can't contain a serialized object
            return True

        parameter_value = smart_str_ignore(parameter_value)
        pv_hash = zlib.adler32(parameter_value)

        if pv_hash in self._cache:
            # The parameter value was found in the cache, this means that it is
            # not the first time we see / analyze this parameter value (at least
            # not recently), so lets skip!
            return True

        # The parameter value will be analyzed, but I only want to do that
        # once (at least for some time) so I add the hash to the cache. The
        # cache size is kept under control by the deque's maxlen.
        self._cache.append(pv_hash)

        return False

    def _analyze_param(self, request, response, parameter_name, parameter_value,
                       language, serialized_object_re):
        """
        Check if one parameter holds a serialized object

        :param request: The HTTP request which holds the parameter
        :param response: The HTTP response
        :param parameter_name: The name of the parameter
        :param parameter_value: The parameter value (might have been decoded from b64)
        :param language: The programming language
        :param serialized_object_re: The regular expression to match
        :return: None. We just save the vulnerability to the KB
        """
        if not serialized_object_re.search(parameter_value):
            return

        # We found a match! The parameter value is a serialized object
        # Just report this to get the user's attention
        desc = ('Identified a %s serialized object being sent by the web'
                ' application in a request to "%s" in a parameter named "%s".'
                ' While this is not a vulnerability by itself, it is a strong'
                ' indicator of potential insecure deserialization issues.')
        desc %= (language, request.get_url(), parameter_name)

        v = Vuln('Serialized object', desc, severity.LOW, response.id, self.get_name())

        v.set_url(response.get_url())
        v.add_to_highlight(parameter_value)

        # The parameter name is the tag used by the info set to group findings
        v[SerializedObjectInfoSet.ITAG] = parameter_name

        self.kb_append_uniq_group(self,
                                  'serialized_object', v,
                                  group_klass=SerializedObjectInfoSet)

    def _get_all_parameters(self, request):
        """
        Walk the tokens of the query string, the data container built from the
        request headers and data, the headers themselves and the cookie header.

        Each token is yielded as is. When the value is base64 encoded a second
        tuple, with the same name and the decoded value, is yielded too.

        :param request: The HTTP request
        :yield: All the HTTP request parameters as tuples of (name, value)
        """
        headers = request.get_headers()
        query_string = request.get_uri().get_querystring()
        dc = dc_from_hdrs_post(headers, request.get_data())

        cookie_str, _ = headers.iget('cookie', '')
        cookie_dc = Cookie(cookie_str)

        token_generators = itertools.chain(
            query_string.iter_tokens(),
            dc.iter_tokens(),
            headers.iter_tokens(),
            cookie_dc.iter_tokens()
        )

        for token in token_generators:
            token_name = token.get_name()
            token_value = token.get_value()

            yield token_name, token_value

            # Handle the case where the parameter is base64 encoded
            is_b64, decoded_data = maybe_decode_base64(token_value)
            if is_b64:
                yield token_name, decoded_data

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
        This plugin identifies serialized objects in HTTP request parameters.

        While sending serialized objects in HTTP requests is not a vulnerability
        by itself, these objects could be abused by an attacker to perform
        attacks such as PHP Object Injection.
        """


class SerializedObjectInfoSet(InfoSet):
    """
    InfoSet passed as group_klass when serialized object findings are saved
    to the knowledge base. ITAG is the key under which the parameter name is
    stored in each finding, TEMPLATE is the text used to describe the group.
    """
    TEMPLATE = (
        'A total of {{ uris|length }} HTTP requests contained a serialized'
        ' object in the parameter with name "{{ parameter_name }}". The first'
        ' ten matching URLs are:\n'
        '{% for url in uris[:10] %}'
        ' - {{ url }}\n'
        '{% endfor %}'
    )
    ITAG = 'parameter_name'
Loading

0 comments on commit 23f77b5

Please sign in to comment.