diff --git a/w3af/conftest.py b/w3af/conftest.py new file mode 100644 index 0000000000..d00b6fe14e --- /dev/null +++ b/w3af/conftest.py @@ -0,0 +1,30 @@ +import pytest + +from w3af.core.data.dc.headers import Headers +from w3af.core.data.parsers.doc.url import URL +from w3af.core.data.url.HTTPRequest import HTTPRequest +from w3af.core.data.url.HTTPResponse import HTTPResponse + + +@pytest.fixture +def http_response(): + url = URL('http://example.com/') + headers = Headers([('content-type', 'text/html')]) + return HTTPResponse( + 200, + '
', + headers, + url, + url, + ) + + +@pytest.fixture +def http_request(): + url = URL('http://example.com/') + headers = Headers([('content-type', 'text/html')]) + return HTTPRequest( + url, + headers, + method='GET', + ) diff --git a/w3af/core/controllers/chrome/crawler/tests/frameworks/test_angularjs_basics.py b/w3af/core/controllers/chrome/crawler/tests/frameworks/test_angularjs_basics.py index e87ae5734d..12bd36ac85 100644 --- a/w3af/core/controllers/chrome/crawler/tests/frameworks/test_angularjs_basics.py +++ b/w3af/core/controllers/chrome/crawler/tests/frameworks/test_angularjs_basics.py @@ -24,6 +24,7 @@ from w3af.core.controllers.chrome.tests.helpers import ExtendedHttpRequestHandler +@pytest.mark.skip('uses internet') class AngularBasicTest(BaseChromeCrawlerTest): def test_angular_click(self): self._unittest_setup(AngularButtonClickRequestHandler) diff --git a/w3af/core/controllers/chrome/crawler/tests/frameworks/test_react_basics.py b/w3af/core/controllers/chrome/crawler/tests/frameworks/test_react_basics.py index 430534c6b4..fd0ad7e444 100644 --- a/w3af/core/controllers/chrome/crawler/tests/frameworks/test_react_basics.py +++ b/w3af/core/controllers/chrome/crawler/tests/frameworks/test_react_basics.py @@ -23,6 +23,7 @@ from w3af.core.controllers.chrome.crawler.tests.base import BaseChromeCrawlerTest +@pytest.mark.skip('uses internet') class ReactBasicTest(BaseChromeCrawlerTest): def test_react_hello_world_app(self): url = 'http://react-hello-world-app.surge.sh/' diff --git a/w3af/core/controllers/chrome/crawler/tests/frameworks/test_vue_basics.py b/w3af/core/controllers/chrome/crawler/tests/frameworks/test_vue_basics.py index a7611ac1cf..6eeebdc577 100644 --- a/w3af/core/controllers/chrome/crawler/tests/frameworks/test_vue_basics.py +++ b/w3af/core/controllers/chrome/crawler/tests/frameworks/test_vue_basics.py @@ -18,10 +18,13 @@ along with w3af; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 
""" +import pytest + from w3af.core.controllers.chrome.crawler.tests.base import BaseChromeCrawlerTest class ReactBasicTest(BaseChromeCrawlerTest): + @pytest.mark.skip('uses internet') def test_vue_todo_list(self): url = 'http://vue-todo-test.surge.sh' found_uris = self._crawl(url) diff --git a/w3af/core/controllers/chrome/devtools/exceptions.py b/w3af/core/controllers/chrome/devtools/exceptions.py index 38ac0e3561..45214b34c6 100644 --- a/w3af/core/controllers/chrome/devtools/exceptions.py +++ b/w3af/core/controllers/chrome/devtools/exceptions.py @@ -27,3 +27,11 @@ class ChromeInterfaceException(Exception): class ChromeInterfaceTimeout(Exception): pass + + +class ChromeScriptRuntimeException(Exception): + def __init__(self, message, function_called=None, *args): + if function_called: + message = "function: {}, exception: {}".format(function_called, message) + super(ChromeScriptRuntimeException, self).__init__(message, *args) + pass diff --git a/w3af/core/controllers/chrome/instrumented/frame_manager.py b/w3af/core/controllers/chrome/instrumented/frame_manager.py index 14f660559a..f96f46e8c6 100644 --- a/w3af/core/controllers/chrome/instrumented/frame_manager.py +++ b/w3af/core/controllers/chrome/instrumented/frame_manager.py @@ -166,7 +166,7 @@ def _on_frame_navigated(self, message): # URL all the child frames are removed from Chrome, we should remove # them from our code too to mirror state if frame: - for child_frame_id, child_frame in frame.child_frames: + for child_frame_id, child_frame in frame.child_frames.items(): child_frame.detach(self) frame.set_navigated() diff --git a/w3af/core/controllers/chrome/instrumented/main.py b/w3af/core/controllers/chrome/instrumented/main.py index 41262e49ba..9b3672aa95 100644 --- a/w3af/core/controllers/chrome/instrumented/main.py +++ b/w3af/core/controllers/chrome/instrumented/main.py @@ -23,6 +23,7 @@ import json import w3af.core.controllers.output_manager as om +from w3af.core.controllers.chrome.devtools.exceptions import 
ChromeScriptRuntimeException from w3af.core.data.parsers.doc.url import URL from w3af.core.controllers.chrome.instrumented.instrumented_base import InstrumentedChromeBase @@ -297,11 +298,20 @@ def dispatch_js_event(self, selector, event_type): return True - def get_login_forms(self): + def get_login_forms(self, exact_css_selectors): """ + :param dict exact_css_selectors: Optional parameter containing css selectors + for part of form like username input or login button. :return: Yield LoginForm instances """ - result = self.js_runtime_evaluate('window._DOMAnalyzer.getLoginForms()') + func = ( + 'window._DOMAnalyzer.getLoginForms("{}", "{}")' + ) + func = func.format( + exact_css_selectors.get('username_input', '').replace('"', '\\"'), + exact_css_selectors.get('login_button', '').replace('"', '\\"'), + ) + result = self.js_runtime_evaluate(func) if result is None: raise EventTimeout('The event execution timed out') @@ -316,11 +326,20 @@ def get_login_forms(self): yield login_form - def get_login_forms_without_form_tags(self): + def get_login_forms_without_form_tags(self, exact_css_selectors): """ + :param dict exact_css_selectors: Optional parameter containing css selectors + for part of form like username input or login button. 
:return: Yield LoginForm instances """ - result = self.js_runtime_evaluate('window._DOMAnalyzer.getLoginFormsWithoutFormTags()') + func = ( + 'window._DOMAnalyzer.getLoginFormsWithoutFormTags("{}", "{}")' + ) + func = func.format( + exact_css_selectors.get('username_input', '').replace('"', '\\"'), + exact_css_selectors.get('login_button', '').replace('"', '\\"'), + ) + result = self.js_runtime_evaluate(func) if result is None: raise EventTimeout('The event execution timed out') @@ -406,9 +425,9 @@ def focus(self, selector): if result is None: return None - node_ids = result.get('result', {}).get('nodeIds', None) + node_ids = result.get('result', {}).get('nodeIds') - if node_ids is None: + if not node_ids: msg = ('The call to chrome.focus() failed.' ' CSS selector "%s" returned no nodes (did: %s)') args = (selector, self.debugging_id) @@ -589,19 +608,13 @@ def js_runtime_evaluate(self, expression, timeout=5): timeout=timeout) # This is a rare case where the DOM is not present - if result is None: - return None - - if 'result' not in result: - return None - - if 'result' not in result['result']: - return None - - if 'value' not in result['result']['result']: - return None - - return result['result']['result']['value'] + runtime_exception = result.get('result', {}).get('exceptionDetails') + if runtime_exception: + raise ChromeScriptRuntimeException( + runtime_exception, + function_called=expression + ) + return result.get('result', {}).get('result', {}).get('value', None) def get_js_variable_value(self, variable_name): """ diff --git a/w3af/core/controllers/chrome/js/dom_analyzer.js b/w3af/core/controllers/chrome/js/dom_analyzer.js index b8077b60a3..9b113de676 100644 --- a/w3af/core/controllers/chrome/js/dom_analyzer.js +++ b/w3af/core/controllers/chrome/js/dom_analyzer.js @@ -330,7 +330,7 @@ var _DOMAnalyzer = _DOMAnalyzer || { if( !_DOMAnalyzer.eventIsValidForTagName( tag_name, type ) ) return false; let selector = OptimalSelect.getSingleSelector(element); - + // 
node_type is https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType#Node_type_constants _DOMAnalyzer.event_listeners.push({"tag_name": tag_name, "node_type": element.nodeType, @@ -865,6 +865,48 @@ var _DOMAnalyzer = _DOMAnalyzer || { return false; }, + /** + * This is naive function which takes parentElement (the login form) and + * tries to find username input field within it. + * @param {Node} parentElement - parent element to scope to document.querySelectorAll() + * @param {String} exactSelector - optional CSS selector. If provided prevents + * using standard selectors + * @returns {NodeList} - result of querySelectorAll() + */ + _getUsernameInput(parentElement, exactSelector = '') { + if (exactSelector) { + return document.querySelectorAll(exactSelector, parentElement); + } + result = document.querySelectorAll("input[type='email']", parentElement); + if (!result.length) { + result = document.querySelectorAll("input[type='text']", parentElement); + } + return result; + }, + + /** + * This is naive function which takes parentElement (the login form) and tries + * to find submit button within it. + * @param {Node} parentElement - parent element to scope to document.querySelectorAll() + * @param {String} exactSelector - optional CSS selector. If provided prevents + * using standard selectors + * @returns {NodeList} - result of querySelectorAll() + */ + _getSubmitButton(parentElement, exactSelector = '') { + if (exactSelector) { + return document.querySelectorAll(exactSelector, parentElement); + } + result = document.querySelectorAll("input[type='submit']", parentElement); + if (!result.length) { + result = document.querySelectorAll("button[type='submit']", parentElement); + } + // Maybe it's just normal button without type="submit"... + if (!result.length) { + result = document.querySelectorAll('button', parentElement); + } + return result; + }, + /** * Return the CSS selector for the login forms which exist in the DOM. 
* @@ -874,8 +916,12 @@ var _DOMAnalyzer = _DOMAnalyzer || { * - , and * - * + * @param {String} usernameCssSelector - CSS selector for username input. If + * provided we won't try to find username input automatically. + * @param {String} submitButtonCssSelector - CSS selector for submit button. If + * provided we won't try to find submit button automatically. */ - getLoginForms: function () { + getLoginForms: function (usernameCssSelector = '', submitButtonCssSelector = '') { let login_forms = []; // First we identify the forms with a password field using a descendant Selector @@ -898,7 +944,7 @@ var _DOMAnalyzer = _DOMAnalyzer || { let form = forms[0]; // Finally we confirm that the form has a type=text input - let text_fields = document.querySelectorAll("input[type='text']", form) + let text_fields = this._getUsernameInput(form, usernameCssSelector); // Zero text fields is most likely a password-only login form // Two text fields or more is most likely a registration from @@ -906,7 +952,7 @@ var _DOMAnalyzer = _DOMAnalyzer || { if (text_fields.length !== 1) continue; // And if there is a submit button I want that selector too - let submit_fields = document.querySelectorAll("input[type='submit']", form) + let submit_fields = this._getSubmitButton(form, submitButtonCssSelector); let submit_selector = null; if (submit_fields.length !== 0) { @@ -936,8 +982,12 @@ var _DOMAnalyzer = _DOMAnalyzer || { * - , and * - * + * @param {String} usernameCssSelector - CSS selector for username input. If + * provided we won't try to find username input automatically. + * @param {String} submitButtonCssSelector - CSS selector for submit button. If + * provided we won't try to find submit button automatically. 
*/ - getLoginFormsWithoutFormTags: function () { + getLoginFormsWithoutFormTags: function (usernameCssSelector = '', submitButtonCssSelector = '') { let login_forms = []; // First we identify the password fields @@ -962,7 +1012,7 @@ var _DOMAnalyzer = _DOMAnalyzer || { // go up one more level, and so one. // // Find if this parent has a type=text input - let text_fields = document.querySelectorAll("input[type='text']", parent) + let text_fields = this._getUsernameInput(parent, usernameCssSelector); // Zero text fields is most likely a password-only login form // Two text fields or more is most likely a registration from @@ -974,7 +1024,7 @@ var _DOMAnalyzer = _DOMAnalyzer || { } // And if there is a submit button I want that selector too - let submit_fields = document.querySelectorAll("input[type='submit']", parent) + let submit_fields = this._getSubmitButton(parent, submitButtonCssSelector) let submit_selector = null; if (submit_fields.length !== 0) { @@ -999,6 +1049,12 @@ var _DOMAnalyzer = _DOMAnalyzer || { return JSON.stringify(login_forms); }, + clickOnSelector(exactSelector) { + let element = document.querySelector(exactSelector); + element.click(); + return 'success' + }, + sliceAndSerialize: function (filtered_event_listeners, start, count) { return JSON.stringify(filtered_event_listeners.slice(start, start + count)); }, @@ -1142,4 +1198,4 @@ var _DOMAnalyzer = _DOMAnalyzer || { }; -_DOMAnalyzer.initialize(); \ No newline at end of file +_DOMAnalyzer.initialize(); diff --git a/w3af/core/controllers/chrome/login/find_form/main.py b/w3af/core/controllers/chrome/login/find_form/main.py index 2ee45ad7f5..2e42e13c57 100644 --- a/w3af/core/controllers/chrome/login/find_form/main.py +++ b/w3af/core/controllers/chrome/login/find_form/main.py @@ -36,16 +36,24 @@ def __init__(self, chrome, debugging_id): self.chrome = chrome self.debugging_id = debugging_id - def find_forms(self): + def find_forms(self, css_selectors=None): """ + :param dict css_selectors: optional 
dict of css selectors used to find + elements of form (like username input or login button) :return: Yield forms as they are found by each strategy """ + if css_selectors: + msg = 'Form finder uses the CSS selectors: "%s" (did: %s)' + args = (css_selectors, self.debugging_id) + om.out.debug(msg % args) + identified_forms = [] for strategy_klass in self.STRATEGIES: - strategy = strategy_klass(self.chrome, self.debugging_id) + strategy = strategy_klass(self.chrome, self.debugging_id, css_selectors) try: + strategy.prepare() for form in strategy.find_forms(): if form in identified_forms: continue @@ -55,6 +63,6 @@ def find_forms(self): except Exception as e: msg = 'Form finder strategy %s raised exception: "%s" (did: %s)' args = (strategy.get_name(), - e, + repr(e), self.debugging_id) om.out.debug(msg % args) diff --git a/w3af/core/controllers/chrome/login/find_form/strategies/base_find_form_strategy.py b/w3af/core/controllers/chrome/login/find_form/strategies/base_find_form_strategy.py new file mode 100644 index 0000000000..6c635adc44 --- /dev/null +++ b/w3af/core/controllers/chrome/login/find_form/strategies/base_find_form_strategy.py @@ -0,0 +1,35 @@ +from w3af.core.controllers.chrome.instrumented.exceptions import EventTimeout + + +class BaseFindFormStrategy: + def __init__(self, chrome, debugging_id, exact_css_selectors=None): + """ + :param InstrumentedChrome chrome: + :param String debugging_id: + :param dict exact_css_selectors: Optional parameter containing css selectors + for part of form like username input or login button. 
+ """ + self.chrome = chrome + self.debugging_id = debugging_id + self.exact_css_selectors = exact_css_selectors or {} + + def prepare(self): + """ + :raises EventTimeout: + Hook called before find_forms() + """ + form_activator_selector = self.exact_css_selectors.get('form_activator') + if form_activator_selector: + func = 'window._DOMAnalyzer.clickOnSelector("{}")'.format( + form_activator_selector.replace('"', '\\"') + ) + result = self.chrome.js_runtime_evaluate(func) + if result is None: + raise EventTimeout('The event execution timed out') + + def find_forms(self): + raise NotImplementedError + + @staticmethod + def get_name(): + return 'BaseFindFormStrategy' diff --git a/w3af/core/controllers/chrome/login/find_form/strategies/form_tag.py b/w3af/core/controllers/chrome/login/find_form/strategies/form_tag.py index bf47ba4a17..ec6da6aab0 100644 --- a/w3af/core/controllers/chrome/login/find_form/strategies/form_tag.py +++ b/w3af/core/controllers/chrome/login/find_form/strategies/form_tag.py @@ -19,12 +19,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA """ +from w3af.core.controllers.chrome.login.find_form.strategies.base_find_form_strategy import \ + BaseFindFormStrategy -class FormTagStrategy(object): - def __init__(self, chrome, debugging_id): - self.chrome = chrome - self.debugging_id = debugging_id +class FormTagStrategy(BaseFindFormStrategy): def find_forms(self): """ @@ -37,7 +36,7 @@ def _simple_form_with_username_password_submit(self): """ :return: Yield forms that have username, password and submit inputs """ - for login_form in self.chrome.get_login_forms(): + for login_form in self.chrome.get_login_forms(self.exact_css_selectors): yield login_form @staticmethod diff --git a/w3af/core/controllers/chrome/login/find_form/strategies/password_and_parent.py b/w3af/core/controllers/chrome/login/find_form/strategies/password_and_parent.py index 1f64780502..4dbf7c654a 100644 --- 
a/w3af/core/controllers/chrome/login/find_form/strategies/password_and_parent.py +++ b/w3af/core/controllers/chrome/login/find_form/strategies/password_and_parent.py @@ -19,12 +19,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA """ +from w3af.core.controllers.chrome.login.find_form.strategies.base_find_form_strategy import \ + BaseFindFormStrategy -class PasswordAndParentStrategy(object): - def __init__(self, chrome, debugging_id): - self.chrome = chrome - self.debugging_id = debugging_id +class PasswordAndParentStrategy(BaseFindFormStrategy): def find_forms(self): """ @@ -32,8 +31,9 @@ def find_forms(self): :return: Yield forms which are identified by the strategy algorithm """ - for login_form in self.chrome.get_login_forms_without_form_tags(): + for login_form in self.chrome.get_login_forms_without_form_tags(self.exact_css_selectors): yield login_form - def get_name(self): + @staticmethod + def get_name(): return 'PasswordAndParent' diff --git a/w3af/core/controllers/chrome/login/submit_form/main.py b/w3af/core/controllers/chrome/login/submit_form/main.py index b3954a5b92..f2370de798 100644 --- a/w3af/core/controllers/chrome/login/submit_form/main.py +++ b/w3af/core/controllers/chrome/login/submit_form/main.py @@ -19,11 +19,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA """ +import traceback + from w3af.core.controllers import output_manager as om from w3af.core.controllers.chrome.login.submit_form.strategies.press_enter import PressEnterStrategy from w3af.core.controllers.chrome.login.submit_form.strategies.press_tab_enter import PressTabEnterStrategy -from w3af.core.controllers.chrome.login.submit_form.strategies.form_input_submit import FormInputSubmitStrategy class FormSubmitter(object): @@ -31,7 +32,7 @@ class FormSubmitter(object): STRATEGIES = [ PressEnterStrategy, PressTabEnterStrategy, - #FormInputSubmitStrategy + # FormInputSubmitStrategy ] def __init__(self, chrome, form, login_form_url, 
username, password, debugging_id): @@ -91,3 +92,4 @@ def _handle_exception(self, strategy, e): e, self.debugging_id) om.out.debug(msg % args) + om.out.debug(traceback.format_exc()) diff --git a/w3af/core/controllers/chrome/proxy/tests/test_proxy.py b/w3af/core/controllers/chrome/proxy/tests/test_proxy.py index 1dcf836e46..873f7f21dc 100644 --- a/w3af/core/controllers/chrome/proxy/tests/test_proxy.py +++ b/w3af/core/controllers/chrome/proxy/tests/test_proxy.py @@ -36,7 +36,7 @@ from w3af.core.data.url.extended_urllib import ExtendedUrllib -pytestmarks = pytest.mark.deprecated +pytestmark = pytest.mark.deprecated class TestProxy(unittest.TestCase): diff --git a/w3af/core/controllers/daemons/proxy/tests/test_proxy.py b/w3af/core/controllers/daemons/proxy/tests/test_proxy.py index 1d5b1dc837..e1ded8af38 100644 --- a/w3af/core/controllers/daemons/proxy/tests/test_proxy.py +++ b/w3af/core/controllers/daemons/proxy/tests/test_proxy.py @@ -54,7 +54,6 @@ def setUp(self): self.proxy_opener = urllib2.build_opener(proxy_handler, urllib2.HTTPHandler) - @pytest.mark.deprecated def tearDown(self): # Shutdown the proxy server self._proxy.stop() diff --git a/w3af/core/controllers/dependency_check/requirements.py b/w3af/core/controllers/dependency_check/requirements.py index c981441010..9275c3ffcf 100644 --- a/w3af/core/controllers/dependency_check/requirements.py +++ b/w3af/core/controllers/dependency_check/requirements.py @@ -114,6 +114,7 @@ # Calculate distances between two strings PIPDependency('Levenshtein', 'python-Levenshtein', '0.12.0'), + PIPDependency('Zeep', 'zeep', '3.4.0'), ] GUI_PIP_EXTRAS = [PIPDependency('xdot', 'xdot', '0.6')] diff --git a/w3af/core/controllers/misc/tests/test_is_private_site.py b/w3af/core/controllers/misc/tests/test_is_private_site.py index 2fad8e75cd..d2a5d3593f 100644 --- a/w3af/core/controllers/misc/tests/test_is_private_site.py +++ b/w3af/core/controllers/misc/tests/test_is_private_site.py @@ -40,6 +40,6 @@ def 
test_is_private_site_true_case03(self): def test_is_private_site_false_case01(self): self.assertFalse(is_private_site('192.1.0.1')) + @pytest.mark.skip('uses internet') def test_is_private_site_false_case02(self): self.assertFalse(is_private_site('www.w3af.org')) - diff --git a/w3af/core/data/options/option_list.py b/w3af/core/data/options/option_list.py index 74f3c4820d..8d883bc949 100644 --- a/w3af/core/data/options/option_list.py +++ b/w3af/core/data/options/option_list.py @@ -35,6 +35,18 @@ def add(self, option): self._internal_opt_list.append(option) append = add + def pop(self, option): + """ + DANGEROUS!! + You will probably want to deepcopy the OptionList instance before + modifying it with this method. If you'll modify the original OptionList + then user will have to set this option again. + """ + if not isinstance(option, int): + option_names = [item.get_name() for item in self._internal_opt_list] + option = option_names.index(option) + return self._internal_opt_list.pop(option) + def __len__(self): return len(self._internal_opt_list) diff --git a/w3af/core/data/parsers/doc/baseparser.py b/w3af/core/data/parsers/doc/baseparser.py index 6cb332e28b..4f7e486155 100644 --- a/w3af/core/data/parsers/doc/baseparser.py +++ b/w3af/core/data/parsers/doc/baseparser.py @@ -145,6 +145,9 @@ def get_references(self): """ raise NotImplementedError(NOT_IMPLEMENTED_FMT % 'get_references') + def get_fuzzable_requests(self): + return [] + def get_emails(self, domain=None): """ :return: A set with email addresses diff --git a/w3af/core/data/parsers/doc/open_api/requests.py b/w3af/core/data/parsers/doc/open_api/requests.py index 19366b0d7f..1a28daef88 100644 --- a/w3af/core/data/parsers/doc/open_api/requests.py +++ b/w3af/core/data/parsers/doc/open_api/requests.py @@ -184,6 +184,8 @@ def get_uri(self): continue if param_def.param_spec['type'] == 'array': + if not parameters[param_name] and not param_def.required: + continue parameters[param_name] = parameters[param_name][0] if 
parameters: diff --git a/w3af/core/data/parsers/doc/open_api/specification.py b/w3af/core/data/parsers/doc/open_api/specification.py index 4aaa93344e..dc338fcf72 100644 --- a/w3af/core/data/parsers/doc/open_api/specification.py +++ b/w3af/core/data/parsers/doc/open_api/specification.py @@ -21,7 +21,6 @@ """ import json -import yaml import logging from yaml import load @@ -232,7 +231,8 @@ def _load_spec_dict(self): :return: The dict with the open api data """ try: - spec_dict = json.loads(self.http_response.body) + decoded_response = self.http_response.body.decode('ascii', 'ignore') + spec_dict = json.loads(decoded_response) except ValueError: # Seems like the OpenAPI was specified using Yaml instead of # JSON. Let's parse the Yaml data! diff --git a/w3af/core/data/parsers/doc/open_api/tests/data/array_not_required_model_items.json b/w3af/core/data/parsers/doc/open_api/tests/data/array_not_required_model_items.json new file mode 100644 index 0000000000..7a7877738f --- /dev/null +++ b/w3af/core/data/parsers/doc/open_api/tests/data/array_not_required_model_items.json @@ -0,0 +1,71 @@ +{ + "swagger": "2.0", + "info": { + "version": "1.0.0", + "title": "Swagger Petstore", + "description": "A sample API that uses a petstore as an example to demonstrate features in the swagger-2.0 specification", + "termsOfService": "http://swagger.io/terms/", + "contact": { + "name": "Swagger API Team" + }, + "license": { + "name": "MIT" + } + }, + "host": "petstore.swagger.io", + "basePath": "/api", + "schemes": [ + "http" + ], + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "paths": { + "/pets": { + "post": { + "description": "Add multiple pets", + "operationId": "addMultiplePets", + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "pets", + "in": "query", + "description": "array with pets to add", + "required": false, + "type": "array", + "items": {} + } + ], + "responses": { + "200": { + "description": "pet response", + 
"schema": { + "$ref": "#/definitions/Pet" + } + } + } + } + } + }, + "definitions": { + "Pet": { + "type": "object", + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "tag": { + "type": "string" + } + } + } + } +} diff --git a/w3af/core/data/parsers/doc/open_api/tests/data/swagger-special-chars.json b/w3af/core/data/parsers/doc/open_api/tests/data/swagger-special-chars.json new file mode 100644 index 0000000000..a3eeff15d9 --- /dev/null +++ b/w3af/core/data/parsers/doc/open_api/tests/data/swagger-special-chars.json @@ -0,0 +1,73 @@ +{ + "swagger": "2.0", + "info": { + "version": "1.0.0", + "title": "Swagger Petstore, special chars: ąęćźżó^żć√≤Ķńå", + "description": "A sample API that uses a petstore as an example to demonstrate features in the swagger-2.0 specification", + "termsOfService": "http://swagger.io/terms/", + "contact": { + "name": "Swagger API Team" + }, + "license": { + "name": "MIT" + } + }, + "host": "petstore.swagger.io", + "basePath": "/api", + "schemes": [ + "http" + ], + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "paths": { + "/pets": { + "get": { + "description": "Returns all pets from the system that the user has access to", + "produces": [ + "application/json" + ], + "responses": { + "200": { + "description": "A list of pets.", + "schema": { + "type": "array", + "items": { + "$ref": "#/definitions/Pet" + } + } + } + } + } + }, + "/pets/{ąęćźżó^żć√≤Ķńå}": { + "get": { + "description": "Let's see if I'll return an error" + } + } + }, + "definitions": { + "Pet": { + "type": "object", + "required": [ + "id", + "name" + ], + "properties": { + "id": { + "type": "integer", + "format": "int64" + }, + "name": { + "type": "string" + }, + "tag": { + "type": "string" + } + } + } + } +} diff --git a/w3af/core/data/parsers/doc/open_api/tests/test_requests.py b/w3af/core/data/parsers/doc/open_api/tests/test_requests.py index 45027e6090..2a454e128a 100644 --- 
a/w3af/core/data/parsers/doc/open_api/tests/test_requests.py +++ b/w3af/core/data/parsers/doc/open_api/tests/test_requests.py @@ -20,6 +20,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA """ +import json import unittest from w3af.core.data.parsers.doc.url import URL @@ -312,6 +313,24 @@ def test_array_with_model_items_param_in_json(self): self.assertEqual(fuzzable_request.get_headers(), e_headers) self.assertEqual(fuzzable_request.get_data(), e_data) + def test_array_param_not_required_in_json(self): + """ + Regression test when param type is array and param is not required. + Param must be in query, not in body. + """ + test_spec_filename = ( + 'w3af/core/data/parsers/doc/open_api/tests/data/array_not_required_model_items.json' + ) + with open(test_spec_filename, 'r') as file_: + specification_as_string = file_.read() + + http_response = self.generate_response(specification_as_string) + handler = SpecificationHandler(http_response) + data = [item for item in handler.get_api_information()] + for spec_obj in data: + factory = RequestFactory(*spec_obj) + req = factory.get_fuzzable_request() + def test_model_param_nested_allOf_in_json(self): specification_as_string = NestedModel().get_specification() http_response = self.generate_response(specification_as_string) diff --git a/w3af/core/data/parsers/doc/open_api/tests/test_specification.py b/w3af/core/data/parsers/doc/open_api/tests/test_specification.py index e6e96efb6a..3eb9636167 100644 --- a/w3af/core/data/parsers/doc/open_api/tests/test_specification.py +++ b/w3af/core/data/parsers/doc/open_api/tests/test_specification.py @@ -595,6 +595,18 @@ def test_parameter_handler_multiple_paths_and_headers(self): handler = SpecificationHandler(http_response) self.check_parameter_setting(handler) + def test_specification_handler_can_handle_spec_with_non_ascii_chars(self): + with open( + 'w3af/core/data/parsers/doc/open_api/tests/data/swagger-special-chars.json', + ) as file_: + spec_as_string = 
file_.read() + http_response = self.generate_response(spec_as_string) + spec_handler = SpecificationHandler(http_response) + result = spec_handler.get_api_information() + for _ in result: + pass + self.assertFalse(spec_handler._parsing_errors) + def check_parameter_setting(self, spec_handler): data = [d for d in spec_handler.get_api_information()] self.assertIsNotNone(data) diff --git a/w3af/core/data/parsers/doc/tests/test_wsdl.py b/w3af/core/data/parsers/doc/tests/test_wsdl.py new file mode 100644 index 0000000000..e272cc532f --- /dev/null +++ b/w3af/core/data/parsers/doc/tests/test_wsdl.py @@ -0,0 +1,153 @@ +import pytest +from mock import MagicMock, patch + +from w3af.core.data.dc.headers import Headers +from w3af.core.data.parsers.doc.url import URL +from w3af.core.data.parsers.doc.wsdl import ZeepTransport, WSDLParser +from w3af.core.data.url.HTTPResponse import HTTPResponse +from w3af.core.data.url.extended_urllib import ExtendedUrllib +from w3af.plugins.tests.plugin_testing_tools import NetworkPatcher + + +@pytest.fixture +def mocked_http_client(): + return MagicMock() + + +@pytest.fixture +def zeep_transport(mocked_http_client): + transport = ZeepTransport() + transport.uri_opener = mocked_http_client + return transport + + +@pytest.fixture +def zeep_transport_from_class(zeep_transport): + return lambda *args, **kwargs: zeep_transport + + +@pytest.fixture +def http_response(): + return HTTPResponse( + 200, + '', + Headers(), + URL('https://example.com/'), + URL('https://example.com/'), + ) + + +class TestZeepTransport: + def setup_method(self): + self.url = 'http://example.com/' + + def test_it_implements_all_needed_methods(self): + zeep_transport = ZeepTransport() + required_methods = [ + 'get', + 'load', + 'post', + 'post_xml', + ] + for method in required_methods: + assert hasattr(zeep_transport, method) + + def test_it_calls_http_client_on_get_method(self, zeep_transport, mocked_http_client): + zeep_transport.get(self.url, '', {}) + assert 
mocked_http_client.GET.called + + def test_it_calls_http_client_on_post_method(self, zeep_transport, mocked_http_client): + zeep_transport.post(self.url, 'some data', {}) + assert mocked_http_client.POST.called + + def test_it_calls_http_client_on_post_xml_method(self, zeep_transport, mocked_http_client): + from lxml import etree # feeding Zeep dependencies + zeep_transport.post_xml(self.url, etree.Element('test'), {}) + assert mocked_http_client.POST.called + + def test_it_loads_the_response_content(self, zeep_transport, mocked_http_client): + mocked_response = MagicMock(name='mocked_response') + mocked_response.body = 'test' + mocked_http_client.GET = MagicMock(return_value=mocked_response) + + result = zeep_transport.load(self.url) + assert result == 'test' + + def test_it_reports_requests_performed(self, zeep_transport): + assert not zeep_transport.requests_performed + zeep_transport.get(self.url, '', {}) + logged_request = { + 'url': self.url, + 'method': 'GET', + 'headers': {}, + 'data': None, + } + assert logged_request in zeep_transport.requests_performed + + def test_it_reports_proper_url_if_url_params_are_passed(self, zeep_transport): + params = {'test': True, 'some_val': 5} + zeep_transport.get(self.url, params, {}) + logged_request = { + 'url': '{}?test=True&some_val=5'.format(self.url), + 'method': 'GET', + 'headers': {}, + 'data': None, + } + assert logged_request in zeep_transport.requests_performed + + def test_it_reports_headers_properly(self, zeep_transport): + zeep_transport.get(self.url, '', {'test': True}) + logged_request = { + 'url': self.url, + 'method': 'GET', + 'headers': {'test': True}, + 'data': None, + } + assert logged_request in zeep_transport.requests_performed + + +class TestZeepTransportIntegration: + def test_it_can_perform_get_request(self): + url = 'http://example.com/' + with NetworkPatcher() as network_patcher: + zeep_transport = ZeepTransport() + zeep_transport.get(url, {}, {}) + assert url in 
network_patcher.mocked_server.urls_requested + + def test_it_can_perform_post_request(self): + url = 'http://example.com/' + with NetworkPatcher() as network_patcher: + zeep_transport = ZeepTransport() + zeep_transport.post(url, 'some data', {}) + assert url in network_patcher.mocked_server.urls_requested + + def test_it_can_load_url(self): + url = 'http://example.com/' + with NetworkPatcher() as network_patcher: + zeep_transport = ZeepTransport() + zeep_transport.load('http://example.com/') + assert url in network_patcher.mocked_server.urls_requested + + +class TestWSDLParserIntegration: + def test_wsdl_zeep_transport_uses_extended_urllib(self): + zeep_transport = ZeepTransport() + assert isinstance(zeep_transport.uri_opener, ExtendedUrllib) + + def test_it_uses_extended_urllib_for_performing_requests( + self, + mocked_http_client, + zeep_transport_from_class, + http_response, + ): + mocked_http_client.GET = MagicMock(return_value=http_response) + with patch('w3af.core.data.parsers.doc.wsdl.ZeepTransport', zeep_transport_from_class): + WSDLParser(http_response=http_response) + assert mocked_http_client.GET.called + + def test_it_produces_fuzzable_requests(self, http_response): + with NetworkPatcher(): + wsdl_parser = WSDLParser(http_response=http_response) + fuzzable_requests = wsdl_parser.get_fuzzable_requests() + assert len(fuzzable_requests) == 1 + assert fuzzable_requests[0].get_url() == http_response.get_url() diff --git a/w3af/core/data/parsers/doc/wsdl.py b/w3af/core/data/parsers/doc/wsdl.py index 13e71599d5..aa531015ec 100644 --- a/w3af/core/data/parsers/doc/wsdl.py +++ b/w3af/core/data/parsers/doc/wsdl.py @@ -19,194 +19,179 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA """ -import xml.parsers.expat as expat - -import SOAPpy - -import w3af.core.controllers.output_manager as om -from w3af.core.controllers.exceptions import BaseFrameworkException +import contextlib +import sys +import urllib +from cStringIO import StringIO + 
+import zeep +from requests import HTTPError +from zeep.exceptions import XMLSyntaxError + +import w3af.core.data.kb.knowledge_base as kb +from w3af.core.data.kb.info import Info +from w3af.core.data.parsers.doc.baseparser import BaseParser from w3af.core.data.parsers.doc.url import URL +from w3af.core.controllers import output_manager +from w3af.core.data.request.fuzzable_request import FuzzableRequest -class WSDLParser(object): +class ZeepTransport(zeep.Transport): + """ + Custom Zeep Transport class which overrides its methods to use w3af's HTTP client. + We don't call super() on any overridden method as we want to force Zeep to use + our client, not theirs. + + Tradeoff: + As WSDLParser has to be tightly coupled to Zeep by design, we also have to + tightly couple WSDLParser and ExtendedUrllib. That's because a parser is, + by design, not intended to perform any requests by itself, whereas Zeep is + constructed in such a way that it performs requests when it's + instantiated. + As parsers are not intended to make requests, there's also no obvious way to + pass uri_opener into the parser. 
+ """ + def __init__(self): + super(ZeepTransport, self).__init__() + from w3af.core.data.url.extended_urllib import ExtendedUrllib + self.uri_opener = ExtendedUrllib() + self.uri_opener.setup(disable_cache=True) + self.requests_performed = [] + + def get(self, address, params, headers): + self._save_request(address, method='GET', params=params, headers=headers) + return self.uri_opener.GET(address, params, headers=headers) + + def post(self, address, message, headers): + self._save_request(address, method='POST', data=message, headers=headers) + return self.uri_opener.POST(address, data=message, headers=headers) + + def post_xml(self, address, envelope, headers): + from zeep.wsdl.utils import etree_to_string + message = etree_to_string(envelope) + self._save_request(address, method='POST', data=message, headers=headers) + return self.uri_opener.POST(address, data=message, headers=headers) + + def load(self, url): + self._save_request(address=url, method='GET') + response = self.uri_opener.GET(url) + return response.body + + def _save_request(self, address, method, params=None, headers=None, data=None): + uri = address + if params: + uri += '?{}'.format(urllib.urlencode(params)) + self.requests_performed.append({ + 'url': uri, + 'method': method, + 'headers': headers, + 'data': data, + }) + + +class ZeepClientAdapter(zeep.Client): + def __init__(self, url, transport=None, *args, **kwargs): + transport = transport or ZeepTransport() + super(ZeepClientAdapter, self).__init__(url, transport=transport, *args, **kwargs) + + +class WSDLParser(BaseParser): """ This class parses WSDL documents. :author: Andres Riancho (andres.riancho@gmail.com) """ - def __init__(self): + def __init__(self, http_response): self._proxy = None - - def is_WSDL(self, data): - """ - This is not a 100% accurate test, the real WSDL parsing is performed - in "SOAPpy.WSDL.Proxy( xmlData )". This test was mostly added to - enhance framework's performance. 
- - :param data: A string that might represent a WSDL - :return: True if the data parameter is a WSDL document. - """ - return False - if '