From 877f3398c9eea9e0df7cb8fa800beaf6d9a5eb52 Mon Sep 17 00:00:00 2001 From: Spicy <58888570+brandonscholet@users.noreply.github.com> Date: Thu, 12 Jan 2023 12:43:18 -0600 Subject: [PATCH 1/4] ensuring selector is valid before parsing --- Wappalyzer/Wappalyzer.py | 43 ++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/Wappalyzer/Wappalyzer.py b/Wappalyzer/Wappalyzer.py index 8169048..bc0c80a 100644 --- a/Wappalyzer/Wappalyzer.py +++ b/Wappalyzer/Wappalyzer.py @@ -16,6 +16,14 @@ logger = logging.getLogger(name="python-Wappalyzer") +def is_valid_selector(sel): + try: + sv.compile(sel) + except (sv.SelectorSyntaxError, NotImplementedError): + print(sel) + return False + return True + class WappalyzerError(Exception): # unused for now """ @@ -219,23 +227,24 @@ def _has_technology(self, tech_fingerprint: Fingerprint, webpage: IWebPage) -> b # - "text": "regex": check if the .innerText property of the element that matches the css selector matches the regex (with version extraction). # - "attributes": {dict from attr name to regex}: check if the attribute value of the element that matches the css selector matches the regex (with version extraction). 
for selector in tech_fingerprint.dom: - for item in webpage.select(selector.selector): - if selector.exists: - self._set_detected_app(webpage.url, tech_fingerprint, 'dom', Pattern(string=selector.selector), value='') - has_tech = True - if selector.text: - for pattern in selector.text: - if pattern.regex.search(item.inner_html): - self._set_detected_app(webpage.url, tech_fingerprint, 'dom', pattern, value=item.inner_html) - has_tech = True - if selector.attributes: - for attrname, patterns in list(selector.attributes.items()): - _content = item.attributes.get(attrname) - if _content: - for pattern in patterns: - if pattern.regex.search(_content): - self._set_detected_app(webpage.url, tech_fingerprint, 'dom', pattern, value=_content) - has_tech = True + if is_valid_selector(selector.selector): + for item in webpage.select(selector.selector): + if selector.exists: + self._set_detected_app(webpage.url, tech_fingerprint, 'dom', Pattern(string=selector.selector), value='') + has_tech = True + if selector.text: + for pattern in selector.text: + if pattern.regex.search(item.inner_html): + self._set_detected_app(webpage.url, tech_fingerprint, 'dom', pattern, value=item.inner_html) + has_tech = True + if selector.attributes: + for attrname, patterns in list(selector.attributes.items()): + _content = item.attributes.get(attrname) + if _content: + for pattern in patterns: + if pattern.regex.search(_content): + self._set_detected_app(webpage.url, tech_fingerprint, 'dom', pattern, value=_content) + has_tech = True return has_tech def _set_detected_app(self, url:str, From 42d84562e46c9f21d1d7d006796e7c0393d5e348 Mon Sep 17 00:00:00 2001 From: Spicy <58888570+brandonscholet@users.noreply.github.com> Date: Thu, 12 Jan 2023 12:45:50 -0600 Subject: [PATCH 2/4] bumping version for selector fix --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 9c8c46a..4a5ffca 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name = 
"python-Wappalyzer", - version = "0.4.0", + version = "0.4.1", description = "Python implementation of the Wappalyzer web application " "detection utility", long_description = (pathlib.Path(__file__).parent / "README.rst").read_text(), @@ -31,4 +31,4 @@ "types-requests", "types-pkg_resources", "aioresponses"] }, python_requires = '>=3.6', -) \ No newline at end of file +) From 5266a18911b8bf004f6e3b1569909443cd982a12 Mon Sep 17 00:00:00 2001 From: Brandon Scholet Date: Thu, 12 Jan 2023 17:41:38 -0500 Subject: [PATCH 3/4] fixed missing import --- Wappalyzer/Wappalyzer.py | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Wappalyzer/Wappalyzer.py b/Wappalyzer/Wappalyzer.py index bc0c80a..cfd346d 100644 --- a/Wappalyzer/Wappalyzer.py +++ b/Wappalyzer/Wappalyzer.py @@ -7,6 +7,7 @@ import os import pathlib import requests +import soupsieve as sv from datetime import datetime, timedelta from typing import Optional diff --git a/setup.py b/setup.py index 4a5ffca..16fb7f6 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name = "python-Wappalyzer", - version = "0.4.1", + version = "0.4.2", description = "Python implementation of the Wappalyzer web application " "detection utility", long_description = (pathlib.Path(__file__).parent / "README.rst").read_text(), From 2576cfaeaf4488c700562c799c3b7e584c3fd0cb Mon Sep 17 00:00:00 2001 From: Spicy <58888570+brandonscholet@users.noreply.github.com> Date: Tue, 17 Jan 2023 16:15:39 -0600 Subject: [PATCH 4/4] Update Wappalyzer.py --- Wappalyzer/Wappalyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Wappalyzer/Wappalyzer.py b/Wappalyzer/Wappalyzer.py index cfd346d..f0036a0 100644 --- a/Wappalyzer/Wappalyzer.py +++ b/Wappalyzer/Wappalyzer.py @@ -21,7 +21,7 @@ def is_valid_selector(sel): try: sv.compile(sel) except (sv.SelectorSyntaxError, NotImplementedError): - print(sel) + logger.debug("Broken Selector: %s", sel) return False return True