Skip to content
This repository has been archived by the owner on Apr 3, 2024. It is now read-only.

created test for valid selector that does not increase time #79

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 27 additions & 17 deletions Wappalyzer/Wappalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import pathlib
import requests
import soupsieve as sv

from datetime import datetime, timedelta
from typing import Optional
Expand All @@ -16,6 +17,14 @@

logger = logging.getLogger(name="python-Wappalyzer")

def is_valid_selector(sel: str) -> bool:
    """
    Tell whether ``sel`` is a CSS selector that soupsieve is able to compile.

    :param sel: CSS selector string to check.
    :returns: ``True`` when ``sv.compile(sel)`` succeeds, ``False`` when it
        raises ``SelectorSyntaxError`` or ``NotImplementedError``.
    """
    try:
        sv.compile(sel)
    except (sv.SelectorSyntaxError, NotImplementedError):
        # The selector is passed as a lazy %-style argument, so the message
        # needs a matching placeholder; without one the logging module fails
        # to format the record and the selector is never reported.
        logger.debug("Broken Selector: %s", sel)
        return False
    return True

class WappalyzerError(Exception):
# unused for now
"""
Expand Down Expand Up @@ -219,23 +228,24 @@ def _has_technology(self, tech_fingerprint: Fingerprint, webpage: IWebPage) -> b
# - "text": "regex": check if the .innerText property of the element that matches the css selector matches the regex (with version extraction).
# - "attributes": {dict from attr name to regex}: check if the attribute value of the element that matches the css selector matches the regex (with version extraction).
for selector in tech_fingerprint.dom:
for item in webpage.select(selector.selector):
if selector.exists:
self._set_detected_app(webpage.url, tech_fingerprint, 'dom', Pattern(string=selector.selector), value='')
has_tech = True
if selector.text:
for pattern in selector.text:
if pattern.regex.search(item.inner_html):
self._set_detected_app(webpage.url, tech_fingerprint, 'dom', pattern, value=item.inner_html)
has_tech = True
if selector.attributes:
for attrname, patterns in list(selector.attributes.items()):
_content = item.attributes.get(attrname)
if _content:
for pattern in patterns:
if pattern.regex.search(_content):
self._set_detected_app(webpage.url, tech_fingerprint, 'dom', pattern, value=_content)
has_tech = True
if is_valid_selector(selector.selector):
for item in webpage.select(selector.selector):
if selector.exists:
self._set_detected_app(webpage.url, tech_fingerprint, 'dom', Pattern(string=selector.selector), value='')
has_tech = True
if selector.text:
for pattern in selector.text:
if pattern.regex.search(item.inner_html):
self._set_detected_app(webpage.url, tech_fingerprint, 'dom', pattern, value=item.inner_html)
has_tech = True
if selector.attributes:
for attrname, patterns in list(selector.attributes.items()):
_content = item.attributes.get(attrname)
if _content:
for pattern in patterns:
if pattern.regex.search(_content):
self._set_detected_app(webpage.url, tech_fingerprint, 'dom', pattern, value=_content)
has_tech = True
return has_tech

def _set_detected_app(self, url:str,
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name = "python-Wappalyzer",
version = "0.4.0",
version = "0.4.2",
description = "Python implementation of the Wappalyzer web application "
"detection utility",
long_description = (pathlib.Path(__file__).parent / "README.rst").read_text(),
Expand Down Expand Up @@ -31,4 +31,4 @@
"types-requests", "types-pkg_resources", "aioresponses"]
},
python_requires = '>=3.6',
)
)