From 9f7c9bc1e29b5b3a4d05786d05d5603f1ce90f46 Mon Sep 17 00:00:00 2001 From: dhruv1955 Date: Sat, 4 Oct 2025 23:36:13 +0530 Subject: [PATCH 1/5] Add proxy URL validation with regex check --- sherlock_project/sherlock.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 250175a57..3b6f418b4 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -756,7 +756,12 @@ def main(): print(f"A problem occurred while checking for an update: {error}") # Argument check - # TODO regex check on args.proxy + if args.proxy is not None: + # Validate proxy URL format + proxy_pattern = r'^(https?|socks[45])://[^\s/$.?#].[^\s]*$' + if not re.match(proxy_pattern, args.proxy): + raise ValueError(f"Invalid proxy URL format: {args.proxy}. Expected format: protocol://host:port (e.g., socks5://127.0.0.1:1080)") + if args.tor and (args.proxy is not None): raise Exception("Tor and Proxy cannot be set at the same time.") From add0837b358bd8ec3ca1c931a3fd28a50ad23355 Mon Sep 17 00:00:00 2001 From: dhruv1955 Date: Mon, 6 Oct 2025 00:17:04 +0530 Subject: [PATCH 2/5] refactor: remove deprecated Tor flags/usage --- sherlock_project/sherlock.py | 66 ++++-------------------------------- 1 file changed, 7 insertions(+), 59 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 3b6f418b4..9708a1281 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -171,8 +171,6 @@ def sherlock( username: str, site_data: dict[str, dict[str, str]], query_notify: QueryNotify, - tor: bool = False, - unique_tor: bool = False, dump_response: bool = False, proxy: Optional[str] = None, timeout: int = 60, @@ -188,8 +186,6 @@ def sherlock( query_notify -- Object with base type of QueryNotify(). This will be used to notify the caller about query results. - tor -- Boolean indicating whether to use a tor circuit for the requests. 
- unique_tor -- Boolean indicating whether to use a new tor circuit for each request. proxy -- String indicating the proxy URL timeout -- Time in seconds to wait before timing out request. Default is 60 seconds. @@ -210,32 +206,9 @@ def sherlock( # Notify caller that we are starting the query. query_notify.start(username) - # Create session based on request methodology - if tor or unique_tor: - try: - from torrequest import TorRequest # noqa: E402 - except ImportError: - print("Important!") - print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.") - print("> If you've installed Sherlock via pip, you can include the optional dependency via `pip install 'sherlock-project[tor]'`.") - print("> Other packages should refer to their documentation, or install it separately with `pip install torrequest`.\n") - sys.exit(query_notify.finish()) - - print("Important!") - print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.") - - # Requests using Tor obfuscation - try: - underlying_request = TorRequest() - except OSError: - print("Tor not found in system path. Unable to continue.\n") - sys.exit(query_notify.finish()) - - underlying_session = underlying_request.session - else: - # Normal requests - underlying_session = requests.session() - underlying_request = requests.Request() + # Create session using standard requests (Tor deprecated) + underlying_session = requests.session() + underlying_request = requests.Request() # Limit number of workers to 20. # This is probably vastly overkill. @@ -359,9 +332,7 @@ def sherlock( # Store future in data for access later net_info["request_future"] = future - # Reset identify for tor (if needed) - if unique_tor: - underlying_request.reset_identity() + # Tor support removed; no per-request identity reset # Add this site's results into final dictionary with all the other results. 
results_total[social_network] = results_site @@ -596,22 +567,7 @@ def main(): dest="output", help="If using single username, the output of the result will be saved to this file.", ) - parser.add_argument( - "--tor", - "-t", - action="store_true", - dest="tor", - default=False, - help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.", - ) - parser.add_argument( - "--unique-tor", - "-u", - action="store_true", - dest="unique_tor", - default=False, - help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.", - ) + # Tor options removed in 0.17.0 parser.add_argument( "--csv", action="store_true", @@ -762,19 +718,13 @@ def main(): if not re.match(proxy_pattern, args.proxy): raise ValueError(f"Invalid proxy URL format: {args.proxy}. Expected format: protocol://host:port (e.g., socks5://127.0.0.1:1080)") - if args.tor and (args.proxy is not None): - raise Exception("Tor and Proxy cannot be set at the same time.") + # Tor support removed; no need to check Tor/Proxy exclusivity # Make prompts if args.proxy is not None: print("Using the proxy: " + args.proxy) - if args.tor or args.unique_tor: - print("Using Tor to make requests") - - print( - "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors." - ) + # Tor messaging removed if args.no_color: # Disable color output. 
@@ -876,8 +826,6 @@ def main(): username, site_data, query_notify, - tor=args.tor, - unique_tor=args.unique_tor, dump_response=args.dump_response, proxy=args.proxy, timeout=args.timeout, From 825afaf58c76773d7084f4fc45cbdd2283cca467 Mon Sep 17 00:00:00 2001 From: dhruv1955 Date: Mon, 6 Oct 2025 00:32:18 +0530 Subject: [PATCH 3/5] Clean up merge conflicts; remove Tor; add proxy URL validation --- sherlock_project/sherlock.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index f1ef20f0e..476b686a2 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -206,15 +206,9 @@ def sherlock( # Notify caller that we are starting the query. query_notify.start(username) -<<<<<<< HEAD # Create session using standard requests (Tor deprecated) underlying_session = requests.session() underlying_request = requests.Request() -======= - - # Normal requests - underlying_session = requests.session() ->>>>>>> f38a2341d24411a2589c3a4ee9c251c2e42f97a2 # Limit number of workers to 20. # This is probably vastly overkill. @@ -338,11 +332,7 @@ def sherlock( # Store future in data for access later net_info["request_future"] = future -<<<<<<< HEAD # Tor support removed; no per-request identity reset - -======= ->>>>>>> f38a2341d24411a2589c3a4ee9c251c2e42f97a2 # Add this site's results into final dictionary with all the other results. 
results_total[social_network] = results_site @@ -579,10 +569,7 @@ def main(): dest="output", help="If using single username, the output of the result will be saved to this file.", ) -<<<<<<< HEAD # Tor options removed in 0.17.0 -======= ->>>>>>> f38a2341d24411a2589c3a4ee9c251c2e42f97a2 parser.add_argument( "--csv", action="store_true", @@ -749,11 +736,7 @@ def main(): if args.proxy is not None: print("Using the proxy: " + args.proxy) -<<<<<<< HEAD # Tor messaging removed - -======= ->>>>>>> f38a2341d24411a2589c3a4ee9c251c2e42f97a2 if args.no_color: # Disable color output. init(strip=True, convert=False) From 83a9b7b75382dca7456c595494eea9cdd5b6a978 Mon Sep 17 00:00:00 2001 From: dhruv1955 Date: Mon, 6 Oct 2025 11:40:58 +0530 Subject: [PATCH 4/5] fix: remove unused underlying_request variable --- sherlock_project/sherlock.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 476b686a2..ea7162721 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -208,7 +208,6 @@ def sherlock( query_notify.start(username) # Create session using standard requests (Tor deprecated) underlying_session = requests.session() - underlying_request = requests.Request() # Limit number of workers to 20. # This is probably vastly overkill. 
From bf796f12c5d47e9713bc6f26ed81a81a5113aaee Mon Sep 17 00:00:00 2001 From: dhruv1955 Date: Mon, 6 Oct 2025 11:55:47 +0530 Subject: [PATCH 5/5] fix: refine WAF detection to avoid false positives --- sherlock_project/sherlock.py | 37 ++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index ea7162721..a786a0092 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -377,24 +377,8 @@ def sherlock( query_status = QueryStatus.UNKNOWN error_context = None - # As WAFs advance and evolve, they will occasionally block Sherlock and - # lead to false positives and negatives. Fingerprints should be added - # here to filter results that fail to bypass WAFs. Fingerprints should - # be highly targetted. Comment at the end of each fingerprint to - # indicate target and date fingerprinted. - WAFHitMsgs = [ - r'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark', # 2024-05-13 Cloudflare - r'<span id="challenge-error-text">', # 2024-11-11 Cloudflare error page - r'AwsWafIntegration.forceRefreshToken', # 2024-11-11 Cloudfront (AWS) - r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:' # 2024-04-09 PerimeterX / Human Security - ] - if error_text is not None: error_context = error_text - - elif any(hitMsg in r.text for hitMsg in WAFHitMsgs): - query_status = QueryStatus.WAF - else: if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type): error_context = f"Unknown error type '{error_type}' for {social_network}" @@ -450,6 +434,27 @@ def sherlock( else: query_status = QueryStatus.AVAILABLE + # As WAFs advance and evolve, they will occasionally block Sherlock 
and + # lead to false positives and negatives. Fingerprints should be added + # here to filter results that fail to bypass WAFs. Fingerprints should + # be highly targeted. Comment at the end of each fingerprint to + # indicate target and date fingerprinted. + WAFHitMsgs = [ + r'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark', # 2024-05-13 Cloudflare + r'<span id="challenge-error-text">', # 2024-11-11 Cloudflare error page + r'AwsWafIntegration.forceRefreshToken', # 2024-11-11 Cloudfront (AWS) + r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:', # 2024-04-09 PerimeterX / Human Security + ] + + # Only override with WAF if we didn't confidently detect a claim + if query_status in (QueryStatus.AVAILABLE, QueryStatus.UNKNOWN): + try: + if any(hitMsg in r.text for hitMsg in WAFHitMsgs): + query_status = QueryStatus.WAF + except Exception: + # If response text isn't accessible, keep prior status + pass + if dump_response: print("+++++++++++++++++++++") print(f"TARGET NAME : {social_network}")