From 72186b5da6a0e245582d8f1e9d284bffd2529fba Mon Sep 17 00:00:00 2001 From: Islam hamdy <65929613+ISLAM-XGAMER@users.noreply.github.com> Date: Thu, 13 Jun 2024 21:04:02 +0300 Subject: [PATCH 1/2] Update main.py --- paramspider/main.py | 47 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/paramspider/main.py b/paramspider/main.py index 38ff9f5..33bf991 100644 --- a/paramspider/main.py +++ b/paramspider/main.py @@ -3,9 +3,11 @@ import logging import colorama from colorama import Fore, Style -from . import client # Importing client from a module named "client" +from . import client # Importing client from a module named "client" from urllib.parse import urlparse, parse_qs, urlencode import os +from datetime import datetime + yellow_color_code = "\033[93m" reset_color_code = "\033[0m" @@ -21,6 +23,16 @@ ".css", ".js", ".webp", ".woff", ".woff2", ".eot", ".ttf", ".otf", ".mp4", ".txt" ] +now = datetime.now() +timestamp = now.strftime("%Y_%m_%d_%H:%M:%S") + + + + + + + + def has_extension(url, extensions): """ Check if the URL has a file extension matching any of the provided extensions. 
@@ -90,9 +102,13 @@ def fetch_and_clean_urls(domain, extensions, stream_output,proxy, placeholder): Returns: None """ + + logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Fetching URLs for {Fore.CYAN + domain + Style.RESET_ALL}") wayback_uri = f"https://web.archive.org/cdx/search/cdx?url={domain}/*&output=txt&collapse=urlkey&fl=original&page=/" response = client.fetch_url_content(wayback_uri,proxy) + if response == None : + return urls = response.text.split() logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Found {Fore.GREEN + str(len(urls)) + Style.RESET_ALL} URLs for {Fore.CYAN + domain + Style.RESET_ALL}") @@ -102,18 +118,37 @@ def fetch_and_clean_urls(domain, extensions, stream_output,proxy, placeholder): logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Found {Fore.GREEN + str(len(cleaned_urls)) + Style.RESET_ALL} URLs after cleaning") logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Extracting URLs with parameters") + + results_dir = "results" if not os.path.exists(results_dir): os.makedirs(results_dir) - result_file = os.path.join(results_dir, f"{domain}.txt") - with open(result_file, "w") as f: + + + + + + if "/" in domain: + domain = domain.replace("/" , "\u2044") # "\u2044" (fraction slash) replaces "/" because a normal slash cannot be used in a file name + + + + + result_file = os.path.join(results_dir, f"{domain}.txt") + session_file = f"Session : {timestamp}.txt" + + with open(result_file, "w") as f , open(f"{results_dir}/{session_file}" , "a") as s : for url in cleaned_urls: if "?" 
in url: f.write(url + "\n") + s.write(url + "\n") + if stream_output: print(url) + + logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Saved cleaned URLs to {Fore.CYAN + result_file + Style.RESET_ALL}") @@ -163,6 +198,10 @@ def main(): if args.list: for domain in domains: fetch_and_clean_urls(domain, extensions, args.stream,args.proxy, args.placeholder) + + + + if __name__ == "__main__": - main() \ No newline at end of file + main() From f2e71aa5105b552f9c31b66b43bc03fc8a6709a5 Mon Sep 17 00:00:00 2001 From: Islam hamdy <65929613+ISLAM-XGAMER@users.noreply.github.com> Date: Thu, 13 Jun 2024 21:16:38 +0300 Subject: [PATCH 2/2] Update client.py --- paramspider/client.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/paramspider/client.py b/paramspider/client.py index 09db654..33ac79e 100644 --- a/paramspider/client.py +++ b/paramspider/client.py @@ -4,7 +4,8 @@ import logging import time import sys - +import colorama +from colorama import Fore, Style logging.basicConfig(level=logging.INFO) @@ -56,12 +57,12 @@ def fetch_url_content(url,proxy): response = requests.get(url, proxies=proxy,headers=headers) response.raise_for_status() return response - except (requests.exceptions.RequestException, ValueError): - logging.warning(f"Error fetching URL {url}. Retrying in 5 seconds...") + except (requests.exceptions.RequestException, ValueError): + logging.warning(f"\n{Fore.YELLOW}[RETRY] {Style.RESET_ALL}Error fetching URL {Fore.CYAN + url + Style.RESET_ALL}. Retrying in 5 seconds...\n") time.sleep(5) except KeyboardInterrupt: logging.warning("Keyboard Interrupt re ceived. Exiting gracefully...") sys.exit() logging.error(f"Failed to fetch URL {url} after {MAX_RETRIES} retries.") - sys.exit() + return None