|
1 | 1 | import os
|
2 | 2 | from datetime import datetime as dt
|
3 | 3 | from pathlib import Path
|
| 4 | +from typing import List |
4 | 5 |
|
5 | 6 | import requests
|
6 | 7 | import rich
|
7 | 8 | from dotenv import load_dotenv
|
8 |
| -from rich.progress import Progress |
| 9 | +from rich.progress import Progress, SpinnerColumn |
| 10 | +# from rich import print |
9 | 11 |
|
10 | 12 | load_dotenv()
|
11 | 13 |
|
12 | 14 | API_KEY = os.getenv("MALSHARE_API_KEY")
|
| 15 | +SELECTED_FILES_AND_CONTENTS = [] |
13 | 16 |
|
14 | 17 |
|
15 |
| -def scrap_malshare(): |
16 |
| - """List hashes from the past 24 hours |
17 |
| - """ |
18 |
| - with Progress() as progress: |
19 |
| - if API_KEY is None or API_KEY == "": |
20 |
| - return None |
21 |
| - try: |
22 |
| - response = requests.post( |
23 |
| - "https://malshare.com/api.php", |
24 |
| - verify=True, |
25 |
| - params={"api_key": API_KEY, "action": "getlist"}, |
26 |
| - ) |
27 |
| - response.raise_for_status() |
28 |
| - hashes = response.json() |
29 |
| - except requests.RequestException as err: |
30 |
| - rich.print(f"[red] Malshare will be skipped. An expected error occurred: {err} ") |
31 |
| - return None |
32 |
| - rich.print(f"[green] - Malshare: {len(hashes)} Samples") |
33 |
| - sha256_ids = [hashe["sha256"] for hashe in hashes] |
34 |
| - task = progress.add_task( |
35 |
| - "-[green]Downloading Malware Files...", total=len(sha256_ids) |
36 |
| - ) |
37 |
| - Path("Downloaded-Malwares").mkdir(exist_ok=True) |
38 |
| - downloaded_malwares_path = Path("Downloaded-Malwares").absolute() |
39 |
| - for sha256_id in sha256_ids: |
40 |
| - if (downloaded_malwares_path / f"malware_{sha256_id[:6]}.zip").exists(): |
41 |
| - progress.update(task, advance=1) |
42 |
| - continue |
| 18 | + |
class Malshare:
    """Helpers to list and download recent malware samples from malshare.com.

    Every method is a classmethod, so the class is used as a namespace and
    is never instantiated. Downloaded payloads are buffered in the
    module-level ``SELECTED_FILES_AND_CONTENTS`` list by
    ``gather_selected_malwares`` and flushed to disk by
    ``download_selected_malwares``.
    """

    # Single endpoint used for both the "getlist" and "getfile" actions.
    API_URL = "https://malshare.com/api.php"
    # Seconds to wait for the server; without a timeout a stalled
    # connection would block the whole run forever.
    REQUEST_TIMEOUT = 30

    @classmethod
    def parsing_malshare_sha256(cls, fetched_hashes: list[dict], progress: "Progress") -> list[str]:
        """Extract the ``sha256`` field of every entry.

        Args:
            fetched_hashes: entries returned by the "getlist" action; each
                one must contain a ``"sha256"`` key.
            progress: unused, kept so existing callers keep working.

        Returns:
            The sha256 strings, in the same order as ``fetched_hashes``.

        Raises:
            KeyError: if an entry has no ``"sha256"`` key.
        """
        return [entry["sha256"] for entry in fetched_hashes]

    @classmethod
    def get_malshare_treshold(cls, hashes: list[dict], limit: int) -> list[dict]:
        """Keep at most ``limit`` entries from the front of ``hashes``.

        Args:
            hashes: entries to truncate.
            limit: maximum number of entries to keep; ``0`` (or any
                negative value) means "no limit".

        Returns:
            ``hashes`` itself when there is no limit, otherwise a new list
            holding the first ``limit`` entries.
        """
        # A negative limit would otherwise slice entries off the *end*.
        if limit <= 0:
            return hashes
        return hashes[:limit]

    @staticmethod
    def _skip_message(response, sha256_id: str):
        """Return the message for a known "skip this sample" 502 reply, else None."""
        try:
            payload = response.json()
        except ValueError:
            # Body is not JSON: not one of the known skip replies.
            return None
        query_status = payload.get("query_status") if isinstance(payload, dict) else None
        if query_status == "file_not_found":
            return f"[red]sha256_hash: {sha256_id[:6]} not found skipping"
        if query_status == "illegal_sha256_hash":
            return f"[red]Illegal SHA256 hash provided: {sha256_id[:6]} skipping"
        return None

    @staticmethod
    def _magic_tag(content: bytes) -> str:
        """Build the file-name suffix derived from the first four payload bytes."""
        tag = str(content[:4])
        for unwanted in ("\\", "<!", "<h"):
            tag = tag.replace(unwanted, "")
        # A "/" in the tag would be read as a directory separator.
        return tag.replace("/", "_")

    @classmethod
    def gather_selected_malwares(cls, new_hashes: list[str], choosen_path: Path, progress: "Progress", req_session: "requests.Session"):
        """Fetch the selected samples and buffer them in memory.

        Each fetched payload is appended to ``SELECTED_FILES_AND_CONTENTS``
        as a ``(target_path, content)`` tuple; nothing is written to disk
        here.

        Args:
            new_hashes: sha256 hashes of the samples to fetch.
            choosen_path: directory the target file names are built under.
            progress: rich progress object that receives a new task.
            req_session: session used to issue the "getfile" requests.

        Returns:
            None

        Raises:
            requests.RequestException: on a network failure or an HTTP
                error status other than a recognised 502 "skip" reply.
        """
        dl_task = progress.add_task("-[green] gathering malwares ...")
        # The task uses rich's default total of 100, hence the fractional step.
        step = 100 / len(new_hashes) if new_hashes else 0
        for sha256_id in new_hashes:
            response = req_session.post(
                cls.API_URL,
                params={"api_key": API_KEY, "action": "getfile", "hash": sha256_id},
                verify=True,
                timeout=cls.REQUEST_TIMEOUT,
            )
            # Checked before raise_for_status(), which would raise on the
            # 502 and make this branch unreachable.
            # NOTE(review): assumes the service reports unknown/illegal
            # hashes as 502 with a JSON "query_status" - confirm.
            if response.status_code == 502:
                skip_message = cls._skip_message(response, sha256_id)
                if skip_message is not None:
                    rich.print(skip_message)
                    progress.update(dl_task, advance=step)
                    continue
            response.raise_for_status()

            curr_time = dt.now().date().strftime("%Y-%m-%d")
            malware_content = response.content
            # Built outside the f-string: reusing quotes or a backslash
            # inside an f-string expression is a SyntaxError before 3.12.
            magic_tag = cls._magic_tag(malware_content)
            file_path = f"malware_{sha256_id[:6]}_{curr_time}-{magic_tag}.zip"
            progress.update(dl_task, advance=step)
            SELECTED_FILES_AND_CONTENTS.append((choosen_path / file_path, malware_content))

    @classmethod
    def download_selected_malwares(cls, new_hashes: list[str], progress: "Progress"):
        """Write every buffered payload to its target path.

        Args:
            new_hashes: unused, kept so existing callers keep working.
            progress: rich progress object that receives a new task.

        Returns:
            None
        """
        dl_task = progress.add_task("-[green] Download gathered malwares ...")
        # NOTE(review): the buffer is never emptied, so a second run in the
        # same process rewrites the files of the first one - confirm intent.
        total_file = len(SELECTED_FILES_AND_CONTENTS)
        for target_path, content in SELECTED_FILES_AND_CONTENTS:
            with open(file=target_path, mode="wb") as f:
                f.write(content)
            progress.update(dl_task, advance=100 / total_file)

    @classmethod
    def catch_new_malwares(cls, hashes: list[str], choosen_path: Path, current_progress: "Progress") -> list[str]:
        """Filter out hashes whose sample already exists in ``choosen_path``.

        A sample counts as present when a file named
        ``malware_<first 6 hash chars>*.zip`` exists in the directory.

        Args:
            hashes: candidate sha256 hashes.
            choosen_path: directory holding previously saved samples.
            current_progress: rich progress object that receives a new task.

        Returns:
            The hashes with no matching file, in their original order.
        """
        task = current_progress.add_task(
            "-[green]Checking existing malwares ...", total=len(hashes), start=False
        )
        current_progress.start_task(task_id=task)
        new_hashes = []
        for sha256_id in hashes:
            # Saved names carry a date and a tag after the hash prefix, so
            # match on the prefix rather than on an exact file name.
            already_saved = any(choosen_path.glob(f"malware_{sha256_id[:6]}*.zip"))
            if not already_saved:
                new_hashes.append(sha256_id)
            current_progress.update(task, advance=1)
        return new_hashes

    @classmethod
    def create_dir_and_return_path(cls, str_path: str) -> Path:
        """Create the download directory (with parents) and return it.

        Args:
            str_path: directory to create; an empty value selects
                ``Downloaded-Malwares`` under the current directory.

        Returns:
            Path of the directory, which exists when the call returns.
        """
        target = Path(str_path) if str_path else Path() / "Downloaded-Malwares"
        target.mkdir(parents=True, exist_ok=True)
        return target

    @classmethod
    def scrap_malshare(cls, choosen_path: str = "", not_twins: bool = False, limit: int = 24):
        """List recent samples on malshare.com and save them to disk.

        Args:
            choosen_path: directory to save into; empty selects the default
                used by ``create_dir_and_return_path``.
            not_twins: when True, samples already present in the directory
                are skipped instead of being fetched again.
            limit: maximum number of listed samples to process; ``0`` means
                all of them.

        Returns:
            None. The run is skipped when no API key is configured or the
            listing request fails.

        Raises:
            requests.RequestException: if fetching an individual sample fails.
        """
        # Checked first so no progress display is started for a no-op run.
        if not API_KEY:
            return None
        with Progress(
            SpinnerColumn(finished_text="[bold green]finished ✓[/bold green]"),
            *Progress.get_default_columns(),
            transient=True,
        ) as progress:
            try:
                response = requests.post(
                    cls.API_URL,
                    verify=True,
                    params={"api_key": API_KEY, "action": "getlist"},
                    timeout=cls.REQUEST_TIMEOUT,
                )
                response.raise_for_status()
                # Decoded inside the try: a non-JSON body raises ValueError.
                hashes = response.json()
            except (requests.RequestException, ValueError) as err:
                rich.print(f"[red]Malshare will be skipped. An unexpected error occurred: {err} ")
                return None
            sliced_hashes = cls.get_malshare_treshold(hashes, limit)
            rich.print(f"[green] - Malshare: {len(sliced_hashes)} Samples")
            sha256_ids = cls.parsing_malshare_sha256(sliced_hashes, progress)
            downloaded_malwares_path = cls.create_dir_and_return_path(choosen_path)
            if not_twins:
                new_hashes = cls.catch_new_malwares(sha256_ids, downloaded_malwares_path, progress)
            else:
                new_hashes = sha256_ids
            with requests.Session() as session:
                cls.gather_selected_malwares(
                    new_hashes=new_hashes,
                    choosen_path=downloaded_malwares_path,
                    progress=progress,
                    req_session=session,
                )
            cls.download_selected_malwares(new_hashes=new_hashes, progress=progress)
67 | 187 |
|
68 |
| -if __name__ == "__main__": |
69 |
| - scrap_malshare() |
|
0 commit comments