Skip to content

Commit 4be6c07

Browse files
committed
refactor: replace argparse with typer and create an entry script command
1 parent 7a6835c commit 4be6c07

File tree

7 files changed

+389
-189
lines changed

7 files changed

+389
-189
lines changed

poetry.lock

+41-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+7-1
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,18 @@ description = "Downloads and aggregates fresh malware samples collected from OSI
55
authors = ["Robert Thomas", "Prashant Shubham", "Mboula Penda Paul O’neal"]
66
readme = "README.md"
77

8+
9+
[tool.poetry.scripts]
10+
dd-run = "your_daily_dose_malware.commands:app"
11+
12+
13+
814
[tool.poetry.dependencies]
915
python = "^3.9"
1016
requests = "^2.32.2"
11-
click = "^8.1.7"
1217
python-dotenv = "^1.0.1"
1318
rich = "^13.7.1"
19+
typer = "^0.12.5"
1420

1521
[build-system]
1622
requires = ["poetry-core"]

your_daily_dose_malware/__main__.py

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from your_daily_dose_malware.commands import app
2+
# Entry point for `python -m your_daily_dose_malware`: run the command-line
# application imported from `commands` (presumably the Typer app introduced by
# this commit -- confirm), reporting itself under the `dd-run` program name so
# the help/usage text matches the installed console script.
app(prog_name="dd-run")

your_daily_dose_malware/backends/malshare.py

+158-40
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,187 @@
11
import os
22
from datetime import datetime as dt
33
from pathlib import Path
4+
from typing import List
45

56
import requests
67
import rich
78
from dotenv import load_dotenv
8-
from rich.progress import Progress
9+
from rich.progress import Progress, SpinnerColumn
10+
# from rich import print
911

1012
load_dotenv()
1113

1214
# Malshare API key taken from the MALSHARE_API_KEY environment variable
# (None when the variable is unset). Malshare.scrap_malshare returns early
# without contacting the service when this is None or empty.
API_KEY = os.getenv("MALSHARE_API_KEY")
15+
# Module-level buffer of (destination path, raw file bytes) tuples. It is
# appended to by Malshare.gather_selected_malwares and iterated by
# Malshare.download_selected_malwares, which writes each entry to disk.
SELECTED_FILES_AND_CONTENTS = []
1316

1417

15-
def scrap_malshare():
16-
"""List hashes from the past 24 hours
17-
"""
18-
with Progress() as progress:
19-
if API_KEY is None or API_KEY == "":
20-
return None
21-
try:
22-
response = requests.post(
23-
"https://malshare.com/api.php",
24-
verify=True,
25-
params={"api_key": API_KEY, "action": "getlist"},
26-
)
27-
response.raise_for_status()
28-
hashes = response.json()
29-
except requests.RequestException as err:
30-
rich.print(f"[red] Malshare will be skipped. An expected error occurred: {err} ")
31-
return None
32-
rich.print(f"[green] - Malshare: {len(hashes)} Samples")
33-
sha256_ids = [hashe["sha256"] for hashe in hashes]
34-
task = progress.add_task(
35-
"-[green]Downloading Malware Files...", total=len(sha256_ids)
36-
)
37-
Path("Downloaded-Malwares").mkdir(exist_ok=True)
38-
downloaded_malwares_path = Path("Downloaded-Malwares").absolute()
39-
for sha256_id in sha256_ids:
40-
if (downloaded_malwares_path / f"malware_{sha256_id[:6]}.zip").exists():
41-
progress.update(task, advance=1)
42-
continue
18+
19+
class Malshare:
    """Fetch recent malware samples from malshare.com and store them on disk.

    Every method is a classmethod/staticmethod; the class only groups the
    steps of the workflow driven by :meth:`scrap_malshare`:

    1. list the available hashes,
    2. optionally drop the ones already present in the target directory,
    3. fetch the selected samples into ``SELECTED_FILES_AND_CONTENTS``,
    4. write the buffered samples to disk.
    """

    # Endpoint used for both the hash listing and the file downloads.
    API_URL = "https://malshare.com/api.php"
    # Directory (relative to the current working directory) used when the
    # caller does not choose one.
    DEFAULT_DIR = "Downloaded-Malwares"
    # Connect/read timeout in seconds; without one a stalled server would
    # block the run forever.
    REQUEST_TIMEOUT = 60

    @classmethod
    def parsing_malshare_sha256(cls, fetched_hashes: list[dict], progress: "Progress") -> list[str]:
        """Extract the ``sha256`` value of every entry returned by the API.

        Args:
            fetched_hashes: entries of the hash listing; each must contain a
                ``"sha256"`` key.
            progress: unused, kept so existing callers keep working.

        Returns:
            The sha256 strings, in the order they were received.

        Raises:
            KeyError: if an entry has no ``"sha256"`` key.
        """
        return [entry["sha256"] for entry in fetched_hashes]

    @classmethod
    def get_malshare_treshold(cls, hashes: list[dict], limit: int) -> list[dict]:
        """Keep at most ``limit`` entries from the start of ``hashes``.

        Args:
            hashes: entries of the hash listing.
            limit: maximum number of entries to keep. ``0`` (or ``None``)
                means "no limit"; a negative value is treated the same way
                instead of silently dropping entries from the end.

        Returns:
            ``hashes`` itself when no limit applies, otherwise a new list
            holding the first ``limit`` entries.
        """
        if not limit or limit < 0:
            return hashes
        return hashes[:limit]

    @staticmethod
    def _magic_suffix(content: bytes) -> str:
        """Return a file-name fragment derived from the first four bytes.

        The fragment is the ``repr`` of those bytes with backslashes and the
        ``<!`` / ``<h`` markers removed. ``/`` is additionally replaced with
        ``_`` so the fragment can never introduce a path separator.
        """
        suffix = repr(content[:4])
        for unwanted in ("\\", "<!", "<h"):
            suffix = suffix.replace(unwanted, "")
        return suffix.replace("/", "_")

    @classmethod
    def _build_file_name(cls, sha256_id: str, content: bytes) -> str:
        """Build ``malware_<first 6 hash chars>_<YYYY-MM-DD>-<magic>.zip``."""
        curr_time = dt.now().date().strftime("%Y-%m-%d")
        # The suffix is computed outside the f-string: nesting the same quote
        # type or a backslash inside a replacement field is a SyntaxError
        # before Python 3.12.
        suffix = cls._magic_suffix(content)
        return f"malware_{sha256_id[:6]}_{curr_time}-{suffix}.zip"

    @staticmethod
    def _report_rejected_hash(response, sha256_id: str) -> bool:
        """Report a 502 answer whose JSON body says the hash was rejected.

        NOTE(review): the ``query_status`` values are the ones the previous
        code looked for -- confirm that the Malshare API really returns them.

        Returns:
            True when the sample must be skipped, False when the response is
            not one of the recognised rejections.
        """
        try:
            query_status = response.json()["query_status"]
        except (ValueError, KeyError, TypeError):
            # Body is not JSON or has another shape: not a known rejection.
            return False
        if query_status == "file_not_found":
            rich.print(f"[red]sha256_hash: {sha256_id[:6]} not found skipping")
            return True
        if query_status == "illegal_sha256_hash":
            rich.print(f"[red]Illegal SHA256 hash provided: {sha256_id[:6]} skipping")
            return True
        return False

    @classmethod
    def gather_selected_malwares(
        cls,
        new_hashes: list[str],
        choosen_path: Path,
        progress: "Progress",
        req_session: "requests.Session",
    ) -> None:
        """Fetch the selected samples and buffer them for a later write.

        Each fetched sample is appended to ``SELECTED_FILES_AND_CONTENTS`` as
        a ``(destination path, content)`` tuple; nothing is written to disk
        here.

        Args:
            new_hashes: sha256 hashes of the samples to fetch.
            choosen_path: directory the samples will be saved in.
            progress: rich progress object used to report advancement.
            req_session: session used for the HTTP requests.

        Raises:
            requests.RequestException: if a request fails or the server
                answers with an error status that is not a recognised
                "skip this hash" response.
        """
        dl_task = progress.add_task("-[green] gathering malwares ...", total=len(new_hashes))
        for sha256_id in new_hashes:
            response = req_session.post(
                cls.API_URL,
                params={"api_key": API_KEY, "action": "getfile", "hash": sha256_id},
                verify=True,
                timeout=cls.REQUEST_TIMEOUT,
            )
            # The 502 check must come before raise_for_status(), otherwise
            # the HTTP error is raised first and the skip is never reached.
            if response.status_code == 502 and cls._report_rejected_hash(response, sha256_id):
                progress.update(dl_task, advance=1)
                continue
            response.raise_for_status()
            malware_content = response.content
            file_name = cls._build_file_name(sha256_id, malware_content)
            SELECTED_FILES_AND_CONTENTS.append((choosen_path / file_name, malware_content))
            progress.update(dl_task, advance=1)

    @classmethod
    def download_selected_malwares(cls, new_hashes: list[str], progress: "Progress") -> None:
        """Write every buffered sample to its destination and empty the buffer.

        Args:
            new_hashes: unused, kept so existing callers keep working; the
                files written are the ones in ``SELECTED_FILES_AND_CONTENTS``.
            progress: rich progress object used to report advancement.

        Raises:
            OSError: if a destination file cannot be written.
        """
        dl_task = progress.add_task(
            "-[green] Download gathered malwares ...",
            total=len(SELECTED_FILES_AND_CONTENTS),
        )
        try:
            for target_path, content in SELECTED_FILES_AND_CONTENTS:
                with open(file=target_path, mode="wb") as f:
                    f.write(content)
                progress.update(dl_task, advance=1)
        finally:
            # Empty the buffer so a later run does not rewrite these files
            # and the sample bytes are not kept in memory.
            SELECTED_FILES_AND_CONTENTS.clear()

    @staticmethod
    def _existing_sample_prefixes(directory: Path) -> set:
        """Return the six-character hash prefixes of the samples in ``directory``.

        Matches both ``malware_<prefix>.zip`` and the dated
        ``malware_<prefix>_<date>-<magic>.zip`` names.
        """
        marker = "malware_"
        return {
            entry.name[len(marker):len(marker) + 6]
            for entry in directory.glob(f"{marker}*.zip")
        }

    @classmethod
    def catch_new_malwares(cls, hashes: list[str], choosen_path: Path, current_progress: "Progress") -> list[str]:
        """Return the hashes that have no sample in ``choosen_path`` yet.

        A hash counts as already downloaded when a ``malware_*.zip`` file in
        the directory starts with its first six characters.

        Args:
            hashes: sha256 hashes to check.
            choosen_path: directory holding previously saved samples.
            current_progress: rich progress object used to report advancement.

        Returns:
            The hashes without a matching file, in their original order.
        """
        task = current_progress.add_task(
            "-[green]Checking existing malwares ...", total=len(hashes), start=False
        )
        # One directory scan instead of one filesystem check per hash.
        known_prefixes = cls._existing_sample_prefixes(choosen_path)
        current_progress.start_task(task_id=task)
        new_hashes = []
        for sha256_id in hashes:
            if sha256_id[:6] not in known_prefixes:
                new_hashes.append(sha256_id)
            # Advance for skipped hashes too so the task can reach its total.
            current_progress.update(task, advance=1)
        return new_hashes

    @classmethod
    def create_dir_and_return_path(cls, str_path: str) -> Path:
        """Create the download directory (and its parents) and return it.

        Args:
            str_path: directory to create; an empty value selects
                ``Downloaded-Malwares`` in the current working directory.

        Returns:
            Path: the directory, which exists when this method returns.
        """
        target = Path(str_path) if str_path else Path(cls.DEFAULT_DIR)
        target.mkdir(parents=True, exist_ok=True)
        return target

    @classmethod
    def scrap_malshare(cls, choosen_path: str = "", not_twins: bool = False, limit: int = 24):
        """List the hashes published by malshare.com and save the samples.

        Args:
            choosen_path: directory to save the samples in; empty selects
                the default directory.
            not_twins: when True, skip hashes that already have a sample in
                the target directory.
            limit: maximum number of listed hashes to process; ``0`` means
                no limit.

        Returns:
            None. Nothing is done when ``API_KEY`` is missing, and the
            backend is skipped with a message when the listing fails.

        Raises:
            requests.RequestException: if fetching one of the samples fails.
        """
        if not API_KEY:
            return None
        with Progress(
            SpinnerColumn(finished_text="[bold green]finished ✓[/bold green]"),
            *Progress.get_default_columns(),
            transient=True,
        ) as progress:
            try:
                response = requests.post(
                    cls.API_URL,
                    verify=True,
                    params={"api_key": API_KEY, "action": "getlist"},
                    timeout=cls.REQUEST_TIMEOUT,
                )
                response.raise_for_status()
                # Decoded inside the try: a non-JSON body should skip the
                # backend like any other request failure.
                hashes = response.json()
            except (requests.RequestException, ValueError) as err:
                rich.print(f"[red]Malshare will be skipped. An unexpected error occurred: {err} ")
                return None
            sliced_hashes = cls.get_malshare_treshold(hashes, limit)
            rich.print(f"[green] - Malshare: {len(sliced_hashes)} Samples")
            new_hashes = cls.parsing_malshare_sha256(sliced_hashes, progress)
            downloaded_malwares_path = cls.create_dir_and_return_path(choosen_path)
            if not_twins:
                new_hashes = cls.catch_new_malwares(new_hashes, downloaded_malwares_path, progress)
            with requests.Session() as session:
                cls.gather_selected_malwares(
                    new_hashes=new_hashes,
                    choosen_path=downloaded_malwares_path,
                    progress=progress,
                    req_session=session,
                )
            cls.download_selected_malwares(new_hashes=new_hashes, progress=progress)
        return None
67187

68-
if __name__ == "__main__":
69-
scrap_malshare()

0 commit comments

Comments
 (0)