Merge pull request #390 from cccs-kevin/update/downloaders
Migrating download utilities for YARA and parsers to community
doomedraven authored Jan 3, 2024
2 parents e131a12 + 1858752 commit feffdf3
Showing 2 changed files with 136 additions and 0 deletions.
51 changes: 51 additions & 0 deletions utils/download_parsers.py
@@ -0,0 +1,51 @@
import glob
import os
import re

import requests

ROOT = "/opt/CAPEv2"

PARSER_SUBPATH = "/modules/processing/parsers"
CAPE = "CAPE"
RATDECODERS = "RATDecoders"
MALDUCK = "malduck"
MWCP = "mwcp"
PARSER_PATH_DIRS = [CAPE, RATDECODERS, MALDUCK, MWCP]
PARSER_PATH = f"{ROOT}{PARSER_SUBPATH}"

PARSER_URL = f"https://github.com/kevoreilly/CAPEv2/tree/master{PARSER_SUBPATH}/%s"
PARSER_RAW_URL = f"https://raw.githubusercontent.com/kevoreilly/CAPEv2/master{PARSER_SUBPATH}/%s"

PARSER_REGEX = r"([\w\-\d]+\.py)"

parser_file_names = set()

# Grab all of the parsers available at the parser subpaths on GitHub.
# GitHub returns the directory listing for these tree URLs as JSON; the
# file entries live under payload -> tree -> items.
for d in PARSER_PATH_DIRS:
    resp = requests.get(PARSER_URL % d)
    page_content = resp.json().get("payload", {}).get("tree", {}).get("items", [])
    for line in page_content:
        if not line:
            continue
        match = re.search(PARSER_REGEX, line["name"])
        if match and match.group(0) != "__init__.py" and not match.group(0).startswith("test_"):
            parser_file_names.add(f"{d}/{match.group(0)}")

# Delete the current parser files so that stale parsers are removed
for d in PARSER_PATH_DIRS:
    parser_files = glob.glob("%s/*" % os.path.join(PARSER_PATH, d))
    for f in parser_files:
        if f.endswith(".py") and not f.endswith("__init__.py") and "test_" not in f:
            if os.path.isfile(f):
                os.remove(f)
                print(f"Successfully deleted {f}!")

# Now, get the content of each parser and write it to disk
for file_name in sorted(parser_file_names):
    file_content = requests.get(PARSER_RAW_URL % file_name).text

    parser_file_path = os.path.join(PARSER_PATH, file_name)
    with open(parser_file_path, "w") as f:
        f.write(file_content)
    print(f"Successfully downloaded and wrote {parser_file_path}!")
85 changes: 85 additions & 0 deletions utils/download_yara.py
@@ -0,0 +1,85 @@
import glob
import os
import re

import requests

ROOT = "/opt/CAPEv2"

ANALYZER_YARA_SUBPATH = "/analyzer/windows/data/yara"
ANALYZER_YARA_PATH = f"{ROOT}{ANALYZER_YARA_SUBPATH}"

ANALYZER_YARA_URL = f"https://github.com/kevoreilly/CAPEv2/tree/master{ANALYZER_YARA_SUBPATH}"
ANALYZER_YARA_RAW_URL = f"https://raw.githubusercontent.com/kevoreilly/CAPEv2/master{ANALYZER_YARA_SUBPATH}/%s"

CAPE = "/CAPE"
BINARIES = "/binaries"
MACRO = "/macro"
MEMORY = "/memory"
MONITOR = "/monitor"
URLS = "/urls"
SERVER_SIDE_YARA_SUBPATH = "/data/yara"
SERVER_SIDE_YARA_PATH_DIRS = [CAPE, BINARIES, MACRO, MEMORY, MONITOR, URLS]
SERVER_SIDE_YARA_PATH = f"{ROOT}{SERVER_SIDE_YARA_SUBPATH}%s"

SERVER_SIDE_YARA_URL = f"https://github.com/kevoreilly/CAPEv2/tree/master{SERVER_SIDE_YARA_SUBPATH}%s"
SERVER_SIDE_YARA_RAW_URL = f"https://raw.githubusercontent.com/kevoreilly/CAPEv2/master{SERVER_SIDE_YARA_SUBPATH}%s"


YARA_REGEX = r"([\w\-\d]+\.yar)"

yara_file_names = set()

# First, grab all of the YARA rules available at the analyzer subpath on GitHub
resp = requests.get(ANALYZER_YARA_URL)
page_content = resp.json().get("payload", {}).get("tree", {}).get("items", [])
for line in page_content:
    if not line:
        continue
    match = re.search(YARA_REGEX, line["name"])
    if match:
        yara_file_names.add(match.group(0))


# Delete the current analyzer YARA files so that stale rules are removed
yara_files = glob.glob("%s/*" % ANALYZER_YARA_PATH)
for f in yara_files:
    if os.path.isfile(f):
        os.remove(f)
# Now, get the content of each YARA rule and write it to disk
for file_name in sorted(yara_file_names):
    file_content = requests.get(ANALYZER_YARA_RAW_URL % file_name).text

    yara_file_path = os.path.join(ANALYZER_YARA_PATH, file_name)
    with open(yara_file_path, "w") as f:
        f.write(file_content)
    print(f"Successfully downloaded and wrote {yara_file_path}!")

print("\n\n\nDeep breath...\n\n\n")

# Reset
yara_file_names = set()

# Next, grab all of the YARA rules available at the server-side subpaths on GitHub
for d in SERVER_SIDE_YARA_PATH_DIRS:
    resp = requests.get(SERVER_SIDE_YARA_URL % d)
    page_content = resp.json().get("payload", {}).get("tree", {}).get("items", [])
    for line in page_content:
        if not line:
            continue
        match = re.search(YARA_REGEX, line["name"])
        if match:
            yara_file_subpath = os.path.join(d, match.group(0))
            yara_file_names.add(yara_file_subpath)

# Delete the current server-side YARA files so that stale rules are removed.
# SERVER_SIDE_YARA_PATH contains a %s placeholder, so each directory must be
# substituted in before globbing; globbing the raw template matches nothing.
for d in SERVER_SIDE_YARA_PATH_DIRS:
    yara_files = glob.glob("%s/*" % (SERVER_SIDE_YARA_PATH % d))
    for f in yara_files:
        if os.path.isfile(f):
            os.remove(f)
# Now, get the content of each YARA rule and write it to disk
for file_name in sorted(yara_file_names):
    file_content = requests.get(SERVER_SIDE_YARA_RAW_URL % file_name).text

    yara_file_path = SERVER_SIDE_YARA_PATH % file_name
    with open(yara_file_path, "w") as f:
        f.write(file_content)
    print(f"Successfully downloaded and wrote {yara_file_path}!")
