diff --git a/.gitignore b/.gitignore index e12df7d..e7dab63 100644 --- a/.gitignore +++ b/.gitignore @@ -108,4 +108,5 @@ ENV/ .vscode/ logs/ -.c9 \ No newline at end of file +.c9 +pastehunter/YaraRules/custom_keywords.yar diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..19a05d5 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,36 @@ +language: python +sudo: required +dist: bionic +group: edge +cache: + pip: true +python: +- 3.6 +- 3.6-dev +before_install: +- sudo apt-get update -qq +- sudo apt-get install automake libtool make gcc libmagic-dev -yqq python3-pip unzip +- wget https://github.com/VirusTotal/yara/archive/v3.10.0.tar.gz +- tar -xzvf v3.10.0.tar.gz +- cd yara-3.10.0/ && ./bootstrap.sh && ./configure --enable-dotnet --enable-magic + && make && sudo make install && cd ../ +- git clone --recursive https://github.com/VirusTotal/yara-python +- pip3 install pytest codecov pytest-cov +- cd yara-python +- python setup.py build --enable-magic --enable-dotnet +- python setup.py install && cd ../ && rm -rf yara-python && rm -rf yara-3.10.0/ +install: +- pip install -r requirements.txt +- pip install -e . +script: +- pastehunter-cli +after_success: +- python setup.py sdist +deploy: + provider: pypi + user: __token__ + password: + secure: ZYILSwAsPcCWa4Ccslu2F+HVw02Rafdf4HqnQla3uCCTlEQQ+cFyuTKxQB46xytgblFQv/99oxq3SwVTUX4C6cIa8D+zHm/6lR4Tu+YPthYZX9IashF/AMKkyKks8bxbB0x/3t7hBX+7w++OcC1wwCXUyX7btsiOBa28k1NZCsB26NgdpBn02wF/GwqDhkxKkW9Bi7KDjb58GdiyhgVXxOOaOYbRyKiNZqUKQx504zmc0aGSPYCs0gSPwoA0T3FUet4IBcjjTP9DsjjkyQ7K6iMWYNGsAP91HnZe5J4sZYqwrGs++vndJVa/bYpiyMCjUrG4c6okdS0zpSmfbrqJay12wH5qroqqLxwuLtrXcHK+ChlyvhsGHMN51rqX811zdt/IzDwi+hXz84e8Y8/YgUTx7j0/HPEdrHjIIbMoIEd9Wy42+TcRCHJOULjsg7Kc7KLd1ILvxxyV+REnkfaazeqmgSNlqFxM2A65dkq3xNt9CDtYQlX/IhTDBy2/qY3m60uOh92ptd5f5eHF28W89APnkRAHD2JSEVRym1fHNrvPl1NCJT8NavbdYup/dH8hQadMx72X022lmyFASHN92G78O3uA0fZ8B/hzCpVQ4KTTIT4/LqkAXuWlfW4z9wC62V2ZdL6E76lqbMPokeXfH8Tf+chAaw/XHr7Wk6bWkOQ= + on: + branch: master + skip_existing: true diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..41e3ab8 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,22 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [1.2.1] - 2019-12-29 +### Changed +- move config file to ~/.config +- move custom yara rules +- refactor yara rules location + +## [1.2.0] - 2019-12-28 +### Added +- Changelog +- travis CI +- PyPi Installation + +### Changed +- FilePaths to enable pip diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..fac9ba1 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include pastehunter/YaraRules *.yar \ No newline at end of file diff --git a/README.md b/README.md index 9b696e3..d84601b 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,11 @@ by an organisation or a researcher. 
For setup instructions please see the official documentation https://pastehunter.readthedocs.io/en/latest/installation.html +[![PyPI version](https://badge.fury.io/py/pastehunter.svg)](https://badge.fury.io/py/pastehunter) + +[![Build Status](https://travis-ci.org/kevthehermit/PasteHunter.svg?branch=master)](https://travis-ci.org/kevthehermit/PasteHunter) + + ## Supported Inputs Pastehunter currently has support for the following sites: - pastebin.com diff --git a/YaraRules/index.yar b/YaraRules/index.yar deleted file mode 100644 index d09326e..0000000 --- a/YaraRules/index.yar +++ /dev/null @@ -1,14 +0,0 @@ -include "api_keys.yar" -include "aws.yar" -include "base64.yar" -include "blacklist.yar" -include "certificates.yar" -include "core_keywords.yar" -include "CryptoExchangeApi.yar" -include "database.yar" -include "email_filter.yar" -include "general.yar" -include "github_dorks.yar" -include "hak5.yar" -include "password_leak.yar" -include "powershell.yar" diff --git a/common.py b/common.py deleted file mode 100644 index 1ea162b..0000000 --- a/common.py +++ /dev/null @@ -1,16 +0,0 @@ -import json -import logging - -logger = logging.getLogger('pastehunter') - -# Parse the config file in to a dict -def parse_config(): - conf_file = 'settings.json' - conf = None - try: - with open(conf_file, 'r') as read_conf: - conf = json.load(read_conf) - except Exception as e: - logger.error("Unable to parse config file: {0}".format(e)) - - return conf diff --git a/pastehunter-cli b/pastehunter-cli new file mode 100644 index 0000000..8cab6e2 --- /dev/null +++ b/pastehunter-cli @@ -0,0 +1,405 @@ +#!/usr/bin/python3 +import errno +import hashlib +import importlib +import json +import logging +import multiprocessing +import os +import signal +import sys +import time +from io import BytesIO +from logging import handlers +from time import sleep +from urllib.parse import unquote_plus + +import requests +import yara +import pastehunter +from pastehunter.common import parse_config + +VERSION = 1.0 + +# Setup Default logging +root = logging.getLogger() +ch = logging.StreamHandler() +ch.setLevel(logging.DEBUG) +formatter = logging.Formatter('%(levelname)s:%(filename)s:%(message)s') +ch.setFormatter(formatter) +root.addHandler(ch) + +logger = logging.getLogger('pastehunter') +logger.setLevel(logging.INFO) + +# Version info +logger.info("Starting PasteHunter Version: {0}".format(VERSION)) + +# Parse the config file +logger.info("Reading Configs") +conf = parse_config() + +# If the config failed to parse +if not conf: + sys.exit() + +class TimeoutError(Exception): + pass + +class timeout: + def __init__(self, seconds=1, error_message='Timeout'): + self.seconds = seconds + self.error_message = error_message + def handle_timeout(self, signum, frame): + raise TimeoutError("Process timeout: {0}".format(self.error_message)) + def __enter__(self): + signal.signal(signal.SIGALRM, self.handle_timeout) + signal.alarm(self.seconds) + def __exit__(self, type, value, traceback): + signal.alarm(0) + + + +# Set up the log file +if "log" in conf and conf["log"]["log_to_file"]: + if conf["log"]["log_path"] != "": + logfile = "{0}/{1}.log".format(conf["log"]["log_path"], conf["log"]["log_file"]) + # Assure directory exists + try: os.makedirs(conf["log"]["log_path"], exist_ok=True) # Python>3.2 + except TypeError: + try: + os.makedirs(conf["log"]["log_path"]) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST and os.path.isdir(conf["log"]["log_path"]): + pass + else: logger.error("Can not create log file {0}: 
{1}".format(conf["log"]["log_path"], exc)) + else: + logfile = "{0}.log".format(conf["log"]["log_file"]) + fileHandler = handlers.RotatingFileHandler(logfile, mode='a+', maxBytes=(1048576*5), backupCount=7) + if conf["log"]["format"] != "": + fileFormatter = logging.Formatter("{0}".format(conf["log"]["format"])) + fileHandler.setFormatter(fileFormatter) + else: + fileHandler.setFormatter(formatter) + fileHandler.setLevel(conf["log"]["logging_level"]) + logger.addHandler(fileHandler) + logger.info("Enabled Log File: {0}".format(logfile)) +else: + logger.info("Logging to file disabled.") + +# Override Log level if needed +if "logging_level" in conf["log"]: + log_level = conf["log"]["logging_level"] +elif "logging_level" in conf["general"]: + # For old configs + log_level = conf["general"]["logging_level"] +else: + # For older configs + logger.error("Log Level not in config file. Update your base config file!") + log_level = 20 + +logger.info("Setting Log Level to {0}".format(log_level)) +logging.getLogger('requests').setLevel(log_level) +logging.getLogger('elasticsearch').setLevel(log_level) +logging.getLogger('pastehunter').setLevel(log_level) + +# Configure Inputs +logger.info("Configure Inputs") +input_list = [] +for input_type, input_values in conf["inputs"].items(): + if input_values["enabled"]: + input_list.append(input_values["module"]) + logger.info("Enabled Input: {0}".format(input_type)) + + +# Configure Outputs +logger.info("Configure Outputs") +outputs = [] +for output_type, output_values in conf["outputs"].items(): + if output_values["enabled"]: + logger.info("Enabled Output: {0}".format(output_type)) + _module = importlib.import_module(output_values["module"]) + _class = getattr(_module, output_values["classname"]) + instance = _class() + outputs.append(instance) + + +def yara_index(default_rules, custom_rules, exclude_rules, blacklist, test_rules): + rules_list = {} + counter = 0 + if default_rules: + for filename in os.listdir(default_rules): + if filename in exclude_rules: + continue + if filename == 'blacklist.yar': + if blacklist: + logger.info("Enable Blacklist Rules") + else: + continue + if filename == 'test_rules.yar': + if test_rules: + logger.info("Enable Test Rules") + else: + continue + rules_list['namespace{0}'.format(counter)] = os.path.join(default_rules, filename) + logger.info("Adding rules from {0}".format(filename)) + counter += 1 + if custom_rules: + for filename in os.listdir(custom_rules): + rules_list['namespace{0}'.format(counter)] = os.path.join(custom_rules, filename) + logger.info("Adding custom rules from {0}".format(filename)) + counter += 1 + return rules_list + + +def paste_scanner(paste_data, rules_buff): + # Grab yara rules from passed buffer + # Fetch the raw paste + # scan the Paste + # Store the Paste + + rules_buff.seek(0) + rules = yara.load(file=rules_buff) + try: + with timeout(seconds=conf['general']['process_timeout']): + # Start a timer + start_time = time.time() + logger.debug("Found New {0} paste {1}".format(paste_data['pastesite'], paste_data['pasteid'])) + # get raw paste and hash them + try: + + # Stack questions dont have a raw endpoint + if ('stackexchange' in conf['inputs']) and (paste_data['pastesite'] in conf['inputs']['stackexchange']['site_list']): + # The body is already included in the first request so we do not need a second call to the API. + + # Unescape the code block strings in the json body. 
+ raw_body = paste_data['body'] + raw_paste_data = unquote_plus(raw_body) + + # now remove the old body key as we dont need it any more + del paste_data['body'] + + else: + raw_paste_uri = paste_data['scrape_url'] + if not raw_paste_uri: + logger.info('Unable to retrieve paste, no uri found.') + logger.debug(json.dumps(paste_data)) + raw_paste_data = "" + else: + raw_paste_data = requests.get(raw_paste_uri).text + + # Cover fetch site SSLErrors + except requests.exceptions.SSLError as e: + logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e)) + raw_paste_data = "" + + # General Exception + except Exception as e: + logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e)) + raw_paste_data = "" + + # Pastebin Cache + if raw_paste_data == "File is not ready for scraping yet. Try again in 1 minute.": + logger.info("Paste is still cached sleeping to try again") + sleep(45) + # get raw paste and hash them + raw_paste_uri = paste_data['scrape_url'] + # Cover fetch site SSLErrors + try: + raw_paste_data = requests.get(raw_paste_uri).text + except requests.exceptions.SSLError as e: + logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e)) + raw_paste_data = "" + + # General Exception + except Exception as e: + logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e)) + raw_paste_data = "" + + # Process the paste data here + try: + # Scan with yara + matches = rules.match(data=raw_paste_data, externals={'filename': paste_data.get('filename', '')}) + except Exception as e: + logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e)) + return False + + results = [] + for match in matches: + # For keywords get the word from the matched string + if match.rule == 'core_keywords' or match.rule == 'custom_keywords': + for s in match.strings: + rule_match = s[1].lstrip('$') + if rule_match not in results: + results.append(rule_match) + results.append(str(match.rule)) + + # But a break in here for the base64. Will use it later. + elif match.rule.startswith('b64'): + results.append(match.rule) + + # Else use the rule name + else: + results.append(match.rule) + + # Store additional fields for passing on to post processing + encoded_paste_data = raw_paste_data.encode('utf-8') + md5 = hashlib.md5(encoded_paste_data).hexdigest() + sha256 = hashlib.sha256(encoded_paste_data).hexdigest() + paste_data['MD5'] = md5 + paste_data['SHA256'] = sha256 + paste_data['raw_paste'] = raw_paste_data + paste_data['YaraRule'] = results + # Set the size for all pastes - This will override any size set by the source + paste_data['size'] = len(raw_paste_data) + + # Store all OverRides other options. + paste_site = paste_data['confname'] + store_all = conf['inputs'][paste_site]['store_all'] + # remove the confname key as its not really needed past this point + del paste_data['confname'] + + + # Blacklist Check + # If any of the blacklist rules appear then empty the result set + blacklisted = False + if conf['yara']['blacklist'] and 'blacklist' in results: + results = [] + blacklisted = True + logger.info("Blacklisted {0} paste {1}".format(paste_data['pastesite'], paste_data['pasteid'])) + + + # Post Process + + # If post module is enabled and the paste has a matching rule. 
+ post_results = paste_data + for post_process, post_values in conf["post_process"].items(): + if post_values["enabled"]: + if any(i in results for i in post_values["rule_list"]) or "ALL" in post_values["rule_list"]: + if not blacklisted: + logger.info("Running Post Module {0} on {1}".format(post_values["module"], paste_data["pasteid"])) + post_module = importlib.import_module(post_values["module"]) + post_results = post_module.run(results, + raw_paste_data, + paste_data + ) + + # Throw everything back to paste_data for ease. + paste_data = post_results + + + # If we have a result add some meta data and send to storage + # If results is empty, ie no match, and store_all is True, + # then append "no_match" to results. This will then force output. + + if store_all is True: + if len(results) == 0: + results.append('no_match') + + if len(results) > 0: + for output in outputs: + try: + output.store_paste(paste_data) + except Exception as e: + logger.error("Unable to store {0} to {1} with error {2}".format(paste_data["pasteid"], output, e)) + + end_time = time.time() + logger.debug("Processing Finished for {0} in {1} seconds".format( + paste_data["pasteid"], + (end_time - start_time) + )) + return True + except TimeoutError: + return False + +def main(): + logger.info("Compile Yara Rules") + try: + if conf['yara']['default_rules']: + pastehunter_path = pastehunter.__path__[0] + default_rules = os.path.join(pastehunter_path, "YaraRules") + else: + default_rules = False + + if conf["yara"]["custom_rules"] != "none": + custom_rules = conf["yara"]["custom_rules"] + else: + custom_rules = False + + rule_files = yara_index( + default_rules, + custom_rules, + conf['yara']['exclude_rules'], + conf['yara']['blacklist'], + conf['yara']['test_rules'] + ) + + rules = yara.compile(filepaths=rule_files, externals={'filename': ''}) + + # Used for sharing across processes + rules_buff = BytesIO() + rules.save(file=rules_buff) + + except Exception as e: + logger.exception("Unable to Create Yara index: {0}".format(e)) + sys.exit() + + # Create Queue to hold paste URI's + pool = multiprocessing.Pool(processes=5) + results = [] + + # Now Fill the Queue + try: + while True: + queue_count = 0 + + # Paste History + logger.info("Populating Queue") + if os.path.exists('paste_history.tmp'): + with open('paste_history.tmp') as json_file: + paste_history = json.load(json_file) + else: + paste_history = {} + + for input_name in input_list: + if input_name in paste_history: + input_history = paste_history[input_name] + else: + input_history = [] + + try: + + i = importlib.import_module(input_name) + # Get list of recent pastes + logger.info("Fetching paste list from {0}".format(input_name)) + paste_list, history = i.recent_pastes(conf, input_history) + for paste in paste_list: + # Create a new async job for the existing pool and apply it to "results" + results.append(pool.apply_async(paste_scanner, (paste, rules_buff))) + queue_count += 1 + paste_history[input_name] = history + except Exception as e: + logger.error("Unable to fetch list from {0}: {1}".format(input_name, e)) + + logger.debug("Writing History") + # Write History + with open('paste_history.tmp', 'w') as outfile: + json.dump(paste_history, outfile) + logger.info("Added {0} Items to the queue".format(queue_count)) + + # Wait for all work to finish + [result.wait() for result in results] + + # Slow it down a little + logger.info("Sleeping for " + str(conf['general']['run_frequency']) + " Seconds") + sleep(conf['general']['run_frequency']) + + + + except 
KeyboardInterrupt: + logger.info("Stopping Processes") + pool.terminate() + pool.join() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/pastehunter.py b/pastehunter.py deleted file mode 100644 index aeaf2d1..0000000 --- a/pastehunter.py +++ /dev/null @@ -1,399 +0,0 @@ -#!/usr/bin/python3 - -import os -import sys -import yara -import json -import hashlib -import requests -import multiprocessing -import importlib -import logging -from logging import handlers -import time -import errno -import signal -from time import sleep -from urllib.parse import unquote_plus -from common import parse_config -from postprocess import post_email - - -from multiprocessing import Queue - -VERSION = 1.0 - -# Setup Default logging -root = logging.getLogger() -ch = logging.StreamHandler() -ch.setLevel(logging.DEBUG) -formatter = logging.Formatter('%(levelname)s:%(filename)s:%(message)s') -ch.setFormatter(formatter) -root.addHandler(ch) - -logger = logging.getLogger('pastehunter') -logger.setLevel(logging.INFO) - -# Version info -logger.info("Starting PasteHunter Version: {0}".format(VERSION)) - -# Parse the config file -logger.info("Reading Configs") -conf = parse_config() - -# If the config failed to parse -if not conf: - sys.exit() - -class TimeoutError(Exception): - pass - -class timeout: - def __init__(self, seconds=1, error_message='Timeout'): - self.seconds = seconds - self.error_message = error_message - def handle_timeout(self, signum, frame): - print("Process timeout: {0}".format(self.error_message)) - sys.exit(0) - def __enter__(self): - signal.signal(signal.SIGALRM, self.handle_timeout) - signal.alarm(self.seconds) - def __exit__(self, type, value, traceback): - signal.alarm(0) - - - -# Set up the log file -if "log" in conf and conf["log"]["log_to_file"]: - if conf["log"]["log_path"] != "": - logfile = "{0}/{1}.log".format(conf["log"]["log_path"], conf["log"]["log_file"]) - # Assure directory exists - try: os.makedirs(conf["log"]["log_path"], exist_ok=True) # Python>3.2 - except TypeError: - try: - os.makedirs(conf["log"]["log_path"]) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST and os.path.isdir(conf["log"]["log_path"]): - pass - else: logger.error("Can not create log file {0}: {1}".format(conf["log"]["log_path"], exc)) - else: - logfile = "{0}.log".format(conf["log"]["log_file"]) - fileHandler = handlers.RotatingFileHandler(logfile, mode='a+', maxBytes=(1048576*5), backupCount=7) - if conf["log"]["format"] != "": - fileFormatter = logging.Formatter("{0}".format(conf["log"]["format"])) - fileHandler.setFormatter(fileFormatter) - else: - fileHandler.setFormatter(logFormatter) - fileHandler.setLevel(conf["log"]["logging_level"]) - logger.addHandler(fileHandler) - logger.info("Enabled Log File: {0}".format(logfile)) -else: - logger.info("Logging to file disabled.") - -# Override Log level if needed -if "logging_level" in conf["log"]: - log_level = conf["log"]["logging_level"] -elif "logging_level" in conf["general"]: - # For old configs - log_level = conf["general"]["logging_level"] -else: - # For older configs - logger.error("Log Level not in config file. 
Update your base config file!") - log_level = 20 - -logger.info("Setting Log Level to {0}".format(log_level)) -logging.getLogger('requests').setLevel(log_level) -logging.getLogger('elasticsearch').setLevel(log_level) -logging.getLogger('pastehunter').setLevel(log_level) - -# Configure Inputs -logger.info("Configure Inputs") -input_list = [] -for input_type, input_values in conf["inputs"].items(): - if input_values["enabled"]: - input_list.append(input_values["module"]) - logger.info("Enabled Input: {0}".format(input_type)) - - -# Configure Outputs -logger.info("Configure Outputs") -outputs = [] -for output_type, output_values in conf["outputs"].items(): - if output_values["enabled"]: - logger.info("Enabled Output: {0}".format(output_type)) - _module = importlib.import_module(output_values["module"]) - _class = getattr(_module, output_values["classname"]) - instance = _class() - outputs.append(instance) - - -def yara_index(rule_path, blacklist, test_rules): - index_file = os.path.join(rule_path, 'index.yar') - with open(index_file, 'w') as yar: - for filename in os.listdir(rule_path): - if filename.endswith('.yar') and filename != 'index.yar': - if filename == 'blacklist.yar': - if blacklist: - logger.info("Enable Blacklist Rules") - else: - continue - if filename == 'test_rules.yar': - if test_rules: - logger.info("Enable Test Rules") - else: - continue - include = 'include "{0}"\n'.format(filename) - yar.write(include) - - -def paste_scanner(): - # Get a paste URI from the Queue - # Fetch the raw paste - # scan the Paste - # Store the Paste - while True: - if q.empty(): - # Queue was empty, sleep to prevent busy loop - sleep(0.5) - else: - paste_data = q.get() - with timeout(seconds=conf['general']['process_timeout']): - # Start a timer - start_time = time.time() - logger.debug("Found New {0} paste {1}".format(paste_data['pastesite'], paste_data['pasteid'])) - # get raw paste and hash them - try: - - # Stack questions dont have a raw endpoint - if ('stackexchange' in conf['inputs']) and (paste_data['pastesite'] in conf['inputs']['stackexchange']['site_list']): - # The body is already included in the first request so we do not need a second call to the API. - - # Unescape the code block strings in the json body. - raw_body = paste_data['body'] - raw_paste_data = unquote_plus(raw_body) - - # now remove the old body key as we dont need it any more - del paste_data['body'] - - else: - raw_paste_uri = paste_data['scrape_url'] - if not raw_paste_uri: - logger.info('Unable to retrieve paste, no uri found.') - logger.debug(json.dumps(paste_data)) - raw_paste_data = "" - else: - raw_paste_data = requests.get(raw_paste_uri).text - - # Cover fetch site SSLErrors - except requests.exceptions.SSLError as e: - logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e)) - raw_paste_data = "" - - # General Exception - except Exception as e: - logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e)) - raw_paste_data = "" - - # Pastebin Cache - if raw_paste_data == "File is not ready for scraping yet. 
Try again in 1 minute.": - logger.info("Paste is still cached sleeping to try again") - sleep(45) - # get raw paste and hash them - raw_paste_uri = paste_data['scrape_url'] - # Cover fetch site SSLErrors - try: - raw_paste_data = requests.get(raw_paste_uri).text - except requests.exceptions.SSLError as e: - logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e)) - raw_paste_data = "" - - # General Exception - except Exception as e: - logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e)) - raw_paste_data = "" - - # Process the paste data here - try: - # Scan with yara - matches = rules.match(data=raw_paste_data, externals={'filename': paste_data.get('filename', '')}) - except Exception as e: - logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e)) - continue - - results = [] - for match in matches: - # For keywords get the word from the matched string - if match.rule == 'core_keywords' or match.rule == 'custom_keywords': - for s in match.strings: - rule_match = s[1].lstrip('$') - if rule_match not in results: - results.append(rule_match) - results.append(str(match.rule)) - - # But a break in here for the base64. Will use it later. - elif match.rule.startswith('b64'): - results.append(match.rule) - - # Else use the rule name - else: - results.append(match.rule) - - # Store additional fields for passing on to post processing - encoded_paste_data = raw_paste_data.encode('utf-8') - md5 = hashlib.md5(encoded_paste_data).hexdigest() - sha256 = hashlib.sha256(encoded_paste_data).hexdigest() - paste_data['MD5'] = md5 - paste_data['SHA256'] = sha256 - paste_data['raw_paste'] = raw_paste_data - paste_data['YaraRule'] = results - # Set the size for all pastes - This will override any size set by the source - paste_data['size'] = len(raw_paste_data) - - # Store all OverRides other options. - paste_site = paste_data['confname'] - store_all = conf['inputs'][paste_site]['store_all'] - # remove the confname key as its not really needed past this point - del paste_data['confname'] - - - # Blacklist Check - # If any of the blacklist rules appear then empty the result set - blacklisted = False - if conf['yara']['blacklist'] and 'blacklist' in results: - results = [] - blacklisted = True - logger.info("Blacklisted {0} paste {1}".format(paste_data['pastesite'], paste_data['pasteid'])) - - - # Post Process - - # If post module is enabled and the paste has a matching rule. - post_results = paste_data - for post_process, post_values in conf["post_process"].items(): - if post_values["enabled"]: - if any(i in results for i in post_values["rule_list"]) or "ALL" in post_values["rule_list"]: - if not blacklisted: - logger.info("Running Post Module {0} on {1}".format(post_values["module"], paste_data["pasteid"])) - post_module = importlib.import_module(post_values["module"]) - post_results = post_module.run(results, - raw_paste_data, - paste_data - ) - - # Throw everything back to paste_data for ease. - paste_data = post_results - - - # If we have a result add some meta data and send to storage - # If results is empty, ie no match, and store_all is True, - # then append "no_match" to results. This will then force output. 
- - if store_all is True: - if len(results) == 0: - results.append('no_match') - - if len(results) > 0: - for output in outputs: - try: - output.store_paste(paste_data) - except Exception as e: - logger.error("Unable to store {0} to {1} with error {2}".format(paste_data["pasteid"], output, e)) - - end_time = time.time() - logger.debug("Processing Finished for {0} in {1} seconds".format( - paste_data["pasteid"], - (end_time - start_time) - )) - - - -if __name__ == "__main__": - logger.info("Compile Yara Rules") - try: - # Update the yara rules index - yara_index(conf['yara']['rule_path'], - conf['yara']['blacklist'], - conf['yara']['test_rules']) - - # Compile the yara rules we will use to match pastes - index_file = os.path.join(conf['yara']['rule_path'], 'index.yar') - rules = yara.compile(index_file, externals={'filename': ''}) - except Exception as e: - logger.exception("Unable to Create Yara index: ", e) - sys.exit() - - # Create Queue to hold paste URI's - q = Queue() - processes = [] - - # Now Fill the Queue - try: - while True: - queue_count = 0 - counter = 0 - if len(processes) < 5: - for i in range(5-len(processes)): - logger.warning("Creating New Process") - m = multiprocessing.Process(target=paste_scanner) - # Add new process to list so we can run join on them later. - processes.append(m) - m.start() - for process in processes: - if not process.is_alive(): - logger.warning("Restarting Dead Process") - del processes[counter] - m = multiprocessing.Process(target=paste_scanner) - # Add new process to list so we can run join on them later. - processes.append(m) - m.start() - counter += 1 - - # Check if the processors are active - # Paste History - logger.info("Populating Queue") - if os.path.exists('paste_history.tmp'): - with open('paste_history.tmp') as json_file: - paste_history = json.load(json_file) - else: - paste_history = {} - - for input_name in input_list: - if input_name in paste_history: - input_history = paste_history[input_name] - else: - input_history = [] - - try: - - i = importlib.import_module(input_name) - # Get list of recent pastes - logger.info("Fetching paste list from {0}".format(input_name)) - paste_list, history = i.recent_pastes(conf, input_history) - for paste in paste_list: - q.put(paste) - queue_count += 1 - paste_history[input_name] = history - except Exception as e: - logger.error("Unable to fetch list from {0}: {1}".format(input_name, e)) - - logger.debug("Writing History") - # Write History - with open('paste_history.tmp', 'w') as outfile: - json.dump(paste_history, outfile) - logger.info("Added {0} Items to the queue".format(queue_count)) - - for proc in processes: - proc.join(2) - - # Slow it down a little - logger.info("Sleeping for " + str(conf['general']['run_frequency']) + " Seconds") - sleep(conf['general']['run_frequency']) - - - - except KeyboardInterrupt: - logger.info("Stopping Processes") - for proc in processes: - proc.terminate() - proc.join() - diff --git a/pastehunter/YaraRules/CryptoExchangeApi.yar b/pastehunter/YaraRules/CryptoExchangeApi.yar new file mode 100644 index 0000000..c130889 --- /dev/null +++ b/pastehunter/YaraRules/CryptoExchangeApi.yar @@ -0,0 +1,88 @@ +rule CryptoExchangeApi +{ + meta: + description = "Contains Crypro Exchange API URL" + author = "Jason Schorr (0xBanana)" + source = "https://github.com/cryptodefense/PasteHunter-Yara/blob/master/CryptoExchangeApi.yar" + strings: + $a = "api.binance.com" nocase wide ascii + $a0 = "1btcxe.com/api" nocase wide ascii + $a1 = "acx.io/api" nocase wide ascii + $a2 = 
"anxpro.com/api" nocase wide ascii + $a3 = "anybits.com/api" nocase wide ascii + $a4 = "www.bcex.top" nocase wide ascii + $a5 = "api.bibox.com" nocase wide ascii + $a6 = "bit2c.co.il" nocase wide ascii + $a7 = "api.bitfinex.com" nocase wide ascii + $a8 = "api.bitfinex.com" nocase wide ascii + $a9 = "api.bitflyer.jp" nocase wide ascii + $aa = "api.bitforex.com" nocase wide ascii + $ab = "bitibu.com" nocase wide ascii + $ac = "bitlish.com/api" nocase wide ascii + $ad = "www.bitmex.com" nocase wide ascii + $ae = "bitsane.com/api" nocase wide ascii + $af = "api.bitso.com" nocase wide ascii + $ag = "www.bitstamp.net/api" nocase wide ascii + $ah = "www.bitstamp.net/api" nocase wide ascii + $ai = "api.bl3p.eu" nocase wide ascii + $aj = "braziliex.com/api/v1" nocase wide ascii + $ak = "btc-alpha.com/api" nocase wide ascii + $al = "www.btcbox.co.jp/api" nocase wide ascii + $am = "www.btcexchange.ph/api" nocase wide ascii + $an = "btc-trade.com.ua/api" nocase wide ascii + $ao = "www.btcturk.com/api" nocase wide ascii + $ap = "www.buda.com/api" nocase wide ascii + $aq = "bx.in.th/api" nocase wide ascii + $ar = "cex.io/api" nocase wide ascii + $as = "api.cobinhood.com" nocase wide ascii + $at = "api.coinbase.com" nocase wide ascii + $au = "api.prime.coinbase.com" nocase wide ascii + $av = "api.pro.coinbase.com" nocase wide ascii + $aw = "coincheck.com/api" nocase wide ascii + $ax = "www.coinexchange.io/api/v1" nocase wide ascii + $ay = "coinfalcon.com" nocase wide ascii + $az = "webapi.coinfloor.co.uk:8090/bist" nocase wide ascii + $aa1 = "coinmate.io/api" nocase wide ascii + $aa2 = "api.coinone.co.kr" nocase wide ascii + $aa3 = "api.crex24.com" nocase wide ascii + $aa4 = "api.cryptonbtc.com" nocase wide ascii + $aa5 = "www.deribit.com" nocase wide ascii + $aa6 = "api.ethfinex.com" nocase wide ascii + $aa7 = "api.fcoin.com" nocase wide ascii + $aa8 = "api.flowbtc.com:8405/ajax" nocase wide ascii + $aa9 = "www.fybse.se/api/SEK" nocase wide ascii + $aa0 = "www.fybsg.com/api/SGD" nocase wide ascii + $aab = "api.gatecoin.com" nocase wide ascii + $aac = "api.gdax.com" nocase wide ascii + $aad = "api.gemini.com" nocase wide ascii + $aae = "getbtc.org/api" nocase wide ascii + $aaf = "api.hitbtc.com" nocase wide ascii + $aag = "api.hitbtc.com" nocase wide ascii + $aah = "api.huobi.com" nocase wide ascii + $aai = "ice3x.com/api" nocase wide ascii + $aaj = "api.itbit.com" nocase wide ascii + $aak = "www.jubi.com/api" nocase wide ascii + $aal = "kuna.io" nocase wide ascii + $aam = "api.lakebtc.com" nocase wide ascii + $aan = "api.lbank.info" nocase wide ascii + $aao = "api.liquid.com" nocase wide ascii + $aap = "api.livecoin.net" nocase wide ascii + $aaq = "api.mybitx.com/api" nocase wide ascii + $aar = "mixcoins.com/api" nocase wide ascii + $aas = "novaexchange.com/remote" nocase wide ascii + $aat = "paymium.com/api" nocase wide ascii + $aau = "api.quadrigacx.com" nocase wide ascii + $aav = "www.rightbtc.com/api" nocase wide ascii + $aaw = "www.southxchange.com/api" nocase wide ascii + $aax = "api.theocean.trade/api" nocase wide ascii + $aay = "api.therocktrading.com" nocase wide ascii + $aaz = "www.tidebit.com" nocase wide ascii + $ba = "open-api.uex.com/open/api" nocase wide ascii + $bb = "api.vaultoro.com" nocase wide ascii + $bc = "cryptottlivewebapi.xbtce.net:8443/api" nocase wide ascii + $bd = "yunbi.com" nocase wide ascii + $be = "api.zaif.jp" nocase wide ascii + + condition: + any of them +} \ No newline at end of file diff --git a/YaraRules/api_keys.yar b/pastehunter/YaraRules/api_keys.yar similarity 
index 100% rename from YaraRules/api_keys.yar rename to pastehunter/YaraRules/api_keys.yar diff --git a/pastehunter/YaraRules/aws.yar b/pastehunter/YaraRules/aws.yar new file mode 100644 index 0000000..37ef230 --- /dev/null +++ b/pastehunter/YaraRules/aws.yar @@ -0,0 +1,36 @@ +rule aws_cli +{ + meta: + author = "@KevTheHermit" + info = "Part of PasteHunter" + reference = "https://github.com/kevthehermit/PasteHunter" + + strings: + $a1 = "aws s3 " ascii + $a2 = "aws ec2 " ascii + $a3 = "aws ecr " ascii + $a4 = "aws cognito-identity" ascii + $a5 = "aws iam "ascii + $a6 = "aws waf " ascii + + condition: + any of them + +} + +rule sw_bucket +{ + meta: + author = "@KevTheHermit" + info = "Part of PasteHunter" + reference = "https://github.com/kevthehermit/PasteHunter" + + strings: + $a1 = "s3.amazonaws.com" ascii + + condition: + any of them + + + +} diff --git a/YaraRules/base64.yar b/pastehunter/YaraRules/base64.yar similarity index 90% rename from YaraRules/base64.yar rename to pastehunter/YaraRules/base64.yar index 6b3b6f0..913d12c 100644 --- a/YaraRules/base64.yar +++ b/pastehunter/YaraRules/base64.yar @@ -88,7 +88,14 @@ rule b64_url $a4 = "V1dXLg" // WWW. // ignore vendor certs in this rule. The certs rule will pick them up if we want them - $not1 = "GlobalSign Root CA" nocase + $not1 = "GlobalSign Root CA" nocase + + // Ignore data: uris. These are common in html and svg files. + $not2 = /data:[a-z\/]+;(base64,)?aHR0cDov/ nocase + $not3 = /data:[a-z\/]+;(base64,)?SFRUUDov/ nocase + $not4 = /data:[a-z\/]+;(base64,)?d3d3Lg/ nocase + $not5 = /data:[a-z\/]+;(base64,)?V1dXLg/ nocase + condition: any of ($a*) and not any of ($not*) diff --git a/pastehunter/YaraRules/blacklist.yar b/pastehunter/YaraRules/blacklist.yar new file mode 100644 index 0000000..117e526 --- /dev/null +++ b/pastehunter/YaraRules/blacklist.yar @@ -0,0 +1,20 @@ +rule blacklist +{ + meta: + author = "@KevTheHermit" + info = "Part of PasteHunter" + reference = "https://github.com/kevthehermit/PasteHunter" + + strings: + $a = "#EXTINF:" nocase // IPTV stream Lists. 
+ $b = "--app-name=LeagueClient" nocase // League of Legends Debug Log + $c = "common.application_name: LeagueClient" // League of Legends Debug Log + $d = /java\.(util|lang|io)/ // Minecraft and java errors + $e = "Traceback (most recent call last)" + $f = /define\(.*?\)|require_once\(.*?\)/ + $g = "Technic Launcher is starting" // Minecraft mod dumps + $h = "OTL logfile created on" // + condition: + any of them + +} \ No newline at end of file diff --git a/pastehunter/YaraRules/certificates.yar b/pastehunter/YaraRules/certificates.yar new file mode 100644 index 0000000..6b6a667 --- /dev/null +++ b/pastehunter/YaraRules/certificates.yar @@ -0,0 +1,25 @@ +/* + This rule will look for common encoded certificates and secrets +*/ + +rule certificates +{ + meta: + author = "@KevTheHermit" + info = "Part of PasteHunter" + reference = "https://github.com/kevthehermit/PasteHunter" + + strings: + $ssh_priv = "BEGIN RSA PRIVATE KEY" wide ascii nocase + $openssh_priv = "BEGIN OPENSSH PRIVATE KEY" wide ascii nocase + $dsa_priv = "BEGIN DSA PRIVATE KEY" wide ascii nocase + $ec_priv = "BEGIN EC PRIVATE KEY" wide ascii nocase + $pgp_priv = "BEGIN PGP PRIVATE KEY" wide ascii nocase + $pem_cert = "BEGIN CERTIFICATE" wide ascii nocase + $pkcs7 = "BEGIN PKCS7" + + condition: + any of them + +} + diff --git a/YaraRules/core_keywords.yar b/pastehunter/YaraRules/core_keywords.yar similarity index 100% rename from YaraRules/core_keywords.yar rename to pastehunter/YaraRules/core_keywords.yar diff --git a/YaraRules/database.yar b/pastehunter/YaraRules/database.yar similarity index 100% rename from YaraRules/database.yar rename to pastehunter/YaraRules/database.yar diff --git a/pastehunter/YaraRules/email_filter.yar b/pastehunter/YaraRules/email_filter.yar new file mode 100644 index 0000000..8d546bd --- /dev/null +++ b/pastehunter/YaraRules/email_filter.yar @@ -0,0 +1,19 @@ +/* + These rules attempt to find email leaks +*/ + +rule email_filter +{ + meta: + author = "@kovacsbalu" + info = "Better email pattern" + reference = "https://github.com/securenetworx/PasteHunter/tree/fix-email-filter" + + strings: + $email_add = /\b[\w-]+(\.[\w-]+)*@[\w-]+(\.[\w-]+)*\.[a-zA-Z-]+[\w-]\b/ + condition: + #email_add > 20 + +} + + diff --git a/YaraRules/general.yar b/pastehunter/YaraRules/general.yar similarity index 100% rename from YaraRules/general.yar rename to pastehunter/YaraRules/general.yar diff --git a/pastehunter/YaraRules/github_dorks.yar b/pastehunter/YaraRules/github_dorks.yar new file mode 100644 index 0000000..227e83b --- /dev/null +++ b/pastehunter/YaraRules/github_dorks.yar @@ -0,0 +1,551 @@ +/* + These are rule derived from github-dorks (https://github.com/techgaun/github-dorks) + github-dorks is under the Apache License 2.0: + https://github.com/techgaun/github-dorks/blob/master/LICENSE +*/ +rule NPMRegistry { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "NPM Registry files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "_auth" nocase + condition: + all of them and filename matches /.*\.npmrc$/is +} + +rule DockerCfg { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Docker config files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "auth" nocase + condition: + all of them and filename matches /.*\.dockercfg$/is +} +rule PrivateKeys { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Private key files (Created as 
part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "private" nocase + condition: + all of them and (filename matches /.*\.pem$/is or filename matches /\.ppk$/is + or filename matches /(\/|^)id_(r|d)sa$/is) +} +rule SQLDump { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "SQL dumps (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "mysql" nocase + $ = "dump" nocase + condition: + all of them and (filename matches /.*\.sql$/is) +} +rule S3Credentials { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "S3 Credentials (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "aws_access_key_id" nocase + condition: + filename matches /(\/|^)\.s3cfg$/is or filename matches /(\/|^)credentials$/is and all of them +} +rule WPConfig { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Wordpress config files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)wp-config.php$/is +} +rule HTPasswd { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "htpasswd files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)\.htpasswd$/is +} +rule EnvFile { + meta: + author = "Dylan Katz (@Plazmaz)" + description = ".env files, Matches laravel, mailservers, and various CI and config files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $db_usr = "DB_USERNAME" + $mail_host = "MAIL_HOST=smtp." 
+ $excl = "homestead" nocase + condition: + filename matches /(\/|^)\.env/is and any of ($db_usr, $mail_host) and not $excl +} +rule GitCredentials { + meta: + author = "Dylan Katz (@Plazmaz)" + description = ".git-credentials files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)\.git-credentials$/is +} +rule PivotalToken { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "PivotalTracker token (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "PT_TOKEN" + condition: + any of them +} + +rule BashProfile { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Sensitive info in profile files, specifically .bashrc and .bash_profile (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "password" nocase + $ = "mailchimp" nocase + $ = "aws" nocase + $ = "secret" nocase + condition: + filename matches /(\/|^)\.bash(rc|_profile)$/is and any of them +} +rule AmazonCredentials { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Generic AWS credentials for RDS or EC2 (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $rds = "rds.amazonaws.com" nocase + $ec2 = "ec2.amazonaws.com" nocase + $pass = "password" nocase + condition: + $pass and ($rds or $ec2) +} +rule MongoLab { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "MongoLab Credentials (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "mongolab.com" nocase + condition: + filename matches /.*(\.conf|\.yaml|\.yml|\.json)$/is and all of them +} +rule RoboMongo { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "RoboMongo Credentials (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)robomongo\.json$/is +} +rule JSForce { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Salesforce Credentials for JSForce (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "jsforce" nocase + $ = "conn.login" nocase + condition: + filename matches /.*js$/is and all of them +} +rule Salesforce { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Generic salesforce Credentials (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "SF_USERNAME" nocase + $ = "salesforce" nocase + condition: + all of them +} +rule Tugboat { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "DigitalOcean Tugboat Configurations (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "_tugboat" + condition: + filename matches /(\/|^)\.tugboat$/is and not any of them +} +rule Hub { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Hub files that contain oauth tokens (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = /oauth_token: [a-zA-Z0-9]+/ nocase + condition: + filename matches /(\/|^)hub$/is and any of them +} +rule NetRC { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Netrc files that contain 
'password' or 'key' (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "password" + $ = "key" + condition: + filename matches /(\/|^)\.?_?netrc/is and any of them +} +rule Filezilla { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Filezilla configuration files with passwords (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "Pass" + condition: + (filename matches /(\/|^)filezilla\.xml$/is or filename matches /(\/|^)recentservers.xml$/is) and any of them +} +rule Docker { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Docker authentication config (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "auths" + condition: + filename matches /(\/|^)config\.json$/is and any of them +} +rule IdeaKey { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "License Keys for IDEA IDEs (IntelliJ, PyCharm, etc) (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)idea[0-9]{0,}\.key$/is +} +rule DBServers { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Database servers (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)connections\.xml$/is + or filename matches /(\/|^)\.pgpass$/is +} +rule Proftpd { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Proftpd configuration files created by cpanel (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)proftpdpasswd$/is +} +rule Ventrilo { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Ventrilo server configuration files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)ventrilo_srv\.ini/is +} +rule WinFrameClient { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "WinFrame-Client configuration used to connect to Citrix Application Servers (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "[WFClient] Password=" + condition: + all of them and filename matches /.*\.ica/is +} +rule CounterStrikeRCON { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "RCON Credentials for CounterStrike servers (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "rcon" nocase + $ = "password" nocase + condition: + all of them and filename matches /(\/|^)server\.cfg/is +} +rule JekyllGitHub { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Jekyll Token for GitHub (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "JEKYLL_GITHUB_TOKEN" nocase + condition: + all of them +} +rule SshdConfig { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "SSHD config files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)sshd_config/is +} +rule DhcpConfig { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "DHCP Config files (Created as part of 
PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)dhcpd\.conf/is +} +rule Phoenix { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Phoenix prod config and secret files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "prod.secret.exs" + condition: + filename matches /(\/|^)prod\.secret\.exs/is or (filename matches /(\/|^)prod\.exs/is and not any of them) +} +rule JoomlaConfig { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Joomla config files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "JConfig" nocase + $ = "password" nocase + condition: + filename matches /(\/|^)configuration.php/is and all of them +} +rule PasswdFile { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Unix /etc/passwd files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "/bin/bash" nocase + $ = "/bin/sh" nocase + $ = "/usr/sbin/nologin" nocase + condition: + filename matches /(\/|^)passwd$/is and any of them +} +rule ShadowFile { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Unix /etc/shadow files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = ":17737:0:99999" nocase + $ = "root:*:" nocase + // MD5 + $ = "$1" nocase + // SHA-256 + $ = "$5" nocase + // SHA-1 + $ = "$6" nocase + condition: + filename matches /(\/|^)passwd$/is and any of them +} +rule Shodan { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Shodan API Keys (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = /shodan_api_key: [a-zA-Z0-9]+/ nocase + $ = /shodan_api_key=[a-zA-Z0-9]+/ nocase + condition: + any of them +} +rule Avast { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Avast license files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "support.avast.com" nocase + condition: + all of them and (filename matches /.*\.avastlic$/is) +} +rule DBeaver { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "DBeaver configuration files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)dbeaver-data-sources.xml$/is +} +rule ESmtp { + meta: + author = "Dylan Katz (@Plazmaz)" + description = ".esmtpdrc files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "password" nocase + condition: + filename matches /(\/|^)\.esmtprc$/is and all of them +} +rule Homebrew { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Homebrew github tokens (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "HOMEBREW_GITHUB_API_TOKEN" nocase + condition: + all of them +} +rule MLab { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "MLab mongodb credentials (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = ".mlab.com" nocase + $ = "password" nocase + condition: + all of them +} +rule Firefox { + meta: + 
author = "Dylan Katz (@Plazmaz)" + description = "Firefox saved passwords (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)logins\.json$/is +} +rule CCCam { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "CCCam server config files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)CCCam\.cfg$/is +} +rule IRC { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Nickserv auth configs (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "msg nickserv identify" nocase + condition: + filename matches /(\/|^)config$/is and all of them +} +rule Django { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Django secret keys (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "SECRET_KEY" nocase + condition: + filename matches /(\/|^)settings.py$/is and all of them +} +rule RailsSecrets { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Ruby on rails secrets.yml files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "password" nocase + condition: + filename matches /(\/|^)secrets\.yml$/is and all of them +} +rule RailsMasterKey { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Rails master key files (used for decrypting credentials.yml.enc for Rails 5.2+) (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + strings: + $ = "password" nocase + condition: + filename matches /(\/|^)config\/master\.key$/is and all of them +} +rule AtomDeployments { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Multiple files created by different atom extensions for authentication (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)deployment-config\.json$/is or + filename matches /(\/|^)remote-sync\.json$/is or + filename matches /(\/|^)\.ftpconfig$/is +} +rule VscodeSftp { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "VSCode SFTP files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)\.vscode\/sftp\.json$/is +} +rule SublimeSftp { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Sublime SFTP files (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)sftp-config\.json$/is +} +rule JetbrainsCreds { + meta: + author = "Dylan Katz (@Plazmaz)" + description = "Jetbrains IDE webserver credentials with encoded passwords (Created as part of PasteHunter)" + reference = "https://github.com/techgaun/github-dorks" + date = "09/15/19" + condition: + filename matches /(\/|^)WebServers\.xml$/is +} \ No newline at end of file diff --git a/YaraRules/hak5.yar b/pastehunter/YaraRules/hak5.yar similarity index 100% rename from YaraRules/hak5.yar rename to pastehunter/YaraRules/hak5.yar diff --git a/YaraRules/password_leak.yar b/pastehunter/YaraRules/password_leak.yar similarity index 100% rename from YaraRules/password_leak.yar rename to pastehunter/YaraRules/password_leak.yar 
diff --git a/YaraRules/powershell.yar b/pastehunter/YaraRules/powershell.yar similarity index 100% rename from YaraRules/powershell.yar rename to pastehunter/YaraRules/powershell.yar diff --git a/pastehunter/YaraRules/test_rules.yar b/pastehunter/YaraRules/test_rules.yar new file mode 100644 index 0000000..c6f884f --- /dev/null +++ b/pastehunter/YaraRules/test_rules.yar @@ -0,0 +1,66 @@ +/* + These are test rules +*/ + +rule test_hex_MZ +{ + meta: + author = "kevthehermit" + info = "Part of PasteHunter" + reference = "https://github.com/kevthehermit/PasteHunter" + + strings: + $mz_hex = "4d5a" nocase wide ascii + + condition: + $mz_hex at 0 + +} + +rule test_vbscript +{ + meta: + author = "kevthehermit" + info = "Part of PasteHunter" + reference = "https://github.com/kevthehermit/PasteHunter" + + strings: + $a = "Function" nocase wide ascii fullword + $b = "CreateObject" nocase wide ascii fullword + $c = "Wscript" nocase wide ascii fullword + $d = "As Long" nocase wide ascii fullword + $e = "run" nocase wide ascii fullword + $f = "for each" nocase wide ascii fullword + $g = "end function" nocase wide ascii fullword + $h = "NtAllocateVirtualMemory" nocase wide ascii fullword + $i = "NtWriteVirtualMemory" nocase wide ascii fullword + + + condition: + 5 of them +} + +rule test_autoit +{ + meta: + author = "kevthehermit" + info = "Part of PasteHunter" + reference = "https://github.com/kevthehermit/PasteHunter" + + strings: + $tray = "NoTrayIcon" nocase wide ascii fullword + $a = "iniread" nocase wide ascii fullword + $b = "fileinstall" nocase wide ascii fullword + $c = "EndFunc" nocase wide ascii fullword + $d = "FileRead" nocase wide ascii fullword + $e = "DllStructSetData" nocase wide ascii fullword + $f = "Global Const" nocase wide ascii fullword + $g = "Run(@AutoItExe" nocase wide ascii fullword + $h = "StringReplace" nocase wide ascii fullword + $i = "filewrite" nocase wide ascii fullword + + + + condition: + ($tray and 3 of them) or (5 of them) +} \ No newline at end of file diff --git a/outputs/__init__.py b/pastehunter/__init__.py similarity index 100% rename from outputs/__init__.py rename to pastehunter/__init__.py diff --git a/pastehunter/common.py b/pastehunter/common.py new file mode 100644 index 0000000..54fd4a3 --- /dev/null +++ b/pastehunter/common.py @@ -0,0 +1,28 @@ +import json +import logging +import os.path + +logger = logging.getLogger('pastehunter') +home = os.path.expanduser("~") + +# Parse the config file in to a dict +def parse_config(): + conf = None + settings_file = os.path.join(home, ".config", "pastehunter.json") + + if os.path.exists(settings_file): + conf_file = settings_file + else: + #ToDo: Copy base settings to the settings file + conf_file = None + + if conf_file: + try: + with open(conf_file, 'r') as read_conf: + conf = json.load(read_conf) + except Exception as e: + logger.error("Unable to parse config file: {0}".format(e)) + else: + logger.error("Unable to read config file '~/.config/pastehunter.json'") + + return conf diff --git a/pastehunter/inputs/__init__.py b/pastehunter/inputs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pastehunter/inputs/dumpz.py b/pastehunter/inputs/dumpz.py new file mode 100644 index 0000000..b58e7a6 --- /dev/null +++ b/pastehunter/inputs/dumpz.py @@ -0,0 +1,47 @@ +import requests +import logging + +logger = logging.getLogger('pastehunter') + +def recent_pastes(conf, input_history): + # populate vars from config + paste_limit = conf['inputs']['dumpz']['paste_limit'] + api_scrape = 
conf['inputs']['dumpz']['api_scrape'] + history = [] + paste_list = [] + try: + # Create the API uri + scrape_uri = '{0}?limit={1}'.format(api_scrape, paste_limit) + # Get some pastes and convert to json + # Get last 'paste_limit' pastes + paste_list_request = requests.get(scrape_uri) + paste_list_json = paste_list_request.json() + + for paste in paste_list_json['dumps']: + # Track paste ids to prevent dupes + history.append(paste['id']) + if paste['id'] in input_history: + continue + + # We don't want password protected pastes + if paste['pwd'] == 1: + continue + + # Create a new paste dict for us to normalize + paste_data = paste + paste_data['confname'] = 'dumpz' + paste_data['pasteid'] = paste['id'] + paste_data['pastesite'] = 'dumpz.org' + + #paste_data['scrape_url'] = '{0}{1}'.format(conf['dumpz']['api_raw'], paste['id']) + + paste_data['scrape_url'] = 'https://dumpz.org/{0}/text/'.format(paste['id']) + + # Add a date field that kibana will map + paste_data['@timestamp'] = paste_data['date'] + paste_list.append(paste_data) + return paste_list, history + + except Exception as e: + logger.error("Unable to parse paste results: {0}".format(e)) + return paste_list, history \ No newline at end of file diff --git a/pastehunter/inputs/gists.py b/pastehunter/inputs/gists.py new file mode 100644 index 0000000..fd7b1e7 --- /dev/null +++ b/pastehunter/inputs/gists.py @@ -0,0 +1,89 @@ +import requests +import math +import logging +from datetime import datetime + +# Set some logging options +logger = logging.getLogger('pastehunter') +logging.getLogger('requests').setLevel(logging.ERROR) + +api_uri = 'https://api.github.com/gists/public' +api_version = 'application/vnd.github.v3+json' # Set Accept header to force api v3 + +# Some people use gists to store large blobs of data every 17 minutes. This just slows down the kibana UI + + + +def recent_pastes(conf, input_history): + oauth_token = conf['inputs']['gists']['api_token'] + gist_limit = conf['inputs']['gists']['api_limit'] + headers = {'user-agent': 'PasteHunter', + 'Accept': api_version, + 'Authorization': 'token {0}'.format(oauth_token)} + + # calculate number of pages + page_count = int(math.ceil(gist_limit / 100)) + + result_pages = [] + history = [] + paste_list = [] + + gist_file_blacklist = conf['inputs']['gists']['file_blacklist'] + gist_user_blacklist = conf['inputs']['gists']['user_blacklist'] + + try: + # Get the required amount of entries via pagination + for page_num in range(1, page_count + 1): + url = '{0}?page={1}&per_page=100'.format(api_uri, page_num) + logger.debug("Fetching page: {0}".format(page_num)) + req = requests.get(url, headers=headers) + # Check some headers + reset_date = datetime.utcfromtimestamp(float(req.headers['X-RateLimit-Reset'])).isoformat() + # logging.info("Limit Reset: {0}".format(reset_date)) + logger.info("Remaining Limit: {0}. 
Resets at {1}".format(req.headers['X-RateLimit-Remaining'], + reset_date)) + + if req.status_code == 200: + result_pages.append(req.json()) + + if req.status_code == 401: + logger.error("Auth Failed") + + elif req.status_code == 403: + logger.error("Login Attempts Exceeded") + + # Parse results + + for page in result_pages: + for gist_meta in page: + # Track paste ids to prevent dupes + history.append(gist_meta['id']) + if gist_meta['id'] in input_history: + continue + + if gist_meta['user'] in gist_user_blacklist: + logger.info("Blacklisting Gist from user: {0}".format(gist_meta['owner']['login'])) + continue + + for file_name, file_meta in gist_meta['files'].items(): + + if file_name in gist_file_blacklist: + logger.info("Blacklisting Paste {0}".format(file_name)) + continue + + gist_data = file_meta + gist_data['confname'] = 'gists' + gist_data['@timestamp'] = gist_meta['created_at'] + gist_data['pasteid'] = gist_meta['id'] + gist_data['user'] = gist_meta['user'] + gist_data['pastesite'] = 'gist.github.com' + gist_data['scrape_url'] = file_meta['raw_url'] + # remove some origional keys just to keep it a bit cleaner + del gist_data['raw_url'] + paste_list.append(gist_data) + + # Return results and history + return paste_list, history + except Exception as e: + logger.error("Unable to parse paste results: {0}".format(e)) + return paste_list, history diff --git a/pastehunter/inputs/github.py b/pastehunter/inputs/github.py new file mode 100644 index 0000000..25b44ec --- /dev/null +++ b/pastehunter/inputs/github.py @@ -0,0 +1,133 @@ +import logging +import math +from datetime import datetime + +import fnmatch +import requests + +# Future work/improvement that can happen here: support PR diffs, they contain a patch URL +# Set some logging options +logger = logging.getLogger('pastehunter') +logging.getLogger('requests').setLevel(logging.ERROR) + +api_uri = 'https://api.github.com/events' +# This event refers to a commit being pushed, and is +# probably the most significant thing we're concerned about. +event_types = ['PushEvent'] +api_version = 'application/vnd.github.v3+json' # Set Accept header to force api v3 +# Important note from github: +# 'We delay the public events feed by five minutes, which means the most recent event returned by the public events API actually occurred at least five minutes ago.' + +# Beware, git diffs can sometimes be very large files, including binaries and zips. +# MB KB B +diff_size_limit = 500 * 1000 * 1000 + + +def _make_request(url, headers): + req = requests.get(url, headers=headers) + reset_date = datetime.utcfromtimestamp(float(req.headers['X-RateLimit-Reset'])).isoformat() + logger.info('Remaining Limit: {0}. Resets at {1}'.format(req.headers['X-RateLimit-Remaining'], + reset_date)) + + if req.status_code == 200: + return req.json() + + if req.status_code == 401: + logger.error('Auth Failed') + return None + + elif req.status_code == 403: + logger.error('Login Attempts Exceeded') + return None + +def recent_pastes(conf, input_history): + oauth_token = conf['inputs']['github']['api_token'] + conf_limit = conf['inputs']['github']['api_limit'] + gh_limit = min(conf_limit, 300) + # From GitHub Docs (https://developer.github.com/v3/activity/events/#list-public-events): + # Events support pagination, however the per_page option is unsupported. The fixed page size is 30 items. Fetching up to ten pages is supported, for a total of 300 events. + # We modify this to be 100 per page, but the limit is still 300. 
+    if gh_limit != conf_limit:
+        logger.warning('Configured api_limit exceeds the 300 events available from the public feed. Limiting to 300.')
+    headers = {'user-agent': 'PasteHunter',
+               'Accept': api_version,
+               'Authorization': 'token {0}'.format(oauth_token)}
+
+    # calculate number of pages
+    page_count = int(math.ceil(gh_limit / 100))
+
+    result_pages = []
+    history = []
+    paste_list = []
+
+    gh_file_blacklist = conf['inputs']['github']['file_blacklist']
+    gh_user_blacklist = conf['inputs']['github']['user_blacklist']
+    ignore_bots = conf['inputs']['github']['ignore_bots']
+
+    try:
+        # Get the required amount of entries via pagination
+        for page_num in range(1, page_count + 1):
+            url = '{0}?page={1}&per_page=100'.format(api_uri, page_num)
+            logger.debug('Fetching page: {0}'.format(page_num))
+            req = _make_request(url, headers)
+            if req is not None:
+                result_pages.append(req)
+
+        # Parse results
+
+        for page in result_pages:
+            for event_meta in page:
+                # Track paste ids to prevent dupes
+                event_id = event_meta['id']
+                history.append(event_id)
+                if event_id in input_history:
+                    continue
+                if event_meta['type'] not in event_types:
+                    logger.debug('Skipping event {} due to unwanted type "{}"'.format(event_id, event_meta['type']))
+                    continue
+                # Actor may have been deleted or changed
+                if 'actor' in event_meta:
+                    # If the username is None, this will return False, while event_meta['login'] would error.
+                    if event_meta.get('actor').get('login') in gh_user_blacklist:
+                        logger.info('Blacklisting GitHub event from user: {0}'.format(event_meta.get('actor').get('login')))
+                        continue
+                    if ignore_bots and event_meta.get('actor').get('login').endswith("[bot]"):
+                        logger.info('Ignoring GitHub event from bot user: {0}'.format(event_meta.get('actor').get('login')))
+                        continue
+
+                payload = event_meta.get('payload')
+                if 'commits' not in payload:
+                    # Debug, because this is high output
+                    logger.debug('Skipping event {} due to no commits.'.format(event_id))
+                    continue
+                for commit_meta in payload.get('commits'):
+                    commit_url = commit_meta.get('url')
+                    commit_data = _make_request(commit_url, headers)
+                    if not commit_data:
+                        logger.info('No data returned for url {}. Skipping...'.format(commit_url))
+                        continue
+                    if commit_data.get('committer') and commit_data.get('committer').get('login') in gh_user_blacklist:
+                        logger.info('Blacklisting GitHub event from user: {0}'.format(commit_data.get('committer').get('login')))
+                        continue
+                    for file in commit_data.get('files'):
+                        file_path = file.get('filename')
+                        # Check the blacklist outside the pattern loop so a match skips the whole file
+                        matched_pattern = next((p for p in gh_file_blacklist if fnmatch.fnmatch(file_path, p)), None)
+                        if matched_pattern:
+                            logger.info('Blacklisting file {0} from event {1} (matched pattern "{2}")'.format(file_path, event_id, matched_pattern))
+                            continue
+
+                        gist_data = file
+                        gist_data['confname'] = 'github'
+                        gist_data['@timestamp'] = event_meta['created_at']
+                        gist_data['pasteid'] = event_id
+                        gist_data['user'] = event_meta.get('actor').get('login')
+                        gist_data['pastesite'] = 'github.com'
+                        gist_data['scrape_url'] = file.get('raw_url')
+                        # remove some original keys just to keep it a bit cleaner
+                        del gist_data['raw_url']
+                        paste_list.append(gist_data)
+
+        # Return results and history
+        return paste_list, history
+    except Exception as e:
+        logger.exception('Unable to parse paste results: {0}'.format(e))
+        return paste_list, history
diff --git a/pastehunter/inputs/pastebin.py b/pastehunter/inputs/pastebin.py
new file mode 100644
index 0000000..23cb0a0
--- /dev/null
+++ b/pastehunter/inputs/pastebin.py
@@ -0,0 +1,49 @@
+import requests
+import logging
+from datetime import datetime
+
+logger = logging.getLogger('pastehunter')
+
+def recent_pastes(conf, input_history):
+    # populate vars from config
+    paste_limit = conf['inputs']['pastebin']['paste_limit']
+    api_scrape = conf['inputs']['pastebin']['api_scrape']
+    history = []
+    paste_list = []
+    try:
+        # Create the API uri
+        scrape_uri = '{0}?limit={1}'.format(api_scrape, paste_limit)
+        # Get some pastes and convert to json
+        # Get last 'paste_limit' pastes
+        paste_list_request = requests.get(scrape_uri)
+
+        # Check to see if our IP is whitelisted or not.
+ if 'DOES NOT HAVE ACCESS' in paste_list_request.text: + logger.error("Your IP is not whitelisted visits 'https://pastebin.com/doc_scraping_api'") + return [], [] + paste_list_json = paste_list_request.json() + + for paste in paste_list_json: + # Track paste ids to prevent dupes + history.append(paste['key']) + if paste['key'] in input_history: + continue + + # Create a new paste dict for us to normalize + paste_data = paste + paste_data['filename'] = paste['key'] + paste_data['confname'] = 'pastebin' + paste_data['pasteid'] = paste['key'] + paste_data['pastesite'] = 'pastebin.com' + # Add a date field that kibana will map + date = datetime.utcfromtimestamp(float(paste_data['date'])).isoformat() + paste_data['@timestamp'] = date + paste_list.append(paste_data) + return paste_list, history + + except Exception as e: + logger.error("Unable to parse paste results: {0}".format(e)) + return paste_list, history + + + diff --git a/pastehunter/inputs/slexy.py b/pastehunter/inputs/slexy.py new file mode 100644 index 0000000..856be13 --- /dev/null +++ b/pastehunter/inputs/slexy.py @@ -0,0 +1,102 @@ +import logging +import re +import urllib.request as urllib +from datetime import datetime + +logger = logging.getLogger('pastehunter') + + +class SlexySite(object): + + def __init__(self): + self.site = "slexy.org" + url_slexy = "https://" + self.site + self.url_recent = url_slexy + "/recent" + self.url_view = url_slexy + "/view" + self.url_raw = url_slexy + "/raw" + + def view_link(self, pid): + return self.create_req("%s/%s" % (self.url_view, pid)) + + def raw_link(self, pid, args): + return self.create_req("%s/%s%s" % (self.url_raw, pid, args)) + + def create_req(self, url): + return urllib.Request( + url, + data=None, + headers={ + 'Referer': self.url_recent, + 'User-Agent': 'PasteHunter' + } + ) + + +class SlexyPaste(SlexySite): + def __init__(self, pid): + super(SlexyPaste, self).__init__() + self.pid = pid + self.site = self.site + self.url = None + self.timestamp = None + self.parse() + + def parse(self): + data = urllib.urlopen(self.view_link(self.pid), timeout=10).read().decode('utf-8') + self.url = self.get_raw_link(data) + self.timestamp = self.get_timestamp(data) + + def get_raw_link(self, data): + pattern = '', getdata) + return list(set(pids)) + + +def recent_pastes(conf, input_history): + history = [] + paste_list = [] + my_scraper = SlexyScraper() + recent_pids = my_scraper.get_recents() + pid_to_process = set() + for pid in recent_pids: + if pid in input_history: + history.append(pid) + else: + pid_to_process.add(pid) + try: + for pid in pid_to_process: + paste = SlexyPaste(pid) + history.append(paste.pid) + paste_data = {} + paste_data['confname'] = 'slexy' + paste_data['scrape_url'] = paste.url.full_url + paste_data['pasteid'] = paste.pid + paste_data['pastesite'] = paste.site + paste_data['@timestamp'] = paste.timestamp + paste_list.append(paste_data) + return paste_list, history + except Exception as e: + logger.error("Unable to parse paste results: %s", e) + return paste_list, history diff --git a/pastehunter/inputs/stackexchange.py b/pastehunter/inputs/stackexchange.py new file mode 100644 index 0000000..244e1f5 --- /dev/null +++ b/pastehunter/inputs/stackexchange.py @@ -0,0 +1,87 @@ +import requests +import math +import logging +from datetime import datetime + +# Set some logging options +logger = logging.getLogger('pastehunter') +logging.getLogger('requests').setLevel(logging.ERROR) + +# Test API Key from the docs - U4DMV*8nvpm3EOpvf69Rxw(( +# 
https://api.stackexchange.com/2.2/questions?key=U4DMV*8nvpm3EOpvf69Rxw((&site=stackoverflow&page=1&pagesize=100&order=desc&sort=creation&filter=default + + + +def recent_pastes(conf, input_history): + api_key = conf['inputs']['stackexchange']['api_key'] + api_scrape = conf['inputs']['stackexchange']['api_scrape'] + site_list = conf['inputs']['stackexchange']['site_list'] + store_filter = conf['inputs']['stackexchange']['store_filter'] + question_body_filter = '!bA1dOlliDM)pi9' + pagesize = 100 # Default = 30 + headers = {'user-agent': 'PasteHunter'} + + if api_key == '': + logger.error("No API Key configured for StackExchange Access") + return [], [] + + result_pages = [] + history = [] + paste_list = [] + + try: + + # For each of the stack sites we want to query + for site in site_list: + logger.info("Query Stack Exchange site: {0}".format(site)) + + # Create the API uri + scrape_uri = '{0}?key={1}&site={2}&page=1&pagesize=100&order=desc&sort=creation&filter={3}'.format(api_scrape, api_key, site, store_filter) + # Get some pastes and convert to json + # Get last 'paste_limit' pastes + paste_list_request = requests.get(scrape_uri) + + # ToDo: Add an API rate test in here. + paste_list_json = paste_list_request.json() + + if "error_id" in paste_list_json: + logging.error("StackExchange API Error: {0}".format(paste_list_json['error_message'])) + return [], [] + + + + for question in paste_list_json['items']: + # Track question ids to prevent dupes + history.append(question['question_id']) + if question['question_id'] in input_history: + continue + + # Create a new question dict for us to normalize + question_data = question + question_data['filename'] = '' + question_data['confname'] = "stackexchange" + # Force type to string else it breaks ES Index mappings + question_data['pasteid'] = str(question['question_id']) + question_data['pastesite'] = site + # Set the raw uri to avoid breaking other things. Defaults to empty if not found + question_data['scrape_url'] = question.get('link', '') + # Get the author and then trim the data we store. + question_data['username'] = question['owner']['display_name'] + del question_data['owner'] + # Add a date field that kibana will map + date = datetime.utcfromtimestamp(float(question_data['creation_date'])).isoformat() + question_data['@timestamp'] = date + paste_list.append(question_data) + + + # Record API Quota on last call to save some logging. 
+ quota_max = paste_list_json['quota_max'] + quota_remaining = paste_list_json['quota_remaining'] + + logger.info("Used {0} of {1} of StackExchange api quota".format(quota_remaining, quota_max)) + # Return the pastes and update history + return paste_list, history + + except Exception as e: + logger.error("Unable to parse question results: {0}".format(e)) + return paste_list, history \ No newline at end of file diff --git a/pastehunter/outputs/__init__.py b/pastehunter/outputs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/outputs/csv_output.py b/pastehunter/outputs/csv_output.py similarity index 79% rename from outputs/csv_output.py rename to pastehunter/outputs/csv_output.py index d79cf67..7cebe1b 100644 --- a/outputs/csv_output.py +++ b/pastehunter/outputs/csv_output.py @@ -1,11 +1,13 @@ +import logging import os import datetime -from common import parse_config +from pastehunter.common import parse_config +logger = logging.getLogger('pastehunter') config = parse_config() -class CSVOutput(): +class CSVOutput(object): def __init__(self): base_path = config['outputs']['csv_output']['output_path'] # Get todays CSV @@ -18,7 +20,7 @@ def __init__(self): os.makedirs(base_path) self.test = True except OSError as e: - print("Unable to create CSV Path: {0}".format(e)) + logger.error("Unable to create CSV Path: {}".format(e)) self.test = False else: self.test = True @@ -34,4 +36,4 @@ def store_paste(self, paste_data): with open(self.csv_path, 'a') as out: out.write('{0}\n'.format(csv_line)) else: - print("CSV Output Error") + logging.error("CSV Output Error. Output path '{}' was never created.".format(self.csv_path)) diff --git a/outputs/elastic_output.py b/pastehunter/outputs/elastic_output.py similarity index 98% rename from outputs/elastic_output.py rename to pastehunter/outputs/elastic_output.py index 12eccbe..66ce467 100644 --- a/outputs/elastic_output.py +++ b/pastehunter/outputs/elastic_output.py @@ -1,5 +1,5 @@ from elasticsearch import Elasticsearch -from common import parse_config +from pastehunter.common import parse_config from datetime import datetime import logging diff --git a/outputs/json_output.py b/pastehunter/outputs/json_output.py similarity index 95% rename from outputs/json_output.py rename to pastehunter/outputs/json_output.py index d98d279..e578a53 100644 --- a/outputs/json_output.py +++ b/pastehunter/outputs/json_output.py @@ -2,7 +2,7 @@ import logging import os -from common import parse_config +from pastehunter.common import parse_config logger = logging.getLogger('pastehunter') diff --git a/pastehunter/outputs/slack_output.py b/pastehunter/outputs/slack_output.py new file mode 100644 index 0000000..8e0abac --- /dev/null +++ b/pastehunter/outputs/slack_output.py @@ -0,0 +1,48 @@ +import logging +import requests +from pastehunter.common import parse_config + +logger = logging.getLogger('pastehunter') + +config = parse_config() + + +class SlackOutput(): + def __init__(self): + self.valid = True + self.webhook_url = config['outputs']['slack_output']['webhook_url'] + self.accepted_rules = config['outputs']['slack_output']['rule_list'] + + if self.webhook_url == '': + logging.error("Slack Webhook not configured") + self.valid = False + if self.webhook_url == '': + logging.error("No Rules configured to alert") + + def store_paste(self, paste_data): + if self.valid: + send = ('all' in self.accepted_rules) + + for rule in self.accepted_rules: + if rule in paste_data['YaraRule']: + send = True + + if send: + json_data = { + "text": "Pastehunter alert!", + 
"attachments": [ + { + "fallback": "Plan a vacation", + "author_name": "PasteHunter", + "title": "Paste ID {0}".format(paste_data['pasteid']), + "text": "Yara Rule {0} Found on {1}\n\r{2}".format(paste_data['YaraRule'], paste_data['pastesite'], paste_data['scrape_url']) + } + ] + } + + req = requests.post(self.webhook_url, json=json_data) + if req.status_code == 200 and req.text == 'ok': + logger.debug("Paste sent to slack") + else: + logger.error( + "Failed to post to slack Status Code {0}".format(req.status_code)) diff --git a/outputs/smtp_output.py b/pastehunter/outputs/smtp_output.py similarity index 99% rename from outputs/smtp_output.py rename to pastehunter/outputs/smtp_output.py index 6090511..dd549ec 100644 --- a/outputs/smtp_output.py +++ b/pastehunter/outputs/smtp_output.py @@ -9,7 +9,7 @@ import json import logging -from common import parse_config +from pastehunter.common import parse_config logger = logging.getLogger('pastehunter') config = parse_config() diff --git a/pastehunter/outputs/splunk_output.py b/pastehunter/outputs/splunk_output.py new file mode 100644 index 0000000..9406c42 --- /dev/null +++ b/pastehunter/outputs/splunk_output.py @@ -0,0 +1,42 @@ +from pastehunter.common import parse_config +import json +import logging +import splunklib.client as client + +logger = logging.getLogger('pastehunter') +config = parse_config() + +class SplunkOutput(): + def __init__(self): + # Set up the database connection + splunk_host = config['outputs']['splunk_output']['splunk_host'] + splunk_port = config['outputs']['splunk_output']['splunk_port'] + splunk_user = config['outputs']['splunk_output']['splunk_user'] + splunk_pass = config['outputs']['splunk_output']['splunk_pass'] + self.splunk_index = config['outputs']['splunk_output']['splunk_index'] + + try: + self.service = client.connect( + host=splunk_host, + port=splunk_port, + username=splunk_user, + password=splunk_pass, + autologin=True) + + self.index = self.service.indexes[self.splunk_index] + except Exception as e: + logger.error(e) + raise Exception('Unable to connect or missing index') from None + + def store_paste(self, paste_data): + # Make a copy so we don't affect any other output modules + local_data = dict(paste_data) + if not config['outputs']['splunk_output']['store_raw']: + del local_data['raw_paste'] + + try: + # The edit_tcp capability is required to access this API + sourcetype = config['outputs']['splunk_output']['splunk_sourcetype'] + self.index.submit(json.dumps(local_data), sourcetype=sourcetype) + except Exception as e: + logger.exception('Error submitting paste_data to splunk', e) diff --git a/outputs/syslog_output.py b/pastehunter/outputs/syslog_output.py similarity index 94% rename from outputs/syslog_output.py rename to pastehunter/outputs/syslog_output.py index 6618eba..88df0a4 100644 --- a/outputs/syslog_output.py +++ b/pastehunter/outputs/syslog_output.py @@ -1,5 +1,5 @@ import socket -from common import parse_config +from pastehunter.common import parse_config config = parse_config() diff --git a/pastehunter/outputs/twilio_output.py b/pastehunter/outputs/twilio_output.py new file mode 100644 index 0000000..c9012af --- /dev/null +++ b/pastehunter/outputs/twilio_output.py @@ -0,0 +1,66 @@ +import logging +from twilio.rest import Client +from pastehunter.common import parse_config + +logger = logging.getLogger('pastehunter') +config = parse_config() + +class TwilioOutput(object): + def __init__(self): + self.account_sid = config['outputs']['twilio_output']['account_sid'] + self.auth_token = 
config['outputs']['twilio_output']['auth_token'] + self.twilio_sender = config['outputs']['twilio_output']['twilio_sender'] + self.recipient_list = config['outputs']['twilio_output']['recipient_list'] + self.accepted_rules = config['outputs']['twilio_output']['rule_list'] + self.message_type = 'sms' # Whatsapp is still in beta on twilio. + try: + self.client = Client(self.account_sid, self.auth_token) + self.test = True + except Exception as e: + logging.error("Unable to create twilio Client: {0}".format(e)) + self.test = False + + + def store_paste(self, paste_data): + if self.test: + + + send = ('all' in self.accepted_rules) + + for rule in self.accepted_rules: + if rule in paste_data['YaraRule']: + send = True + + if send: + message_body = "Yara Rule {0} Found on {1}\n\r{2}".format( + paste_data['YaraRule'], + paste_data['pastesite'], + paste_data['scrape_url'] + ) + + logger.debug("Sending Twilio Message") + if self.message_type == 'sms': + for recipient in self.recipient_list: + try: + message = self.client.messages.create( + from_=self.twilio_sender, + body=message_body, + to=recipient + ) + logging.debug("Sent twilio message with ID: {0}".format(message.sid)) + except Exception as e: + logging.error(e) + + elif self.message_type == 'whatsapp': + for recipient in self.recipient_list: + try: + message = self.client.messages.create( + from_='whatsapp:{0}'.format(self.twilio_sender), + body=message_body, + to='whatsapp:{0}'.format(recipient) + ) + logging.debug("Sent twilio message with ID: {0}".format(message.sid)) + except Exception as e: + logging.error(e) + else: + logging.error("No Valid twilio message type found") diff --git a/pastehunter/postprocess/__init__.py b/pastehunter/postprocess/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pastehunter/postprocess/post_b64.py b/pastehunter/postprocess/post_b64.py new file mode 100644 index 0000000..7a45941 --- /dev/null +++ b/pastehunter/postprocess/post_b64.py @@ -0,0 +1,71 @@ +import hashlib +import importlib +import gzip +import logging +from base64 import b64decode +# This gets the raw paste and the paste_data json object +from pastehunter.common import parse_config +conf = parse_config() + +logger = logging.getLogger('pastehunter') + +def run(results, raw_paste_data, paste_object): + + ''' + + ToDo: Lets look at multiple base64 streams + for now only accept if the entire paste is + + # Figure out which b64 rule fire + + # The base64 re can hang on occasion with this one + # b64_re = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)' + + # This one has a few empty results i need to catch but doesn't kill pastehunter + b64_re = '(?:[A-Za-z0-9+/]{4}){3,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?' + b64_strings = re.findall(b64_re, raw_paste_data) + + + # Set a counter for multiple streams. + counter = 0 + for b64_str in b64_strings: + + ''' + + for rule in results: + if len(raw_paste_data) > 0: + if rule == 'b64_gzip': + # Lets try to decode and get a file listing. 
+                # Also get the MD5 of the decoded file
+                try:
+                    uncompressed = gzip.decompress(b64decode(raw_paste_data))
+                    decoded = uncompressed.decode('utf-8')
+                    paste_object["decompressed_stream"] = decoded
+                except Exception as e:
+                    logger.error("Unable to decompress gzip stream: {0}".format(e))
+
+            if rule == 'b64_exe':
+                try:
+                    raw_exe = b64decode(raw_paste_data)
+                    paste_object["exe_size"] = len(raw_exe)
+                    paste_object["exe_md5"] = hashlib.md5(raw_exe).hexdigest()
+                    paste_object["exe_sha256"] = hashlib.sha256(raw_exe).hexdigest()
+
+                    # We are guessing that the sample has been submitted, and crafting a URL
+                    paste_object["VT"] = 'https://www.virustotal.com/#/file/{0}'.format(paste_object["exe_md5"])
+
+                    # If sandbox modules are enabled then submit the file
+                    for sandbox, sandbox_values in conf["sandboxes"].items():
+                        if sandbox_values["enabled"]:
+                            logger.info("Uploading file {0} using {1}".format(paste_object["pasteid"], sandbox_values["module"]))
+                            sandbox_module = importlib.import_module(sandbox_values["module"])
+                            paste_object = sandbox_module.upload_file(raw_exe, paste_object)
+
+                except Exception as e:
+                    logger.error("Unable to decode exe file: {0}".format(e))
+
+    # Get unique domain count
+    # Update the json
+
+    # Send the updated json back
+    return paste_object
diff --git a/pastehunter/postprocess/post_compress.py b/pastehunter/postprocess/post_compress.py
new file mode 100644
index 0000000..d47f9fb
--- /dev/null
+++ b/pastehunter/postprocess/post_compress.py
@@ -0,0 +1,26 @@
+import lzma
+import base64
+import logging
+from pastehunter.common import parse_config
+logger = logging.getLogger('pastehunter')
+config = parse_config()
+
+def run(results, raw_paste_data, paste_object):
+    if config['outputs']['json_output']['store_raw']:
+        original = raw_paste_data
+        orig_size = len(original.encode())
+        logger.debug("Compressing paste... Pre-compression size: {}".format(orig_size))
+        compressed = base64.b64encode(lzma.compress(raw_paste_data.encode()))
+        compressed_size = len(compressed)
+        logger.debug("Compressing paste... Post-compression size: {}".format(compressed_size))
+
+        # In some cases compressed blobs may be larger
+        # if not much data is compressed
+        if orig_size > compressed_size:
+            paste_object['raw_paste'] = compressed.decode('utf-8')
+            logger.debug("Compressed data smaller than original blob. Keeping compressed.")
+        else:
+            logger.debug("Original smaller than compressed blob. Keeping original.")
+
+    # Regardless of modification, return the paste object
+    return paste_object
diff --git a/pastehunter/postprocess/post_email.py b/pastehunter/postprocess/post_email.py
new file mode 100644
index 0000000..53bbc79
--- /dev/null
+++ b/pastehunter/postprocess/post_email.py
@@ -0,0 +1,29 @@
+import re
+
+
+def run(results, raw_paste_data, paste_object):
+    # Use the rule name to determine what postprocess to do
+
+    # Get total unique emails.
+ + all_emails = re.findall('[\w\.-]+@[\w\.-]+\.\w+', raw_paste_data) + domain_list = [] + for email_address in all_emails: + email_domain = email_address.split("@") + domain_list.append(email_domain[-1]) + + unique_emails = set(all_emails) + unique_domains = set(domain_list) + # We can filter some of the false positives from the yara match here + + if len(unique_emails) < 10: + paste_object["results"] = [] + + # Get unique domain count + # Update the json + paste_object["total_emails"] = len(all_emails) + paste_object["unique_emails"] = len(unique_emails) + paste_object["unique_domains"] = len(unique_domains) + + # Send the updated json back + return paste_object diff --git a/pastehunter/postprocess/post_entropy.py b/pastehunter/postprocess/post_entropy.py new file mode 100644 index 0000000..ca12b67 --- /dev/null +++ b/pastehunter/postprocess/post_entropy.py @@ -0,0 +1,16 @@ +import re +import math +from collections import Counter + +def shannon_entropy(s): + # https://rosettacode.org/wiki/Entropy#Python + s = str(s) + p, lns = Counter(s), float(len(s)) + return -sum(count / lns * math.log(count / lns, 2) for count in p.values()) + + +def run(results, raw_paste_data, paste_object): + # Calculate the Shannon Entropy for the raw paste + paste_object["Shannon Entropy"] = shannon_entropy(raw_paste_data) + # Send the updated json back + return paste_object diff --git a/pastehunter/sandboxes/__init__.py b/pastehunter/sandboxes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pastehunter/sandboxes/cuckoo.py b/pastehunter/sandboxes/cuckoo.py new file mode 100644 index 0000000..fda3dbd --- /dev/null +++ b/pastehunter/sandboxes/cuckoo.py @@ -0,0 +1,36 @@ +import io +import logging +import requests +from pastehunter.common import parse_config +conf = parse_config() + +logger = logging.getLogger('pastehunter') + +def upload_file(raw_file, paste_object): + try: + task_id = send_to_cuckoo(raw_file, paste_object["pasteid"]) + paste_object["Cuckoo Task ID"] = task_id + logger.info("exe submitted to Cuckoo with task id {0}".format(task_id)) + except Exception as e: + logger.error("Unabled to submit sample to cuckoo") + + # Send any updated json back + return paste_object + +def send_to_cuckoo(raw_exe, pasteid): + cuckoo_ip = conf["sandboxes"]["cuckoo"]["api_host"] + cuckoo_port = conf["sandboxes"]["cuckoo"]["api_port"] + cuckoo_host = 'http://{0}:{1}'.format(cuckoo_ip, cuckoo_port) + submit_file_url = '{0}/tasks/create/file'.format(cuckoo_host) + files = {'file': ('{0}.exe'.format(pasteid), io.BytesIO(raw_exe))} + submit_file = requests.post(submit_file_url, files=files).json() + task_id = None + try: + task_id = submit_file['task_id'] + except KeyError: + try: + task_id = submit_file['task_ids'][0] + except KeyError: + logger.error(submit_file) + + return task_id diff --git a/pastehunter/sandboxes/viper.py b/pastehunter/sandboxes/viper.py new file mode 100644 index 0000000..f77fb2d --- /dev/null +++ b/pastehunter/sandboxes/viper.py @@ -0,0 +1,19 @@ +import io +import logging +import requests +from pastehunter.common import parse_config +conf = parse_config() + +logger = logging.getLogger('pastehunter') + +def upload_file(raw_file, paste_object): + viper_ip = conf["sandboxes"]["viper"]["api_host"] + viper_port = conf["sandboxes"]["viper"]["api_port"] + viper_host = 'http://{0}:{1}'.format(viper_ip, viper_port) + + submit_file_url = '{0}/tasks/create/file'.format(viper_host) + files = {'file': ('{0}.exe'.format(paste_object["pasteid"]), io.BytesIO(raw_file))} + submit_file = 
requests.post(submit_file_url, files=files).json() + + # Send any updated json back + return paste_object diff --git a/settings.json.sample b/settings.json.sample index f2cc1b0..138878d 100644 --- a/settings.json.sample +++ b/settings.json.sample @@ -2,7 +2,7 @@ "inputs": { "pastebin":{ "enabled": true, - "module": "inputs.pastebin", + "module": "pastehunter.inputs.pastebin", "api_scrape": "https://scrape.pastebin.com/api_scraping.php", "api_raw": "https://scrape.pastebin.com/api_scrape_item.php?i=", "paste_limit": 200, @@ -11,7 +11,7 @@ "dumpz": { "enabled": false, "comment": "This api endpoint has been removed.", - "module": "inputs.dumpz", + "module": "pastehunter.inputs.dumpz", "api_scrape": "https://dumpz.org/api/recent", "api_raw": "https://dumpz.org/api/dump", "paste_limit": 200, @@ -19,7 +19,7 @@ }, "gists": { "enabled": true, - "module": "inputs.gists", + "module": "pastehunter.inputs.gists", "api_token": "", "api_limit": 100, "store_all": false, @@ -28,16 +28,17 @@ }, "github": { "enabled": false, - "module": "inputs.github", + "module": "pastehunter.inputs.github", "api_token": "", "api_limit": 100, "store_all": false, + "ignore_bots": false, "user_blacklist": [], - "file_blacklist": ["node_modules/*", "__pycache__/*", "*/grahamcofborg-eval-package-list", "*/yarn.lock", "*.3ds", "*.3g2", "*.3gp", "*.7z", "*.DS_Store", "*.a", "*.aac", "*.adp", "*.ai", "*.aif", "*.aiff", "*.alz", "*.ape", "*.apk", "*.ar", "*.arj", "*.asf", "*.au", "*.avi", "*.bak", "*.bh", "*.bin", "*.bk", "*.bmp", "*.btif", "*.bz2", "*.bzip2", "*.cab", "*.caf", "*.cgm", "*.class", "*.cmx", "*.cpio", "*.cr2", "*.csv", "*.cur", "*.dat", "*.deb", "*.dex", "*.djvu", "*.dll", "*.dmg", "*.dng", "*.doc", "*.docm", "*.docx", "*.dot", "*.dotm", "*.dra", "*.dsk", "*.dts", "*.dtshd", "*.dvb", "*.dwg", "*.dxf", "*.ecelp4800", "*.ecelp7470", "*.ecelp9600", "*.egg", "*.eol", "*.eot", "*.epub", "*.exe", "*.f4v", "*.fbs", "*.fh", "*.fla", "*.flac", "*.fli", "*.flv", "*.fpx", "*.fst", "*.fvt", "*.g3", "*.gif", "*.graffle", "*.gz", "*.gzip", "*.h261", "*.h263", "*.h264", "*.ico", "*.ief", "*.img", "*.ipa", "*.iso", "*.jar", "*.jpeg", "*.jpg", "*.jpgv", "*.jpm", "*.jxr", "*.key", "*.ktx", "*.lha", "*.lvp", "*.lz", "*.lzh", "*.lzma", "*.lzo", "*.m3u", "*.m4a", "*.m4v", "*.mar", "*.mdi", "*.mht", "*.mid", "*.midi", "*.mj2", "*.mka", "*.mkv", "*.mmr", "*.mng", "*.mobi", "*.mov", "*.movie", "*.mp3", "*.mp4", "*.mp4a", "*.mpeg", "*.mpg", "*.mpga", "*.mxu", "*.nef", "*.npx", "*.numbers", "*.o", "*.oga", "*.ogg", "*.ogv", "*.otf", "*.pages", "*.pbm", "*.pcx", "*.pdf", "*.pea", "*.pgm", "*.pic", "*.png", "*.pnm", "*.pot", "*.potm", "*.potx", "*.ppa", "*.ppam", "*.ppm", "*.pps", "*.ppsm", "*.ppsx", "*.ppt", "*.pptm", "*.pptx", "*.psd", "*.pya", "*.pyc", "*.pyo", "*.pyv", "*.qt", "*.rar", "*.ras", "*.raw", "*.rgb", "*.rip", "*.rlc", "*.rmf", "*.rmvb", "*.rtf", "*.rz", "*.s3m", "*.s7z", "*.scpt", "*.sgi", "*.shar", "*.sil", "*.sketch", "*.slk", "*.smv", "*.so", "*.sub", "*.swf", "*.tar", "*.tbz", "*.tbz2", "*.tga", "*.tgz", "*.thmx", "*.tif", "*.tiff", "*.tlz", "*.ttc", "*.ttf", "*.txz", "*.udf", "*.uvh", "*.uvi", "*.uvm", "*.uvp", "*.uvs", "*.uvu", "*.viv", "*.vob", "*.war", "*.wav", "*.wax", "*.wbmp", "*.wdp", "*.weba", "*.webm", "*.webp", "*.whl", "*.wim", "*.wm", "*.wma", "*.wmv", "*.wmx", "*.woff", "*.woff2", "*.wvx", "*.xbm", "*.xif", "*.xla", "*.xlam", "*.xls", "*.xlsb", "*.xlsm", "*.xlsx", "*.xlt", "*.xltm", "*.xltx", "*.xm", "*.xmind", "*.xpi", "*.xpm", "*.xwd", "*.xz", "*.z", "*.zip", "*.zipx"] + "file_blacklist": ["node_modules/*", 
"__pycache__/*", "*/grahamcofborg-eval-package-list", "*.lock", "*.3ds", "*.3g2", "*.3gp", "*.7z", "*.DS_Store", "*.a", "*.aac", "*.adp", "*.ai", "*.aif", "*.aiff", "*.alz", "*.ape", "*.apk", "*.ar", "*.arj", "*.asf", "*.au", "*.avi", "*.bak", "*.bh", "*.bin", "*.bk", "*.bmp", "*.btif", "*.bz2", "*.bzip2", "*.cab", "*.caf", "*.cgm", "*.class", "*.cmx", "*.cpio", "*.cr2", "*.cur", "*.dat", "*.deb", "*.dex", "*.djvu", "*.dll", "*.dmg", "*.dng", "*.doc", "*.docm", "*.docx", "*.dot", "*.dotm", "*.dra", "*.dsk", "*.dts", "*.dtshd", "*.dvb", "*.dwg", "*.dxf", "*.ecelp4800", "*.ecelp7470", "*.ecelp9600", "*.egg", "*.eol", "*.eot", "*.epub", "*.exe", "*.f4v", "*.fbs", "*.fh", "*.fla", "*.flac", "*.fli", "*.flv", "*.fpx", "*.fst", "*.fvt", "*.g3", "*.gif", "*.graffle", "*.gz", "*.gzip", "*.h261", "*.h263", "*.h264", "*.ico", "*.ief", "*.img", "*.ipa", "*.iso", "*.jar", "*.jpeg", "*.jpg", "*.jpgv", "*.jpm", "*.jxr","*.ktx", "*.lha", "*.lvp", "*.lz", "*.lzh", "*.lzma", "*.lzo", "*.m3u", "*.m4a", "*.m4v", "*.mar", "*.mdi", "*.mht", "*.mid", "*.midi", "*.mj2", "*.mka", "*.mkv", "*.mmr", "*.mng", "*.mobi", "*.mov", "*.movie", "*.mp3", "*.mp4", "*.mp4a", "*.mpeg", "*.mpg", "*.mpga", "*.mxu", "*.nef", "*.npx", "*.numbers", "*.o", "*.oga", "*.ogg", "*.ogv", "*.otf", "*.pages", "*.pbm", "*.pcx", "*.pdf", "*.pea", "*.pgm", "*.pic", "*.png", "*.pnm", "*.pot", "*.potm", "*.potx", "*.ppa", "*.ppam", "*.ppm", "*.pps", "*.ppsm", "*.ppsx", "*.ppt", "*.pptm", "*.pptx", "*.psd", "*.pya", "*.pyc", "*.pyo", "*.pyv", "*.qt", "*.rar", "*.ras", "*.raw", "*.rgb", "*.rip", "*.rlc", "*.rmf", "*.rmvb", "*.rtf", "*.rz", "*.s3m", "*.s7z", "*.scpt", "*.sgi", "*.shar", "*.sil", "*.sketch", "*.slk", "*.smv", "*.so", "*.sub", "*.swf", "*.tar", "*.tbz", "*.tbz2", "*.tga", "*.tgz", "*.thmx", "*.tif", "*.tiff", "*.tlz", "*.ttc", "*.ttf", "*.txz", "*.udf", "*.uvh", "*.uvi", "*.uvm", "*.uvp", "*.uvs", "*.uvu", "*.viv", "*.vob", "*.war", "*.wav", "*.wax", "*.wbmp", "*.wdp", "*.weba", "*.webm", "*.webp", "*.whl", "*.wim", "*.wm", "*.wma", "*.wmv", "*.wmx", "*.woff", "*.woff2", "*.wvx", "*.xbm", "*.xif", "*.xla", "*.xlam", "*.xls", "*.xlsb", "*.xlsm", "*.xlsx", "*.xlt", "*.xltm", "*.xltx", "*.xm", "*.xmind", "*.xpi", "*.xpm", "*.xwd", "*.xz", "*.z", "*.zip", "*.zipx"] }, "slexy":{ "enabled": true, - "module": "inputs.slexy", + "module": "pastehunter.inputs.slexy", "store_all": false, "api_scrape": "http://slexy.org/recent", "api_raw": "http://slexy.org/raw", @@ -45,7 +46,7 @@ }, "stackexchange":{ "enabled": false, - "module": "inputs.stackexchange", + "module": "pastehunter.inputs.stackexchange", "site_list": ["stackoverflow","serverfault", "superuser", "webapps", "webmasters", "dba"], "api_key": "", "store_filter": "!)r_ttsG0v3bE1vo3*8Ki", @@ -57,7 +58,7 @@ "outputs": { "elastic_output": { "enabled": true, - "module": "outputs.elastic_output", + "module": "pastehunter.outputs.elastic_output", "classname": "ElasticOutput", "elastic_index": "paste-test", "elastic_host": "172.16.10.10", @@ -69,7 +70,7 @@ }, "splunk_output": { "enabled": false, - "module": "outputs.splunk_output", + "module": "pastehunter.outputs.splunk_output", "classname": "SplunkOutput", "splunk_host": "host", "splunk_port": 8089, @@ -81,7 +82,7 @@ }, "json_output": { "enabled": false, - "module": "outputs.json_output", + "module": "pastehunter.outputs.json_output", "classname": "JsonOutput", "output_path": "logs/json/", "store_raw": true, @@ -89,20 +90,20 @@ }, "csv_output": { "enabled": false, - "module": "outputs.csv_output", + "module": 
"pastehunter.outputs.csv_output", "classname": "CSVOutput", "output_path": "logs/csv/" }, "syslog_output": { "enabled": false, - "module": "outputs.syslog_output", + "module": "pastehunter.outputs.syslog_output", "classname": "SyslogOutput", "host": "192.168.1.1", "port": 514 }, "smtp_output": { "enabled": false, - "module": "outputs.smtp_output", + "module": "pastehunter.outputs.smtp_output", "classname": "SMTPOutput", "smtp_host": "smtp.server.com", "smtp_port": 25, @@ -124,14 +125,14 @@ }, "slack_output": { "enabled": false, - "module": "outputs.slack_output", + "module": "pastehunter.outputs.slack_output", "classname": "SlackOutput", "webhook_url": "", "rule_list": ["custom_keywords"] }, "twilio_output": { "enabled": false, - "module": "outputs.twilio_output", + "module": "pastehunter.outputs.twilio_output", "classname": "TwilioOutput", "account_sid": "", "auth_token": "", @@ -141,7 +142,9 @@ } }, "yara": { - "rule_path": "YaraRules", + "default_rules": true, + "custom_rules": "none", + "exclude_rules": [], "blacklist": true, "test_rules": false }, @@ -159,13 +162,13 @@ "sandboxes": { "cuckoo": { "enabled": false, - "module": "sandboxes.cuckoo", + "module": "pastehunter.sandboxes.cuckoo", "api_host": "127.0.0.1", "api_port": 8080 }, "viper": { "enabled": false, - "module": "sandboxes.viper", + "module": "pastehunter.sandboxes.viper", "api_host": "127.0.0.1", "api_port": 8080 } @@ -173,22 +176,22 @@ "post_process": { "post_email": { "enabled": true, - "module": "postprocess.post_email", + "module": "pastehunter.postprocess.post_email", "rule_list": ["email_list"] }, "post_b64": { "enabled": true, - "module": "postprocess.post_b64", + "module": "pastehunter.postprocess.post_b64", "rule_list": ["b64_exe", "b64_rar", "b64_zip", "b64_gzip"] }, "post_entropy": { "enabled": false, - "module": "postprocess.post_entropy", + "module": "pastehunter.postprocess.post_entropy", "rule_list": ["ALL"] }, "post_compress": { "enabled": false, - "module": "postprocess.post_compress", + "module": "pastehunter.postprocess.post_compress", "rule_list": ["ALL"] } } diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..84e2751 --- /dev/null +++ b/setup.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +from setuptools import setup, find_packages + +with open("README.md", "r") as fh: + long_description = fh.read() + +setup( + name='pastehunter', + version='1.2.1', + author='@kevthehermit @Plazmaz', + author_email='info@pastehunter.com', + description="Pastehunter", + long_description=long_description, + long_description_content_type="text/markdown", + url='https://pastehunter.com', + license='GNU V3', + zip_safe=False, + packages=find_packages(), + include_package_data=True, + install_requires=[ + 'yara-python', + 'requests', + 'elasticsearch', + 'splunk-sdk' + ], + scripts=['pastehunter-cli'], + package_data={'': ['*.yar', 'README.md, LICENSE']} +) \ No newline at end of file