diff --git a/.gitignore b/.gitignore
index e12df7d..e7dab63 100644
--- a/.gitignore
+++ b/.gitignore
@@ -108,4 +108,5 @@ ENV/
.vscode/
logs/
-.c9
\ No newline at end of file
+.c9
+pastehunter/YaraRules/custom_keywords.yar
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..19a05d5
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,36 @@
+language: python
+sudo: required
+dist: bionic
+group: edge
+cache:
+ pip: true
+python:
+- 3.6
+- 3.6-dev
+before_install:
+- sudo apt-get update -qq
+- sudo apt-get install automake libtool make gcc libmagic-dev -yqq python3-pip unzip
+- wget https://github.com/VirusTotal/yara/archive/v3.10.0.tar.gz
+- tar -xzvf v3.10.0.tar.gz
+- cd yara-3.10.0/ && ./bootstrap.sh && ./configure --enable-dotnet --enable-magic
+ && make && sudo make install && cd ../
+- git clone --recursive https://github.com/VirusTotal/yara-python
+- pip3 install pytest codecov pytest-cov
+- cd yara-python
+- python setup.py build --enable-magic --enable-dotnet
+- python setup.py install && cd ../ && rm -rf yara-python && rm -rf yara-3.10.0/
+install:
+- pip install -r requirements.txt
+- pip install -e .
+script:
+- pastehunter-cli
+after_success:
+- python setup.py sdist
+deploy:
+ provider: pypi
+ user: __token__
+ password:
+ secure: ZYILSwAsPcCWa4Ccslu2F+HVw02Rafdf4HqnQla3uCCTlEQQ+cFyuTKxQB46xytgblFQv/99oxq3SwVTUX4C6cIa8D+zHm/6lR4Tu+YPthYZX9IashF/AMKkyKks8bxbB0x/3t7hBX+7w++OcC1wwCXUyX7btsiOBa28k1NZCsB26NgdpBn02wF/GwqDhkxKkW9Bi7KDjb58GdiyhgVXxOOaOYbRyKiNZqUKQx504zmc0aGSPYCs0gSPwoA0T3FUet4IBcjjTP9DsjjkyQ7K6iMWYNGsAP91HnZe5J4sZYqwrGs++vndJVa/bYpiyMCjUrG4c6okdS0zpSmfbrqJay12wH5qroqqLxwuLtrXcHK+ChlyvhsGHMN51rqX811zdt/IzDwi+hXz84e8Y8/YgUTx7j0/HPEdrHjIIbMoIEd9Wy42+TcRCHJOULjsg7Kc7KLd1ILvxxyV+REnkfaazeqmgSNlqFxM2A65dkq3xNt9CDtYQlX/IhTDBy2/qY3m60uOh92ptd5f5eHF28W89APnkRAHD2JSEVRym1fHNrvPl1NCJT8NavbdYup/dH8hQadMx72X022lmyFASHN92G78O3uA0fZ8B/hzCpVQ4KTTIT4/LqkAXuWlfW4z9wC62V2ZdL6E76lqbMPokeXfH8Tf+chAaw/XHr7Wk6bWkOQ=
+ on:
+ branch: master
+ skip_existing: true
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..41e3ab8
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,22 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+## [1.2.1] - 2019-12-29
+### Changed
+- Moved the config file to ~/.config
+- Moved custom yara rules
+- Refactored the yara rules location
+
+## [1.2.0] - 2019-12-28
+### Added
+- Changelog
+- Travis CI
+- PyPI installation
+
+### Changed
+- File paths to enable pip installation
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..fac9ba1
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+recursive-include pastehunter/YaraRules *.yar
\ No newline at end of file
diff --git a/README.md b/README.md
index 9b696e3..d84601b 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,11 @@ by an organisation or a researcher.
For setup instructions please see the official documentation https://pastehunter.readthedocs.io/en/latest/installation.html
+[![PyPI version](https://badge.fury.io/py/pastehunter.svg)](https://badge.fury.io/py/pastehunter)
+
+[![Build Status](https://travis-ci.org/kevthehermit/PasteHunter.svg?branch=master)](https://travis-ci.org/kevthehermit/PasteHunter)
+
+
## Supported Inputs
Pastehunter currently has support for the following sites:
- pastebin.com
diff --git a/YaraRules/index.yar b/YaraRules/index.yar
deleted file mode 100644
index d09326e..0000000
--- a/YaraRules/index.yar
+++ /dev/null
@@ -1,14 +0,0 @@
-include "api_keys.yar"
-include "aws.yar"
-include "base64.yar"
-include "blacklist.yar"
-include "certificates.yar"
-include "core_keywords.yar"
-include "CryptoExchangeApi.yar"
-include "database.yar"
-include "email_filter.yar"
-include "general.yar"
-include "github_dorks.yar"
-include "hak5.yar"
-include "password_leak.yar"
-include "powershell.yar"
diff --git a/common.py b/common.py
deleted file mode 100644
index 1ea162b..0000000
--- a/common.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import json
-import logging
-
-logger = logging.getLogger('pastehunter')
-
-# Parse the config file in to a dict
-def parse_config():
- conf_file = 'settings.json'
- conf = None
- try:
- with open(conf_file, 'r') as read_conf:
- conf = json.load(read_conf)
- except Exception as e:
- logger.error("Unable to parse config file: {0}".format(e))
-
- return conf
diff --git a/pastehunter-cli b/pastehunter-cli
new file mode 100644
index 0000000..8cab6e2
--- /dev/null
+++ b/pastehunter-cli
@@ -0,0 +1,405 @@
+#!/usr/bin/python3
+import errno
+import hashlib
+import importlib
+import json
+import logging
+import multiprocessing
+import os
+import signal
+import sys
+import time
+from io import BytesIO
+from logging import handlers
+from time import sleep
+from urllib.parse import unquote_plus
+
+import requests
+import yara
+import pastehunter
+from pastehunter.common import parse_config
+
+VERSION = 1.0
+
+# Setup Default logging
+root = logging.getLogger()
+ch = logging.StreamHandler()
+ch.setLevel(logging.DEBUG)
+formatter = logging.Formatter('%(levelname)s:%(filename)s:%(message)s')
+ch.setFormatter(formatter)
+root.addHandler(ch)
+
+logger = logging.getLogger('pastehunter')
+logger.setLevel(logging.INFO)
+
+# Version info
+logger.info("Starting PasteHunter Version: {0}".format(VERSION))
+
+# Parse the config file
+logger.info("Reading Configs")
+conf = parse_config()
+
+# If the config failed to parse
+if not conf:
+ sys.exit()
+
+class TimeoutError(Exception):
+ pass
+
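+# Context manager that bounds paste processing time: it arms a SIGALRM and raises TimeoutError when the alarm fires (POSIX only).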
+class timeout:
+ def __init__(self, seconds=1, error_message='Timeout'):
+ self.seconds = seconds
+ self.error_message = error_message
+ def handle_timeout(self, signum, frame):
+ raise TimeoutError("Process timeout: {0}".format(self.error_message))
+ def __enter__(self):
+ signal.signal(signal.SIGALRM, self.handle_timeout)
+ signal.alarm(self.seconds)
+ def __exit__(self, type, value, traceback):
+ signal.alarm(0)
+
+
+
+# Set up the log file
+if "log" in conf and conf["log"]["log_to_file"]:
+ if conf["log"]["log_path"] != "":
+ logfile = "{0}/{1}.log".format(conf["log"]["log_path"], conf["log"]["log_file"])
+        # Ensure the log directory exists
+ try: os.makedirs(conf["log"]["log_path"], exist_ok=True) # Python>3.2
+ except TypeError:
+ try:
+ os.makedirs(conf["log"]["log_path"])
+ except OSError as exc: # Python >2.5
+ if exc.errno == errno.EEXIST and os.path.isdir(conf["log"]["log_path"]):
+ pass
+ else: logger.error("Can not create log file {0}: {1}".format(conf["log"]["log_path"], exc))
+ else:
+ logfile = "{0}.log".format(conf["log"]["log_file"])
+ fileHandler = handlers.RotatingFileHandler(logfile, mode='a+', maxBytes=(1048576*5), backupCount=7)
+ if conf["log"]["format"] != "":
+ fileFormatter = logging.Formatter("{0}".format(conf["log"]["format"]))
+ fileHandler.setFormatter(fileFormatter)
+ else:
+ fileHandler.setFormatter(formatter)
+ fileHandler.setLevel(conf["log"]["logging_level"])
+ logger.addHandler(fileHandler)
+ logger.info("Enabled Log File: {0}".format(logfile))
+else:
+ logger.info("Logging to file disabled.")
+
+# Override Log level if needed
+if "logging_level" in conf["log"]:
+ log_level = conf["log"]["logging_level"]
+elif "logging_level" in conf["general"]:
+ # For old configs
+ log_level = conf["general"]["logging_level"]
+else:
+ # For older configs
+ logger.error("Log Level not in config file. Update your base config file!")
+ log_level = 20
+
+logger.info("Setting Log Level to {0}".format(log_level))
+logging.getLogger('requests').setLevel(log_level)
+logging.getLogger('elasticsearch').setLevel(log_level)
+logging.getLogger('pastehunter').setLevel(log_level)
+
+# Configure Inputs
+logger.info("Configure Inputs")
+input_list = []
+for input_type, input_values in conf["inputs"].items():
+ if input_values["enabled"]:
+ input_list.append(input_values["module"])
+ logger.info("Enabled Input: {0}".format(input_type))
+
+
+# Configure Outputs
+logger.info("Configure Outputs")
+outputs = []
+for output_type, output_values in conf["outputs"].items():
+ if output_values["enabled"]:
+ logger.info("Enabled Output: {0}".format(output_type))
+ _module = importlib.import_module(output_values["module"])
+ _class = getattr(_module, output_values["classname"])
+ instance = _class()
+ outputs.append(instance)
+
+
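+# Build the {namespace: filepath} dict handed to yara.compile(filepaths=...), pulling default rules from the package and custom rules from the configured directory.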
+def yara_index(default_rules, custom_rules, exclude_rules, blacklist, test_rules):
+ rules_list = {}
+ counter = 0
+ if default_rules:
+ for filename in os.listdir(default_rules):
+ if filename in exclude_rules:
+ continue
+ if filename == 'blacklist.yar':
+ if blacklist:
+ logger.info("Enable Blacklist Rules")
+ else:
+ continue
+ if filename == 'test_rules.yar':
+ if test_rules:
+ logger.info("Enable Test Rules")
+ else:
+ continue
+ rules_list['namespace{0}'.format(counter)] = os.path.join(default_rules, filename)
+ logger.info("Adding rules from {0}".format(filename))
+ counter += 1
+ if custom_rules:
+ for filename in os.listdir(custom_rules):
+ rules_list['namespace{0}'.format(counter)] = os.path.join(custom_rules, filename)
+ logger.info("Adding custom rules from {0}".format(filename))
+ counter += 1
+ return rules_list
+
+
+def paste_scanner(paste_data, rules_buff):
+    # Grab the yara rules from the passed buffer
+    # Fetch the raw paste
+    # Scan the paste
+    # Store the paste
+
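+    # Reload the compiled ruleset from the serialized buffer passed in by main(); this runs inside a worker process.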
+ rules_buff.seek(0)
+ rules = yara.load(file=rules_buff)
+ try:
+ with timeout(seconds=conf['general']['process_timeout']):
+ # Start a timer
+ start_time = time.time()
+ logger.debug("Found New {0} paste {1}".format(paste_data['pastesite'], paste_data['pasteid']))
+ # get raw paste and hash them
+ try:
+
+                # Stack Exchange questions don't have a raw endpoint
+ if ('stackexchange' in conf['inputs']) and (paste_data['pastesite'] in conf['inputs']['stackexchange']['site_list']):
+ # The body is already included in the first request so we do not need a second call to the API.
+
+ # Unescape the code block strings in the json body.
+ raw_body = paste_data['body']
+ raw_paste_data = unquote_plus(raw_body)
+
+                    # now remove the old body key as we don't need it anymore
+ del paste_data['body']
+
+ else:
+ raw_paste_uri = paste_data['scrape_url']
+ if not raw_paste_uri:
+ logger.info('Unable to retrieve paste, no uri found.')
+ logger.debug(json.dumps(paste_data))
+ raw_paste_data = ""
+ else:
+ raw_paste_data = requests.get(raw_paste_uri).text
+
+ # Cover fetch site SSLErrors
+ except requests.exceptions.SSLError as e:
+ logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
+ raw_paste_data = ""
+
+ # General Exception
+ except Exception as e:
+ logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
+ raw_paste_data = ""
+
+ # Pastebin Cache
+ if raw_paste_data == "File is not ready for scraping yet. Try again in 1 minute.":
+                logger.info("Paste is still cached, sleeping before trying again")
+ sleep(45)
+ # get raw paste and hash them
+ raw_paste_uri = paste_data['scrape_url']
+ # Cover fetch site SSLErrors
+ try:
+ raw_paste_data = requests.get(raw_paste_uri).text
+ except requests.exceptions.SSLError as e:
+ logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
+ raw_paste_data = ""
+
+ # General Exception
+ except Exception as e:
+ logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
+ raw_paste_data = ""
+
+ # Process the paste data here
+ try:
+ # Scan with yara
+ matches = rules.match(data=raw_paste_data, externals={'filename': paste_data.get('filename', '')})
+ except Exception as e:
+ logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
+ return False
+
+ results = []
+ for match in matches:
+ # For keywords get the word from the matched string
+ if match.rule == 'core_keywords' or match.rule == 'custom_keywords':
+ for s in match.strings:
+ rule_match = s[1].lstrip('$')
+ if rule_match not in results:
+ results.append(rule_match)
+ results.append(str(match.rule))
+
+                # Put a break in here for the base64 rules. Will use them later.
+ elif match.rule.startswith('b64'):
+ results.append(match.rule)
+
+ # Else use the rule name
+ else:
+ results.append(match.rule)
+
+ # Store additional fields for passing on to post processing
+ encoded_paste_data = raw_paste_data.encode('utf-8')
+ md5 = hashlib.md5(encoded_paste_data).hexdigest()
+ sha256 = hashlib.sha256(encoded_paste_data).hexdigest()
+ paste_data['MD5'] = md5
+ paste_data['SHA256'] = sha256
+ paste_data['raw_paste'] = raw_paste_data
+ paste_data['YaraRule'] = results
+ # Set the size for all pastes - This will override any size set by the source
+ paste_data['size'] = len(raw_paste_data)
+
+            # store_all overrides other output options.
+ paste_site = paste_data['confname']
+ store_all = conf['inputs'][paste_site]['store_all']
+            # remove the confname key as it's not needed past this point
+ del paste_data['confname']
+
+
+ # Blacklist Check
+ # If any of the blacklist rules appear then empty the result set
+ blacklisted = False
+ if conf['yara']['blacklist'] and 'blacklist' in results:
+ results = []
+ blacklisted = True
+ logger.info("Blacklisted {0} paste {1}".format(paste_data['pastesite'], paste_data['pasteid']))
+
+
+ # Post Process
+
+ # If post module is enabled and the paste has a matching rule.
+ post_results = paste_data
+ for post_process, post_values in conf["post_process"].items():
+ if post_values["enabled"]:
+ if any(i in results for i in post_values["rule_list"]) or "ALL" in post_values["rule_list"]:
+ if not blacklisted:
+ logger.info("Running Post Module {0} on {1}".format(post_values["module"], paste_data["pasteid"]))
+ post_module = importlib.import_module(post_values["module"])
+ post_results = post_module.run(results,
+ raw_paste_data,
+ paste_data
+ )
+
+ # Throw everything back to paste_data for ease.
+ paste_data = post_results
+
+
+ # If we have a result add some meta data and send to storage
+ # If results is empty, ie no match, and store_all is True,
+ # then append "no_match" to results. This will then force output.
+
+ if store_all is True:
+ if len(results) == 0:
+ results.append('no_match')
+
+ if len(results) > 0:
+ for output in outputs:
+ try:
+ output.store_paste(paste_data)
+ except Exception as e:
+ logger.error("Unable to store {0} to {1} with error {2}".format(paste_data["pasteid"], output, e))
+
+ end_time = time.time()
+ logger.debug("Processing Finished for {0} in {1} seconds".format(
+ paste_data["pasteid"],
+ (end_time - start_time)
+ ))
+ return True
+ except TimeoutError:
+ return False
+
+def main():
+ logger.info("Compile Yara Rules")
+ try:
+ if conf['yara']['default_rules']:
+ pastehunter_path = pastehunter.__path__[0]
+ default_rules = os.path.join(pastehunter_path, "YaraRules")
+ else:
+ default_rules = False
+
+ if conf["yara"]["custom_rules"] != "none":
+ custom_rules = conf["yara"]["custom_rules"]
+ else:
+ custom_rules = False
+
+ rule_files = yara_index(
+ default_rules,
+ custom_rules,
+ conf['yara']['exclude_rules'],
+ conf['yara']['blacklist'],
+ conf['yara']['test_rules']
+ )
+
+ rules = yara.compile(filepaths=rule_files, externals={'filename': ''})
+
+        # Serialize the compiled rules to a buffer so they can be shared with the worker processes
+ rules_buff = BytesIO()
+ rules.save(file=rules_buff)
+
+ except Exception as e:
+ logger.exception("Unable to Create Yara index: {0}".format(e))
+ sys.exit()
+
+    # Create a pool of worker processes to scan pastes
+ pool = multiprocessing.Pool(processes=5)
+ results = []
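+    # 'results' collects the AsyncResult handles so the main loop can wait for every job to finish before sleeping.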
+
+    # Now submit work to the pool
+ try:
+ while True:
+ queue_count = 0
+
+ # Paste History
+ logger.info("Populating Queue")
+ if os.path.exists('paste_history.tmp'):
+ with open('paste_history.tmp') as json_file:
+ paste_history = json.load(json_file)
+ else:
+ paste_history = {}
+
+ for input_name in input_list:
+ if input_name in paste_history:
+ input_history = paste_history[input_name]
+ else:
+ input_history = []
+
+ try:
+
+ i = importlib.import_module(input_name)
+ # Get list of recent pastes
+ logger.info("Fetching paste list from {0}".format(input_name))
+ paste_list, history = i.recent_pastes(conf, input_history)
+ for paste in paste_list:
+ # Create a new async job for the existing pool and apply it to "results"
+ results.append(pool.apply_async(paste_scanner, (paste, rules_buff)))
+ queue_count += 1
+ paste_history[input_name] = history
+ except Exception as e:
+ logger.error("Unable to fetch list from {0}: {1}".format(input_name, e))
+
+ logger.debug("Writing History")
+ # Write History
+ with open('paste_history.tmp', 'w') as outfile:
+ json.dump(paste_history, outfile)
+ logger.info("Added {0} Items to the queue".format(queue_count))
+
+ # Wait for all work to finish
+ [result.wait() for result in results]
+
+ # Slow it down a little
+ logger.info("Sleeping for " + str(conf['general']['run_frequency']) + " Seconds")
+ sleep(conf['general']['run_frequency'])
+
+
+
+ except KeyboardInterrupt:
+ logger.info("Stopping Processes")
+ pool.terminate()
+ pool.join()
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff --git a/pastehunter.py b/pastehunter.py
deleted file mode 100644
index aeaf2d1..0000000
--- a/pastehunter.py
+++ /dev/null
@@ -1,399 +0,0 @@
-#!/usr/bin/python3
-
-import os
-import sys
-import yara
-import json
-import hashlib
-import requests
-import multiprocessing
-import importlib
-import logging
-from logging import handlers
-import time
-import errno
-import signal
-from time import sleep
-from urllib.parse import unquote_plus
-from common import parse_config
-from postprocess import post_email
-
-
-from multiprocessing import Queue
-
-VERSION = 1.0
-
-# Setup Default logging
-root = logging.getLogger()
-ch = logging.StreamHandler()
-ch.setLevel(logging.DEBUG)
-formatter = logging.Formatter('%(levelname)s:%(filename)s:%(message)s')
-ch.setFormatter(formatter)
-root.addHandler(ch)
-
-logger = logging.getLogger('pastehunter')
-logger.setLevel(logging.INFO)
-
-# Version info
-logger.info("Starting PasteHunter Version: {0}".format(VERSION))
-
-# Parse the config file
-logger.info("Reading Configs")
-conf = parse_config()
-
-# If the config failed to parse
-if not conf:
- sys.exit()
-
-class TimeoutError(Exception):
- pass
-
-class timeout:
- def __init__(self, seconds=1, error_message='Timeout'):
- self.seconds = seconds
- self.error_message = error_message
- def handle_timeout(self, signum, frame):
- print("Process timeout: {0}".format(self.error_message))
- sys.exit(0)
- def __enter__(self):
- signal.signal(signal.SIGALRM, self.handle_timeout)
- signal.alarm(self.seconds)
- def __exit__(self, type, value, traceback):
- signal.alarm(0)
-
-
-
-# Set up the log file
-if "log" in conf and conf["log"]["log_to_file"]:
- if conf["log"]["log_path"] != "":
- logfile = "{0}/{1}.log".format(conf["log"]["log_path"], conf["log"]["log_file"])
- # Assure directory exists
- try: os.makedirs(conf["log"]["log_path"], exist_ok=True) # Python>3.2
- except TypeError:
- try:
- os.makedirs(conf["log"]["log_path"])
- except OSError as exc: # Python >2.5
- if exc.errno == errno.EEXIST and os.path.isdir(conf["log"]["log_path"]):
- pass
- else: logger.error("Can not create log file {0}: {1}".format(conf["log"]["log_path"], exc))
- else:
- logfile = "{0}.log".format(conf["log"]["log_file"])
- fileHandler = handlers.RotatingFileHandler(logfile, mode='a+', maxBytes=(1048576*5), backupCount=7)
- if conf["log"]["format"] != "":
- fileFormatter = logging.Formatter("{0}".format(conf["log"]["format"]))
- fileHandler.setFormatter(fileFormatter)
- else:
- fileHandler.setFormatter(logFormatter)
- fileHandler.setLevel(conf["log"]["logging_level"])
- logger.addHandler(fileHandler)
- logger.info("Enabled Log File: {0}".format(logfile))
-else:
- logger.info("Logging to file disabled.")
-
-# Override Log level if needed
-if "logging_level" in conf["log"]:
- log_level = conf["log"]["logging_level"]
-elif "logging_level" in conf["general"]:
- # For old configs
- log_level = conf["general"]["logging_level"]
-else:
- # For older configs
- logger.error("Log Level not in config file. Update your base config file!")
- log_level = 20
-
-logger.info("Setting Log Level to {0}".format(log_level))
-logging.getLogger('requests').setLevel(log_level)
-logging.getLogger('elasticsearch').setLevel(log_level)
-logging.getLogger('pastehunter').setLevel(log_level)
-
-# Configure Inputs
-logger.info("Configure Inputs")
-input_list = []
-for input_type, input_values in conf["inputs"].items():
- if input_values["enabled"]:
- input_list.append(input_values["module"])
- logger.info("Enabled Input: {0}".format(input_type))
-
-
-# Configure Outputs
-logger.info("Configure Outputs")
-outputs = []
-for output_type, output_values in conf["outputs"].items():
- if output_values["enabled"]:
- logger.info("Enabled Output: {0}".format(output_type))
- _module = importlib.import_module(output_values["module"])
- _class = getattr(_module, output_values["classname"])
- instance = _class()
- outputs.append(instance)
-
-
-def yara_index(rule_path, blacklist, test_rules):
- index_file = os.path.join(rule_path, 'index.yar')
- with open(index_file, 'w') as yar:
- for filename in os.listdir(rule_path):
- if filename.endswith('.yar') and filename != 'index.yar':
- if filename == 'blacklist.yar':
- if blacklist:
- logger.info("Enable Blacklist Rules")
- else:
- continue
- if filename == 'test_rules.yar':
- if test_rules:
- logger.info("Enable Test Rules")
- else:
- continue
- include = 'include "{0}"\n'.format(filename)
- yar.write(include)
-
-
-def paste_scanner():
- # Get a paste URI from the Queue
- # Fetch the raw paste
- # scan the Paste
- # Store the Paste
- while True:
- if q.empty():
- # Queue was empty, sleep to prevent busy loop
- sleep(0.5)
- else:
- paste_data = q.get()
- with timeout(seconds=conf['general']['process_timeout']):
- # Start a timer
- start_time = time.time()
- logger.debug("Found New {0} paste {1}".format(paste_data['pastesite'], paste_data['pasteid']))
- # get raw paste and hash them
- try:
-
- # Stack questions dont have a raw endpoint
- if ('stackexchange' in conf['inputs']) and (paste_data['pastesite'] in conf['inputs']['stackexchange']['site_list']):
- # The body is already included in the first request so we do not need a second call to the API.
-
- # Unescape the code block strings in the json body.
- raw_body = paste_data['body']
- raw_paste_data = unquote_plus(raw_body)
-
- # now remove the old body key as we dont need it any more
- del paste_data['body']
-
- else:
- raw_paste_uri = paste_data['scrape_url']
- if not raw_paste_uri:
- logger.info('Unable to retrieve paste, no uri found.')
- logger.debug(json.dumps(paste_data))
- raw_paste_data = ""
- else:
- raw_paste_data = requests.get(raw_paste_uri).text
-
- # Cover fetch site SSLErrors
- except requests.exceptions.SSLError as e:
- logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
- raw_paste_data = ""
-
- # General Exception
- except Exception as e:
- logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
- raw_paste_data = ""
-
- # Pastebin Cache
- if raw_paste_data == "File is not ready for scraping yet. Try again in 1 minute.":
- logger.info("Paste is still cached sleeping to try again")
- sleep(45)
- # get raw paste and hash them
- raw_paste_uri = paste_data['scrape_url']
- # Cover fetch site SSLErrors
- try:
- raw_paste_data = requests.get(raw_paste_uri).text
- except requests.exceptions.SSLError as e:
- logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
- raw_paste_data = ""
-
- # General Exception
- except Exception as e:
- logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
- raw_paste_data = ""
-
- # Process the paste data here
- try:
- # Scan with yara
- matches = rules.match(data=raw_paste_data, externals={'filename': paste_data.get('filename', '')})
- except Exception as e:
- logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
- continue
-
- results = []
- for match in matches:
- # For keywords get the word from the matched string
- if match.rule == 'core_keywords' or match.rule == 'custom_keywords':
- for s in match.strings:
- rule_match = s[1].lstrip('$')
- if rule_match not in results:
- results.append(rule_match)
- results.append(str(match.rule))
-
- # But a break in here for the base64. Will use it later.
- elif match.rule.startswith('b64'):
- results.append(match.rule)
-
- # Else use the rule name
- else:
- results.append(match.rule)
-
- # Store additional fields for passing on to post processing
- encoded_paste_data = raw_paste_data.encode('utf-8')
- md5 = hashlib.md5(encoded_paste_data).hexdigest()
- sha256 = hashlib.sha256(encoded_paste_data).hexdigest()
- paste_data['MD5'] = md5
- paste_data['SHA256'] = sha256
- paste_data['raw_paste'] = raw_paste_data
- paste_data['YaraRule'] = results
- # Set the size for all pastes - This will override any size set by the source
- paste_data['size'] = len(raw_paste_data)
-
- # Store all OverRides other options.
- paste_site = paste_data['confname']
- store_all = conf['inputs'][paste_site]['store_all']
- # remove the confname key as its not really needed past this point
- del paste_data['confname']
-
-
- # Blacklist Check
- # If any of the blacklist rules appear then empty the result set
- blacklisted = False
- if conf['yara']['blacklist'] and 'blacklist' in results:
- results = []
- blacklisted = True
- logger.info("Blacklisted {0} paste {1}".format(paste_data['pastesite'], paste_data['pasteid']))
-
-
- # Post Process
-
- # If post module is enabled and the paste has a matching rule.
- post_results = paste_data
- for post_process, post_values in conf["post_process"].items():
- if post_values["enabled"]:
- if any(i in results for i in post_values["rule_list"]) or "ALL" in post_values["rule_list"]:
- if not blacklisted:
- logger.info("Running Post Module {0} on {1}".format(post_values["module"], paste_data["pasteid"]))
- post_module = importlib.import_module(post_values["module"])
- post_results = post_module.run(results,
- raw_paste_data,
- paste_data
- )
-
- # Throw everything back to paste_data for ease.
- paste_data = post_results
-
-
- # If we have a result add some meta data and send to storage
- # If results is empty, ie no match, and store_all is True,
- # then append "no_match" to results. This will then force output.
-
- if store_all is True:
- if len(results) == 0:
- results.append('no_match')
-
- if len(results) > 0:
- for output in outputs:
- try:
- output.store_paste(paste_data)
- except Exception as e:
- logger.error("Unable to store {0} to {1} with error {2}".format(paste_data["pasteid"], output, e))
-
- end_time = time.time()
- logger.debug("Processing Finished for {0} in {1} seconds".format(
- paste_data["pasteid"],
- (end_time - start_time)
- ))
-
-
-
-if __name__ == "__main__":
- logger.info("Compile Yara Rules")
- try:
- # Update the yara rules index
- yara_index(conf['yara']['rule_path'],
- conf['yara']['blacklist'],
- conf['yara']['test_rules'])
-
- # Compile the yara rules we will use to match pastes
- index_file = os.path.join(conf['yara']['rule_path'], 'index.yar')
- rules = yara.compile(index_file, externals={'filename': ''})
- except Exception as e:
- logger.exception("Unable to Create Yara index: ", e)
- sys.exit()
-
- # Create Queue to hold paste URI's
- q = Queue()
- processes = []
-
- # Now Fill the Queue
- try:
- while True:
- queue_count = 0
- counter = 0
- if len(processes) < 5:
- for i in range(5-len(processes)):
- logger.warning("Creating New Process")
- m = multiprocessing.Process(target=paste_scanner)
- # Add new process to list so we can run join on them later.
- processes.append(m)
- m.start()
- for process in processes:
- if not process.is_alive():
- logger.warning("Restarting Dead Process")
- del processes[counter]
- m = multiprocessing.Process(target=paste_scanner)
- # Add new process to list so we can run join on them later.
- processes.append(m)
- m.start()
- counter += 1
-
- # Check if the processors are active
- # Paste History
- logger.info("Populating Queue")
- if os.path.exists('paste_history.tmp'):
- with open('paste_history.tmp') as json_file:
- paste_history = json.load(json_file)
- else:
- paste_history = {}
-
- for input_name in input_list:
- if input_name in paste_history:
- input_history = paste_history[input_name]
- else:
- input_history = []
-
- try:
-
- i = importlib.import_module(input_name)
- # Get list of recent pastes
- logger.info("Fetching paste list from {0}".format(input_name))
- paste_list, history = i.recent_pastes(conf, input_history)
- for paste in paste_list:
- q.put(paste)
- queue_count += 1
- paste_history[input_name] = history
- except Exception as e:
- logger.error("Unable to fetch list from {0}: {1}".format(input_name, e))
-
- logger.debug("Writing History")
- # Write History
- with open('paste_history.tmp', 'w') as outfile:
- json.dump(paste_history, outfile)
- logger.info("Added {0} Items to the queue".format(queue_count))
-
- for proc in processes:
- proc.join(2)
-
- # Slow it down a little
- logger.info("Sleeping for " + str(conf['general']['run_frequency']) + " Seconds")
- sleep(conf['general']['run_frequency'])
-
-
-
- except KeyboardInterrupt:
- logger.info("Stopping Processes")
- for proc in processes:
- proc.terminate()
- proc.join()
-
diff --git a/pastehunter/YaraRules/CryptoExchangeApi.yar b/pastehunter/YaraRules/CryptoExchangeApi.yar
new file mode 100644
index 0000000..c130889
--- /dev/null
+++ b/pastehunter/YaraRules/CryptoExchangeApi.yar
@@ -0,0 +1,88 @@
+rule CryptoExchangeApi
+{
+ meta:
+        description = "Contains Crypto Exchange API URL"
+ author = "Jason Schorr (0xBanana)"
+ source = "https://github.com/cryptodefense/PasteHunter-Yara/blob/master/CryptoExchangeApi.yar"
+ strings:
+ $a = "api.binance.com" nocase wide ascii
+ $a0 = "1btcxe.com/api" nocase wide ascii
+ $a1 = "acx.io/api" nocase wide ascii
+ $a2 = "anxpro.com/api" nocase wide ascii
+ $a3 = "anybits.com/api" nocase wide ascii
+ $a4 = "www.bcex.top" nocase wide ascii
+ $a5 = "api.bibox.com" nocase wide ascii
+ $a6 = "bit2c.co.il" nocase wide ascii
+ $a7 = "api.bitfinex.com" nocase wide ascii
+ $a8 = "api.bitfinex.com" nocase wide ascii
+ $a9 = "api.bitflyer.jp" nocase wide ascii
+ $aa = "api.bitforex.com" nocase wide ascii
+ $ab = "bitibu.com" nocase wide ascii
+ $ac = "bitlish.com/api" nocase wide ascii
+ $ad = "www.bitmex.com" nocase wide ascii
+ $ae = "bitsane.com/api" nocase wide ascii
+ $af = "api.bitso.com" nocase wide ascii
+ $ag = "www.bitstamp.net/api" nocase wide ascii
+ $ah = "www.bitstamp.net/api" nocase wide ascii
+ $ai = "api.bl3p.eu" nocase wide ascii
+ $aj = "braziliex.com/api/v1" nocase wide ascii
+ $ak = "btc-alpha.com/api" nocase wide ascii
+ $al = "www.btcbox.co.jp/api" nocase wide ascii
+ $am = "www.btcexchange.ph/api" nocase wide ascii
+ $an = "btc-trade.com.ua/api" nocase wide ascii
+ $ao = "www.btcturk.com/api" nocase wide ascii
+ $ap = "www.buda.com/api" nocase wide ascii
+ $aq = "bx.in.th/api" nocase wide ascii
+ $ar = "cex.io/api" nocase wide ascii
+ $as = "api.cobinhood.com" nocase wide ascii
+ $at = "api.coinbase.com" nocase wide ascii
+ $au = "api.prime.coinbase.com" nocase wide ascii
+ $av = "api.pro.coinbase.com" nocase wide ascii
+ $aw = "coincheck.com/api" nocase wide ascii
+ $ax = "www.coinexchange.io/api/v1" nocase wide ascii
+ $ay = "coinfalcon.com" nocase wide ascii
+ $az = "webapi.coinfloor.co.uk:8090/bist" nocase wide ascii
+ $aa1 = "coinmate.io/api" nocase wide ascii
+ $aa2 = "api.coinone.co.kr" nocase wide ascii
+ $aa3 = "api.crex24.com" nocase wide ascii
+ $aa4 = "api.cryptonbtc.com" nocase wide ascii
+ $aa5 = "www.deribit.com" nocase wide ascii
+ $aa6 = "api.ethfinex.com" nocase wide ascii
+ $aa7 = "api.fcoin.com" nocase wide ascii
+ $aa8 = "api.flowbtc.com:8405/ajax" nocase wide ascii
+ $aa9 = "www.fybse.se/api/SEK" nocase wide ascii
+ $aa0 = "www.fybsg.com/api/SGD" nocase wide ascii
+ $aab = "api.gatecoin.com" nocase wide ascii
+ $aac = "api.gdax.com" nocase wide ascii
+ $aad = "api.gemini.com" nocase wide ascii
+ $aae = "getbtc.org/api" nocase wide ascii
+ $aaf = "api.hitbtc.com" nocase wide ascii
+ $aag = "api.hitbtc.com" nocase wide ascii
+ $aah = "api.huobi.com" nocase wide ascii
+ $aai = "ice3x.com/api" nocase wide ascii
+ $aaj = "api.itbit.com" nocase wide ascii
+ $aak = "www.jubi.com/api" nocase wide ascii
+ $aal = "kuna.io" nocase wide ascii
+ $aam = "api.lakebtc.com" nocase wide ascii
+ $aan = "api.lbank.info" nocase wide ascii
+ $aao = "api.liquid.com" nocase wide ascii
+ $aap = "api.livecoin.net" nocase wide ascii
+ $aaq = "api.mybitx.com/api" nocase wide ascii
+ $aar = "mixcoins.com/api" nocase wide ascii
+ $aas = "novaexchange.com/remote" nocase wide ascii
+ $aat = "paymium.com/api" nocase wide ascii
+ $aau = "api.quadrigacx.com" nocase wide ascii
+ $aav = "www.rightbtc.com/api" nocase wide ascii
+ $aaw = "www.southxchange.com/api" nocase wide ascii
+ $aax = "api.theocean.trade/api" nocase wide ascii
+ $aay = "api.therocktrading.com" nocase wide ascii
+ $aaz = "www.tidebit.com" nocase wide ascii
+ $ba = "open-api.uex.com/open/api" nocase wide ascii
+ $bb = "api.vaultoro.com" nocase wide ascii
+ $bc = "cryptottlivewebapi.xbtce.net:8443/api" nocase wide ascii
+ $bd = "yunbi.com" nocase wide ascii
+ $be = "api.zaif.jp" nocase wide ascii
+
+ condition:
+ any of them
+}
\ No newline at end of file
diff --git a/YaraRules/api_keys.yar b/pastehunter/YaraRules/api_keys.yar
similarity index 100%
rename from YaraRules/api_keys.yar
rename to pastehunter/YaraRules/api_keys.yar
diff --git a/pastehunter/YaraRules/aws.yar b/pastehunter/YaraRules/aws.yar
new file mode 100644
index 0000000..37ef230
--- /dev/null
+++ b/pastehunter/YaraRules/aws.yar
@@ -0,0 +1,36 @@
+rule aws_cli
+{
+ meta:
+ author = "@KevTheHermit"
+ info = "Part of PasteHunter"
+ reference = "https://github.com/kevthehermit/PasteHunter"
+
+ strings:
+ $a1 = "aws s3 " ascii
+ $a2 = "aws ec2 " ascii
+ $a3 = "aws ecr " ascii
+ $a4 = "aws cognito-identity" ascii
+        $a5 = "aws iam " ascii
+ $a6 = "aws waf " ascii
+
+ condition:
+ any of them
+
+}
+
+rule sw_bucket
+{
+ meta:
+ author = "@KevTheHermit"
+ info = "Part of PasteHunter"
+ reference = "https://github.com/kevthehermit/PasteHunter"
+
+ strings:
+ $a1 = "s3.amazonaws.com" ascii
+
+ condition:
+ any of them
+
+
+
+}
diff --git a/YaraRules/base64.yar b/pastehunter/YaraRules/base64.yar
similarity index 90%
rename from YaraRules/base64.yar
rename to pastehunter/YaraRules/base64.yar
index 6b3b6f0..913d12c 100644
--- a/YaraRules/base64.yar
+++ b/pastehunter/YaraRules/base64.yar
@@ -88,7 +88,14 @@ rule b64_url
$a4 = "V1dXLg" // WWW.
// ignore vendor certs in this rule. The certs rule will pick them up if we want them
- $not1 = "GlobalSign Root CA" nocase
+ $not1 = "GlobalSign Root CA" nocase
+
+ // Ignore data: uris. These are common in html and svg files.
+ $not2 = /data:[a-z\/]+;(base64,)?aHR0cDov/ nocase
+ $not3 = /data:[a-z\/]+;(base64,)?SFRUUDov/ nocase
+ $not4 = /data:[a-z\/]+;(base64,)?d3d3Lg/ nocase
+ $not5 = /data:[a-z\/]+;(base64,)?V1dXLg/ nocase
+
condition:
any of ($a*) and not any of ($not*)
diff --git a/pastehunter/YaraRules/blacklist.yar b/pastehunter/YaraRules/blacklist.yar
new file mode 100644
index 0000000..117e526
--- /dev/null
+++ b/pastehunter/YaraRules/blacklist.yar
@@ -0,0 +1,20 @@
+rule blacklist
+{
+ meta:
+ author = "@KevTheHermit"
+ info = "Part of PasteHunter"
+ reference = "https://github.com/kevthehermit/PasteHunter"
+
+ strings:
+ $a = "#EXTINF:" nocase // IPTV stream Lists.
+ $b = "--app-name=LeagueClient" nocase // League of Legends Debug Log
+ $c = "common.application_name: LeagueClient" // League of Legends Debug Log
+ $d = /java\.(util|lang|io)/ // Minecraft and java errors
+ $e = "Traceback (most recent call last)"
+ $f = /define\(.*?\)|require_once\(.*?\)/
+ $g = "Technic Launcher is starting" // Minecraft mod dumps
+        $h = "OTL logfile created on"
+ condition:
+ any of them
+
+}
\ No newline at end of file
diff --git a/pastehunter/YaraRules/certificates.yar b/pastehunter/YaraRules/certificates.yar
new file mode 100644
index 0000000..6b6a667
--- /dev/null
+++ b/pastehunter/YaraRules/certificates.yar
@@ -0,0 +1,25 @@
+/*
+ This rule will look for common encoded certificates and secrets
+*/
+
+rule certificates
+{
+ meta:
+ author = "@KevTheHermit"
+ info = "Part of PasteHunter"
+ reference = "https://github.com/kevthehermit/PasteHunter"
+
+ strings:
+ $ssh_priv = "BEGIN RSA PRIVATE KEY" wide ascii nocase
+ $openssh_priv = "BEGIN OPENSSH PRIVATE KEY" wide ascii nocase
+ $dsa_priv = "BEGIN DSA PRIVATE KEY" wide ascii nocase
+ $ec_priv = "BEGIN EC PRIVATE KEY" wide ascii nocase
+ $pgp_priv = "BEGIN PGP PRIVATE KEY" wide ascii nocase
+ $pem_cert = "BEGIN CERTIFICATE" wide ascii nocase
+ $pkcs7 = "BEGIN PKCS7"
+
+ condition:
+ any of them
+
+}
+
diff --git a/YaraRules/core_keywords.yar b/pastehunter/YaraRules/core_keywords.yar
similarity index 100%
rename from YaraRules/core_keywords.yar
rename to pastehunter/YaraRules/core_keywords.yar
diff --git a/YaraRules/database.yar b/pastehunter/YaraRules/database.yar
similarity index 100%
rename from YaraRules/database.yar
rename to pastehunter/YaraRules/database.yar
diff --git a/pastehunter/YaraRules/email_filter.yar b/pastehunter/YaraRules/email_filter.yar
new file mode 100644
index 0000000..8d546bd
--- /dev/null
+++ b/pastehunter/YaraRules/email_filter.yar
@@ -0,0 +1,19 @@
+/*
+ These rules attempt to find email leaks
+*/
+
+rule email_filter
+{
+ meta:
+ author = "@kovacsbalu"
+ info = "Better email pattern"
+ reference = "https://github.com/securenetworx/PasteHunter/tree/fix-email-filter"
+
+ strings:
+ $email_add = /\b[\w-]+(\.[\w-]+)*@[\w-]+(\.[\w-]+)*\.[a-zA-Z-]+[\w-]\b/
+ condition:
+ #email_add > 20
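+        // The '#' operator counts matches of $email_add, so this rule only fires when a paste contains more than 20 email addresses.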
+
+}
+
+
diff --git a/YaraRules/general.yar b/pastehunter/YaraRules/general.yar
similarity index 100%
rename from YaraRules/general.yar
rename to pastehunter/YaraRules/general.yar
diff --git a/pastehunter/YaraRules/github_dorks.yar b/pastehunter/YaraRules/github_dorks.yar
new file mode 100644
index 0000000..227e83b
--- /dev/null
+++ b/pastehunter/YaraRules/github_dorks.yar
@@ -0,0 +1,551 @@
+/*
+ These are rule derived from github-dorks (https://github.com/techgaun/github-dorks)
+ github-dorks is under the Apache License 2.0:
+ https://github.com/techgaun/github-dorks/blob/master/LICENSE
+*/
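+// Note: 'filename' is an external variable supplied by pastehunter-cli at scan time (externals={'filename': ...}), so these rules only match when the input provides a filename.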
+rule NPMRegistry {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "NPM Registry files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "_auth" nocase
+ condition:
+ all of them and filename matches /.*\.npmrc$/is
+}
+
+rule DockerCfg {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Docker config files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "auth" nocase
+ condition:
+ all of them and filename matches /.*\.dockercfg$/is
+}
+rule PrivateKeys {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Private key files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "private" nocase
+ condition:
+ all of them and (filename matches /.*\.pem$/is or filename matches /\.ppk$/is
+ or filename matches /(\/|^)id_(r|d)sa$/is)
+}
+rule SQLDump {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "SQL dumps (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "mysql" nocase
+ $ = "dump" nocase
+ condition:
+ all of them and (filename matches /.*\.sql$/is)
+}
+rule S3Credentials {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "S3 Credentials (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "aws_access_key_id" nocase
+ condition:
+ filename matches /(\/|^)\.s3cfg$/is or filename matches /(\/|^)credentials$/is and all of them
+}
+rule WPConfig {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Wordpress config files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)wp-config.php$/is
+}
+rule HTPasswd {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "htpasswd files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)\.htpasswd$/is
+}
+rule EnvFile {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = ".env files, Matches laravel, mailservers, and various CI and config files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $db_usr = "DB_USERNAME"
+ $mail_host = "MAIL_HOST=smtp."
+ $excl = "homestead" nocase
+ condition:
+ filename matches /(\/|^)\.env/is and any of ($db_usr, $mail_host) and not $excl
+}
+rule GitCredentials {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = ".git-credentials files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)\.git-credentials$/is
+}
+rule PivotalToken {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "PivotalTracker token (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "PT_TOKEN"
+ condition:
+ any of them
+}
+
+rule BashProfile {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Sensitive info in profile files, specifically .bashrc and .bash_profile (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "password" nocase
+ $ = "mailchimp" nocase
+ $ = "aws" nocase
+ $ = "secret" nocase
+ condition:
+ filename matches /(\/|^)\.bash(rc|_profile)$/is and any of them
+}
+rule AmazonCredentials {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Generic AWS credentials for RDS or EC2 (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $rds = "rds.amazonaws.com" nocase
+ $ec2 = "ec2.amazonaws.com" nocase
+ $pass = "password" nocase
+ condition:
+ $pass and ($rds or $ec2)
+}
+rule MongoLab {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "MongoLab Credentials (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "mongolab.com" nocase
+ condition:
+ filename matches /.*(\.conf|\.yaml|\.yml|\.json)$/is and all of them
+}
+rule RoboMongo {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "RoboMongo Credentials (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)robomongo\.json$/is
+}
+rule JSForce {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Salesforce Credentials for JSForce (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "jsforce" nocase
+ $ = "conn.login" nocase
+ condition:
+ filename matches /.*js$/is and all of them
+}
+rule Salesforce {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Generic salesforce Credentials (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "SF_USERNAME" nocase
+ $ = "salesforce" nocase
+ condition:
+ all of them
+}
+rule Tugboat {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "DigitalOcean Tugboat Configurations (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "_tugboat"
+ condition:
+ filename matches /(\/|^)\.tugboat$/is and not any of them
+}
+rule Hub {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Hub files that contain oauth tokens (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = /oauth_token: [a-zA-Z0-9]+/ nocase
+ condition:
+ filename matches /(\/|^)hub$/is and any of them
+}
+rule NetRC {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Netrc files that contain 'password' or 'key' (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "password"
+ $ = "key"
+ condition:
+ filename matches /(\/|^)\.?_?netrc/is and any of them
+}
+rule Filezilla {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Filezilla configuration files with passwords (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "Pass"
+ condition:
+ (filename matches /(\/|^)filezilla\.xml$/is or filename matches /(\/|^)recentservers.xml$/is) and any of them
+}
+rule Docker {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Docker authentication config (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "auths"
+ condition:
+ filename matches /(\/|^)config\.json$/is and any of them
+}
+rule IdeaKey {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "License Keys for IDEA IDEs (IntelliJ, PyCharm, etc) (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)idea[0-9]{0,}\.key$/is
+}
+rule DBServers {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Database servers (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)connections\.xml$/is
+ or filename matches /(\/|^)\.pgpass$/is
+}
+rule Proftpd {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Proftpd configuration files created by cpanel (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)proftpdpasswd$/is
+}
+rule Ventrilo {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Ventrilo server configuration files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)ventrilo_srv\.ini/is
+}
+rule WinFrameClient {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "WinFrame-Client configuration used to connect to Citrix Application Servers (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "[WFClient] Password="
+ condition:
+ all of them and filename matches /.*\.ica/is
+}
+rule CounterStrikeRCON {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "RCON Credentials for CounterStrike servers (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "rcon" nocase
+ $ = "password" nocase
+ condition:
+ all of them and filename matches /(\/|^)server\.cfg/is
+}
+rule JekyllGitHub {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Jekyll Token for GitHub (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "JEKYLL_GITHUB_TOKEN" nocase
+ condition:
+ all of them
+}
+rule SshdConfig {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "SSHD config files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)sshd_config/is
+}
+rule DhcpConfig {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "DHCP Config files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)dhcpd\.conf/is
+}
+rule Phoenix {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Phoenix prod config and secret files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "prod.secret.exs"
+ condition:
+ filename matches /(\/|^)prod\.secret\.exs/is or (filename matches /(\/|^)prod\.exs/is and not any of them)
+}
+rule JoomlaConfig {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Joomla config files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "JConfig" nocase
+ $ = "password" nocase
+ condition:
+ filename matches /(\/|^)configuration.php/is and all of them
+}
+rule PasswdFile {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Unix /etc/passwd files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "/bin/bash" nocase
+ $ = "/bin/sh" nocase
+ $ = "/usr/sbin/nologin" nocase
+ condition:
+ filename matches /(\/|^)passwd$/is and any of them
+}
+rule ShadowFile {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Unix /etc/shadow files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = ":17737:0:99999" nocase
+ $ = "root:*:" nocase
+ // MD5
+ $ = "$1" nocase
+ // SHA-256
+ $ = "$5" nocase
+        // SHA-512
+ $ = "$6" nocase
+ condition:
+        filename matches /(\/|^)shadow$/is and any of them
+}
+rule Shodan {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Shodan API Keys (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = /shodan_api_key: [a-zA-Z0-9]+/ nocase
+ $ = /shodan_api_key=[a-zA-Z0-9]+/ nocase
+ condition:
+ any of them
+}
+rule Avast {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Avast license files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "support.avast.com" nocase
+ condition:
+ all of them and (filename matches /.*\.avastlic$/is)
+}
+rule DBeaver {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "DBeaver configuration files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)dbeaver-data-sources.xml$/is
+}
+rule ESmtp {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = ".esmtpdrc files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "password" nocase
+ condition:
+ filename matches /(\/|^)\.esmtprc$/is and all of them
+}
+rule Homebrew {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Homebrew github tokens (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "HOMEBREW_GITHUB_API_TOKEN" nocase
+ condition:
+ all of them
+}
+rule MLab {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "MLab mongodb credentials (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = ".mlab.com" nocase
+ $ = "password" nocase
+ condition:
+ all of them
+}
+rule Firefox {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Firefox saved passwords (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)logins\.json$/is
+}
+rule CCCam {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "CCCam server config files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)CCCam\.cfg$/is
+}
+rule IRC {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Nickserv auth configs (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "msg nickserv identify" nocase
+ condition:
+ filename matches /(\/|^)config$/is and all of them
+}
+rule Django {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Django secret keys (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "SECRET_KEY" nocase
+ condition:
+ filename matches /(\/|^)settings.py$/is and all of them
+}
+rule RailsSecrets {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Ruby on rails secrets.yml files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "password" nocase
+ condition:
+ filename matches /(\/|^)secrets\.yml$/is and all of them
+}
+rule RailsMasterKey {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Rails master key files (used for decrypting credentials.yml.enc for Rails 5.2+) (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ strings:
+ $ = "password" nocase
+ condition:
+ filename matches /(\/|^)config\/master\.key$/is and all of them
+}
+rule AtomDeployments {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Multiple files created by different atom extensions for authentication (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)deployment-config\.json$/is or
+ filename matches /(\/|^)remote-sync\.json$/is or
+ filename matches /(\/|^)\.ftpconfig$/is
+}
+rule VscodeSftp {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "VSCode SFTP files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)\.vscode\/sftp\.json$/is
+}
+rule SublimeSftp {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Sublime SFTP files (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)sftp-config\.json$/is
+}
+rule JetbrainsCreds {
+ meta:
+ author = "Dylan Katz (@Plazmaz)"
+ description = "Jetbrains IDE webserver credentials with encoded passwords (Created as part of PasteHunter)"
+ reference = "https://github.com/techgaun/github-dorks"
+ date = "09/15/19"
+ condition:
+ filename matches /(\/|^)WebServers\.xml$/is
+}
\ No newline at end of file
diff --git a/YaraRules/hak5.yar b/pastehunter/YaraRules/hak5.yar
similarity index 100%
rename from YaraRules/hak5.yar
rename to pastehunter/YaraRules/hak5.yar
diff --git a/YaraRules/password_leak.yar b/pastehunter/YaraRules/password_leak.yar
similarity index 100%
rename from YaraRules/password_leak.yar
rename to pastehunter/YaraRules/password_leak.yar
diff --git a/YaraRules/powershell.yar b/pastehunter/YaraRules/powershell.yar
similarity index 100%
rename from YaraRules/powershell.yar
rename to pastehunter/YaraRules/powershell.yar
diff --git a/pastehunter/YaraRules/test_rules.yar b/pastehunter/YaraRules/test_rules.yar
new file mode 100644
index 0000000..c6f884f
--- /dev/null
+++ b/pastehunter/YaraRules/test_rules.yar
@@ -0,0 +1,66 @@
+/*
+ These are test rules
+*/
+
+rule test_hex_MZ
+{
+ meta:
+ author = "kevthehermit"
+ info = "Part of PasteHunter"
+ reference = "https://github.com/kevthehermit/PasteHunter"
+
+ strings:
+ $mz_hex = "4d5a" nocase wide ascii
+
+ condition:
+ $mz_hex at 0
+
+}
+
+rule test_vbscript
+{
+ meta:
+ author = "kevthehermit"
+ info = "Part of PasteHunter"
+ reference = "https://github.com/kevthehermit/PasteHunter"
+
+ strings:
+ $a = "Function" nocase wide ascii fullword
+ $b = "CreateObject" nocase wide ascii fullword
+ $c = "Wscript" nocase wide ascii fullword
+ $d = "As Long" nocase wide ascii fullword
+ $e = "run" nocase wide ascii fullword
+ $f = "for each" nocase wide ascii fullword
+ $g = "end function" nocase wide ascii fullword
+ $h = "NtAllocateVirtualMemory" nocase wide ascii fullword
+ $i = "NtWriteVirtualMemory" nocase wide ascii fullword
+
+
+ condition:
+ 5 of them
+}
+
+rule test_autoit
+{
+ meta:
+ author = "kevthehermit"
+ info = "Part of PasteHunter"
+ reference = "https://github.com/kevthehermit/PasteHunter"
+
+ strings:
+ $tray = "NoTrayIcon" nocase wide ascii fullword
+ $a = "iniread" nocase wide ascii fullword
+ $b = "fileinstall" nocase wide ascii fullword
+ $c = "EndFunc" nocase wide ascii fullword
+ $d = "FileRead" nocase wide ascii fullword
+ $e = "DllStructSetData" nocase wide ascii fullword
+ $f = "Global Const" nocase wide ascii fullword
+ $g = "Run(@AutoItExe" nocase wide ascii fullword
+ $h = "StringReplace" nocase wide ascii fullword
+ $i = "filewrite" nocase wide ascii fullword
+
+
+
+ condition:
+ ($tray and 3 of them) or (5 of them)
+}
\ No newline at end of file
diff --git a/outputs/__init__.py b/pastehunter/__init__.py
similarity index 100%
rename from outputs/__init__.py
rename to pastehunter/__init__.py
diff --git a/pastehunter/common.py b/pastehunter/common.py
new file mode 100644
index 0000000..54fd4a3
--- /dev/null
+++ b/pastehunter/common.py
@@ -0,0 +1,28 @@
+import json
+import logging
+import os.path
+
+logger = logging.getLogger('pastehunter')
+home = os.path.expanduser("~")
+
+# Parse the config file in to a dict
+def parse_config():
+ conf = None
+ settings_file = os.path.join(home, ".config", "pastehunter.json")
+
+ if os.path.exists(settings_file):
+ conf_file = settings_file
+ else:
+ #ToDo: Copy base settings to the settings file
+ conf_file = None
+
+ if conf_file:
+ try:
+ with open(conf_file, 'r') as read_conf:
+ conf = json.load(read_conf)
+ except Exception as e:
+ logger.error("Unable to parse config file: {0}".format(e))
+ else:
+ logger.error("Unable to read config file '~/.config/pastehunter.json'")
+
+ return conf
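As a quick illustration of the relocated config handling above, parse_config() only looks for ~/.config/pastehunter.json; a minimal sketch, where the settings content is a stand-in rather than the full settings.json.sample:

    import json
    import os
    from pastehunter.common import parse_config

    # Drop an illustrative (incomplete) config into the location parse_config() reads
    cfg_dir = os.path.join(os.path.expanduser("~"), ".config")
    os.makedirs(cfg_dir, exist_ok=True)
    with open(os.path.join(cfg_dir, "pastehunter.json"), "w") as fh:
        json.dump({"inputs": {}, "outputs": {}}, fh)

    conf = parse_config()
    print(conf)  # {'inputs': {}, 'outputs': {}}; None is returned if the file is missing or invalid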
diff --git a/pastehunter/inputs/__init__.py b/pastehunter/inputs/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pastehunter/inputs/dumpz.py b/pastehunter/inputs/dumpz.py
new file mode 100644
index 0000000..b58e7a6
--- /dev/null
+++ b/pastehunter/inputs/dumpz.py
@@ -0,0 +1,47 @@
+import requests
+import logging
+
+logger = logging.getLogger('pastehunter')
+
+def recent_pastes(conf, input_history):
+ # populate vars from config
+ paste_limit = conf['inputs']['dumpz']['paste_limit']
+ api_scrape = conf['inputs']['dumpz']['api_scrape']
+ history = []
+ paste_list = []
+ try:
+ # Create the API uri
+ scrape_uri = '{0}?limit={1}'.format(api_scrape, paste_limit)
+ # Get some pastes and convert to json
+ # Get last 'paste_limit' pastes
+ paste_list_request = requests.get(scrape_uri)
+ paste_list_json = paste_list_request.json()
+
+ for paste in paste_list_json['dumps']:
+ # Track paste ids to prevent dupes
+ history.append(paste['id'])
+ if paste['id'] in input_history:
+ continue
+
+ # We don't want password protected pastes
+ if paste['pwd'] == 1:
+ continue
+
+ # Create a new paste dict for us to normalize
+ paste_data = paste
+ paste_data['confname'] = 'dumpz'
+ paste_data['pasteid'] = paste['id']
+ paste_data['pastesite'] = 'dumpz.org'
+
+ #paste_data['scrape_url'] = '{0}{1}'.format(conf['dumpz']['api_raw'], paste['id'])
+
+ paste_data['scrape_url'] = 'https://dumpz.org/{0}/text/'.format(paste['id'])
+
+ # Add a date field that kibana will map
+ paste_data['@timestamp'] = paste_data['date']
+ paste_list.append(paste_data)
+ return paste_list, history
+
+ except Exception as e:
+ logger.error("Unable to parse paste results: {0}".format(e))
+ return paste_list, history
\ No newline at end of file
diff --git a/pastehunter/inputs/gists.py b/pastehunter/inputs/gists.py
new file mode 100644
index 0000000..fd7b1e7
--- /dev/null
+++ b/pastehunter/inputs/gists.py
@@ -0,0 +1,89 @@
+import requests
+import math
+import logging
+from datetime import datetime
+
+# Set some logging options
+logger = logging.getLogger('pastehunter')
+logging.getLogger('requests').setLevel(logging.ERROR)
+
+api_uri = 'https://api.github.com/gists/public'
+api_version = 'application/vnd.github.v3+json' # Set Accept header to force api v3
+
+# Some users store large blobs of data in gists on a regular schedule; these just slow down the Kibana UI, which is what the file and user blacklists below help filter out.
+
+
+
+def recent_pastes(conf, input_history):
+ oauth_token = conf['inputs']['gists']['api_token']
+ gist_limit = conf['inputs']['gists']['api_limit']
+ headers = {'user-agent': 'PasteHunter',
+ 'Accept': api_version,
+ 'Authorization': 'token {0}'.format(oauth_token)}
+
+ # calculate number of pages
+ page_count = int(math.ceil(gist_limit / 100))
+
+ result_pages = []
+ history = []
+ paste_list = []
+
+ gist_file_blacklist = conf['inputs']['gists']['file_blacklist']
+ gist_user_blacklist = conf['inputs']['gists']['user_blacklist']
+
+ try:
+ # Get the required amount of entries via pagination
+ for page_num in range(1, page_count + 1):
+ url = '{0}?page={1}&per_page=100'.format(api_uri, page_num)
+ logger.debug("Fetching page: {0}".format(page_num))
+ req = requests.get(url, headers=headers)
+ # Check some headers
+ reset_date = datetime.utcfromtimestamp(float(req.headers['X-RateLimit-Reset'])).isoformat()
+ # logging.info("Limit Reset: {0}".format(reset_date))
+ logger.info("Remaining Limit: {0}. Resets at {1}".format(req.headers['X-RateLimit-Remaining'],
+ reset_date))
+
+ if req.status_code == 200:
+ result_pages.append(req.json())
+
+ if req.status_code == 401:
+ logger.error("Auth Failed")
+
+ elif req.status_code == 403:
+ logger.error("Login Attempts Exceeded")
+
+ # Parse results
+
+ for page in result_pages:
+ for gist_meta in page:
+ # Track paste ids to prevent dupes
+ history.append(gist_meta['id'])
+ if gist_meta['id'] in input_history:
+ continue
+
+ if gist_meta['user'] in gist_user_blacklist:
+ logger.info("Blacklisting Gist from user: {0}".format(gist_meta['owner']['login']))
+ continue
+
+ for file_name, file_meta in gist_meta['files'].items():
+
+ if file_name in gist_file_blacklist:
+ logger.info("Blacklisting Paste {0}".format(file_name))
+ continue
+
+ gist_data = file_meta
+ gist_data['confname'] = 'gists'
+ gist_data['@timestamp'] = gist_meta['created_at']
+ gist_data['pasteid'] = gist_meta['id']
+ gist_data['user'] = gist_meta['user']
+ gist_data['pastesite'] = 'gist.github.com'
+ gist_data['scrape_url'] = file_meta['raw_url']
+ # remove some original keys just to keep it a bit cleaner
+ del gist_data['raw_url']
+ paste_list.append(gist_data)
+
+ # Return results and history
+ return paste_list, history
+ except Exception as e:
+ logger.error("Unable to parse paste results: {0}".format(e))
+ return paste_list, history
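For reference, the pagination arithmetic used above works out as follows (the api_limit value is illustrative):

    import math

    gist_limit = 250  # e.g. conf['inputs']['gists']['api_limit']
    page_count = int(math.ceil(gist_limit / 100))
    urls = ['https://api.github.com/gists/public?page={0}&per_page=100'.format(p)
            for p in range(1, page_count + 1)]
    print(page_count, urls[0])  # 3 pages requested, starting at ?page=1&per_page=100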
diff --git a/pastehunter/inputs/github.py b/pastehunter/inputs/github.py
new file mode 100644
index 0000000..25b44ec
--- /dev/null
+++ b/pastehunter/inputs/github.py
@@ -0,0 +1,133 @@
+import logging
+import math
+from datetime import datetime
+
+import fnmatch
+import requests
+
+# Future work/improvement that can happen here: support PR diffs, they contain a patch URL
+# Set some logging options
+logger = logging.getLogger('pastehunter')
+logging.getLogger('requests').setLevel(logging.ERROR)
+
+api_uri = 'https://api.github.com/events'
+# This event refers to a commit being pushed, and is
+# probably the most significant thing we're concerned about.
+event_types = ['PushEvent']
+api_version = 'application/vnd.github.v3+json' # Set Accept header to force api v3
+# Important note from github:
+# 'We delay the public events feed by five minutes, which means the most recent event returned by the public events API actually occurred at least five minutes ago.'
+
+# Beware, git diffs can sometimes be very large files, including binaries and zips.
+# MB KB B
+diff_size_limit = 500 * 1000 * 1000
+
+
+def _make_request(url, headers):
+ req = requests.get(url, headers=headers)
+ reset_date = datetime.utcfromtimestamp(float(req.headers['X-RateLimit-Reset'])).isoformat()
+ logger.info('Remaining Limit: {0}. Resets at {1}'.format(req.headers['X-RateLimit-Remaining'],
+ reset_date))
+
+ if req.status_code == 200:
+ return req.json()
+
+ if req.status_code == 401:
+ logger.error('Auth Failed')
+ return None
+
+ elif req.status_code == 403:
+ logger.error('Login Attempts Exceeded')
+ return None
+
+def recent_pastes(conf, input_history):
+ oauth_token = conf['inputs']['github']['api_token']
+ conf_limit = conf['inputs']['github']['api_limit']
+ gh_limit = min(conf_limit, 300)
+ # From GitHub Docs (https://developer.github.com/v3/activity/events/#list-public-events):
+ # Events support pagination, however the per_page option is unsupported. The fixed page size is 30 items. Fetching up to ten pages is supported, for a total of 300 events.
+ # We modify this to be 100 per page, but the limit is still 300.
+ if gh_limit != conf_limit:
+ logger.warning('api_limit exceeds the 300 events available from the public GitHub feed. Limiting to 300.')
+ headers = {'user-agent': 'PasteHunter',
+ 'Accept': api_version,
+ 'Authorization': 'token {0}'.format(oauth_token)}
+
+ # calculate number of pages
+ page_count = int(math.ceil(gh_limit / 100))
+
+ result_pages = []
+ history = []
+ paste_list = []
+
+ gh_file_blacklist = conf['inputs']['github']['file_blacklist']
+ gh_user_blacklist = conf['inputs']['github']['user_blacklist']
+ ignore_bots = conf['inputs']['github']['ignore_bots']
+
+ try:
+ # Get the required amount of entries via pagination
+ for page_num in range(1, page_count + 1):
+ url = '{0}?page={1}&per_page=100'.format(api_uri, page_num)
+ logger.debug('Fetching page: {0}'.format(page_num))
+ req = _make_request(url, headers)
+ if req is not None:
+ result_pages.append(req)
+
+ # Parse results
+
+ for page in result_pages:
+ for event_meta in page:
+ # Track paste ids to prevent dupes
+ event_id = event_meta['id']
+ history.append(event_id)
+ if event_id in input_history:
+ continue
+ if event_meta['type'] not in event_types:
+ logger.debug('Skipping event {} due to unwanted type "{}"'.format(event_id, event_meta['type']))
+ continue
+ # Actor may have been deleted or changed
+ if 'actor' in event_meta:
+ # If the username is None, this will return false, while event_meta['login'] would error.
+ if event_meta.get('actor').get('login') in gh_user_blacklist:
+ logger.info('Blacklisting GitHub event from user: {0}'.format(event_meta.get('actor').get('login')))
+ continue
+ if ignore_bots and (event_meta.get('actor').get('login') or '').endswith("[bot]"):
+ logger.info('Ignoring GitHub event from bot user: {0}'.format(event_meta.get('actor').get('login')))
+ continue
+
+ payload = event_meta.get('payload')
+ if 'commits' not in payload:
+ # Debug, because this is high output
+ logger.debug('Skipping event {} due to no commits.'.format(event_id))
+ continue
+ for commit_meta in payload.get('commits'):
+ commit_url = commit_meta.get('url')
+ commit_data = _make_request(commit_url, headers)
+ if not commit_data:
+ logger.info('No data returned for url {}. Skipping...'.format(commit_url))
+ continue
+ if commit_data.get('committer') and commit_data.get('committer').get('login') in gh_user_blacklist:
+ logger.info('Blacklisting GitHub event from user: {0}'.format(commit_data.get('committer').get('login')))
+ continue
+ for file in commit_data.get('files'):
+ file_path = file.get('filename')
+ blacklisted = False
+ for pattern in gh_file_blacklist:
+ if fnmatch.fnmatch(file_path, pattern):
+ logger.info('Blacklisting file {0} from event {1} (matched pattern "{2}")'.format(file_path, event_id, pattern))
+ blacklisted = True
+ break
+ if blacklisted:
+ continue
+
+ gist_data = file
+ gist_data['confname'] = 'github'
+ gist_data['@timestamp'] = event_meta['created_at']
+ gist_data['pasteid'] = event_id
+ gist_data['user'] = event_meta.get('actor').get('login')
+ gist_data['pastesite'] = 'github.com'
+ gist_data['scrape_url'] = file.get('raw_url')
+ # remove some original keys just to keep it a bit cleaner
+ del gist_data['raw_url']
+ paste_list.append(gist_data)
+
+ # Return results and history
+ return paste_list, history
+ except Exception as e:
+ logger.exception('Unable to parse paste results: {0}'.format(e))
+ return paste_list, history
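The file blacklist above relies on shell-style globbing via fnmatch; a small, self-contained illustration of how patterns from settings.json match commit file paths:

    import fnmatch

    gh_file_blacklist = ['node_modules/*', '__pycache__/*', '*.lock', '*.png']
    for file_path in ['node_modules/lodash/index.js', 'yarn.lock', 'src/creds.py']:
        blacklisted = any(fnmatch.fnmatch(file_path, p) for p in gh_file_blacklist)
        print(file_path, blacklisted)  # True, True, False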
diff --git a/pastehunter/inputs/pastebin.py b/pastehunter/inputs/pastebin.py
new file mode 100644
index 0000000..23cb0a0
--- /dev/null
+++ b/pastehunter/inputs/pastebin.py
@@ -0,0 +1,49 @@
+import requests
+import logging
+from datetime import datetime
+
+logger = logging.getLogger('pastehunter')
+
+def recent_pastes(conf, input_history):
+ # populate vars from config
+ paste_limit = conf['inputs']['pastebin']['paste_limit']
+ api_scrape = conf['inputs']['pastebin']['api_scrape']
+ history = []
+ paste_list = []
+ try:
+ # Create the API uri
+ scrape_uri = '{0}?limit={1}'.format(api_scrape, paste_limit)
+ # Get some pastes and convert to json
+ # Get last 'paste_limit' pastes
+ paste_list_request = requests.get(scrape_uri)
+
+ # Check to see if our IP is whitelisted or not.
+ if 'DOES NOT HAVE ACCESS' in paste_list_request.text:
+ logger.error("Your IP is not whitelisted visits 'https://pastebin.com/doc_scraping_api'")
+ return [], []
+ paste_list_json = paste_list_request.json()
+
+ for paste in paste_list_json:
+ # Track paste ids to prevent dupes
+ history.append(paste['key'])
+ if paste['key'] in input_history:
+ continue
+
+ # Create a new paste dict for us to normalize
+ paste_data = paste
+ paste_data['filename'] = paste['key']
+ paste_data['confname'] = 'pastebin'
+ paste_data['pasteid'] = paste['key']
+ paste_data['pastesite'] = 'pastebin.com'
+ # Add a date field that kibana will map
+ date = datetime.utcfromtimestamp(float(paste_data['date'])).isoformat()
+ paste_data['@timestamp'] = date
+ paste_list.append(paste_data)
+ return paste_list, history
+
+ except Exception as e:
+ logger.error("Unable to parse paste results: {0}".format(e))
+ return paste_list, history
+
+
+
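The '@timestamp' field is derived from pastebin's epoch 'date' value; a quick check of that conversion with a made-up timestamp:

    from datetime import datetime

    paste_date = '1577836800'  # illustrative value of paste['date']
    print(datetime.utcfromtimestamp(float(paste_date)).isoformat())
    # 2020-01-01T00:00:00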
diff --git a/pastehunter/inputs/slexy.py b/pastehunter/inputs/slexy.py
new file mode 100644
index 0000000..856be13
--- /dev/null
+++ b/pastehunter/inputs/slexy.py
@@ -0,0 +1,102 @@
+import logging
+import re
+import urllib.request as urllib
+from datetime import datetime
+
+logger = logging.getLogger('pastehunter')
+
+
+class SlexySite(object):
+
+ def __init__(self):
+ self.site = "slexy.org"
+ url_slexy = "https://" + self.site
+ self.url_recent = url_slexy + "/recent"
+ self.url_view = url_slexy + "/view"
+ self.url_raw = url_slexy + "/raw"
+
+ def view_link(self, pid):
+ return self.create_req("%s/%s" % (self.url_view, pid))
+
+ def raw_link(self, pid, args):
+ return self.create_req("%s/%s%s" % (self.url_raw, pid, args))
+
+ def create_req(self, url):
+ return urllib.Request(
+ url,
+ data=None,
+ headers={
+ 'Referer': self.url_recent,
+ 'User-Agent': 'PasteHunter'
+ }
+ )
+
+
+class SlexyPaste(SlexySite):
+ def __init__(self, pid):
+ super(SlexyPaste, self).__init__()
+ self.pid = pid
+ self.site = self.site
+ self.url = None
+ self.timestamp = None
+ self.parse()
+
+ def parse(self):
+ data = urllib.urlopen(self.view_link(self.pid), timeout=10).read().decode('utf-8')
+ self.url = self.get_raw_link(data)
+ self.timestamp = self.get_timestamp(data)
+
+ def get_raw_link(self, data):
+ # The view page links to the raw paste; pull the raw link arguments out of the HTML
+ pattern = '<a href="/raw(.*?)"'
+ args = re.findall(pattern, data)[0]
+ return self.raw_link(self.pid, args)
+
+ def get_timestamp(self, data):
+ # Extract the paste timestamp and convert it to ISO format for the '@timestamp' field
+ pattern = 'Timestamp: <b>(.*?)</b>'
+ timestamp = re.findall(pattern, data)[0]
+ return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S %Z').isoformat()
+
+
+class SlexyScraper(SlexySite):
+ def __init__(self):
+ super(SlexyScraper, self).__init__()
+
+ def get_recents(self):
+ # Scrape the recent pastes page and collect the unique paste ids
+ getdata = urllib.urlopen(self.create_req(self.url_recent), timeout=10).read().decode('utf-8')
+ pids = re.findall('<td><a href="/view/(.*?)">', getdata)
+ return list(set(pids))
+
+
+def recent_pastes(conf, input_history):
+ history = []
+ paste_list = []
+ my_scraper = SlexyScraper()
+ recent_pids = my_scraper.get_recents()
+ pid_to_process = set()
+ for pid in recent_pids:
+ if pid in input_history:
+ history.append(pid)
+ else:
+ pid_to_process.add(pid)
+ try:
+ for pid in pid_to_process:
+ paste = SlexyPaste(pid)
+ history.append(paste.pid)
+ paste_data = {}
+ paste_data['confname'] = 'slexy'
+ paste_data['scrape_url'] = paste.url.full_url
+ paste_data['pasteid'] = paste.pid
+ paste_data['pastesite'] = paste.site
+ paste_data['@timestamp'] = paste.timestamp
+ paste_list.append(paste_data)
+ return paste_list, history
+ except Exception as e:
+ logger.error("Unable to parse paste results: %s", e)
+ return paste_list, history
diff --git a/pastehunter/inputs/stackexchange.py b/pastehunter/inputs/stackexchange.py
new file mode 100644
index 0000000..244e1f5
--- /dev/null
+++ b/pastehunter/inputs/stackexchange.py
@@ -0,0 +1,87 @@
+import requests
+import math
+import logging
+from datetime import datetime
+
+# Set some logging options
+logger = logging.getLogger('pastehunter')
+logging.getLogger('requests').setLevel(logging.ERROR)
+
+# Test API Key from the docs - U4DMV*8nvpm3EOpvf69Rxw((
+# https://api.stackexchange.com/2.2/questions?key=U4DMV*8nvpm3EOpvf69Rxw((&site=stackoverflow&page=1&pagesize=100&order=desc&sort=creation&filter=default
+
+
+
+def recent_pastes(conf, input_history):
+ api_key = conf['inputs']['stackexchange']['api_key']
+ api_scrape = conf['inputs']['stackexchange']['api_scrape']
+ site_list = conf['inputs']['stackexchange']['site_list']
+ store_filter = conf['inputs']['stackexchange']['store_filter']
+ question_body_filter = '!bA1dOlliDM)pi9'
+ pagesize = 100 # Default = 30
+ headers = {'user-agent': 'PasteHunter'}
+
+ if api_key == '':
+ logger.error("No API Key configured for StackExchange Access")
+ return [], []
+
+ result_pages = []
+ history = []
+ paste_list = []
+
+ try:
+
+ # For each of the stack sites we want to query
+ for site in site_list:
+ logger.info("Query Stack Exchange site: {0}".format(site))
+
+ # Create the API uri
+ scrape_uri = '{0}?key={1}&site={2}&page=1&pagesize=100&order=desc&sort=creation&filter={3}'.format(api_scrape, api_key, site, store_filter)
+ # Get some pastes and convert to json
+ # Get last 'paste_limit' pastes
+ paste_list_request = requests.get(scrape_uri)
+
+ # ToDo: Add an API rate test in here.
+ paste_list_json = paste_list_request.json()
+
+ if "error_id" in paste_list_json:
+ logging.error("StackExchange API Error: {0}".format(paste_list_json['error_message']))
+ return [], []
+
+
+
+ for question in paste_list_json['items']:
+ # Track question ids to prevent dupes
+ history.append(question['question_id'])
+ if question['question_id'] in input_history:
+ continue
+
+ # Create a new question dict for us to normalize
+ question_data = question
+ question_data['filename'] = ''
+ question_data['confname'] = "stackexchange"
+ # Force type to string else it breaks ES Index mappings
+ question_data['pasteid'] = str(question['question_id'])
+ question_data['pastesite'] = site
+ # Set the raw uri to avoid breaking other things. Defaults to empty if not found
+ question_data['scrape_url'] = question.get('link', '')
+ # Get the author and then trim the data we store.
+ question_data['username'] = question['owner']['display_name']
+ del question_data['owner']
+ # Add a date field that kibana will map
+ date = datetime.utcfromtimestamp(float(question_data['creation_date'])).isoformat()
+ question_data['@timestamp'] = date
+ paste_list.append(question_data)
+
+
+ # Record API Quota on last call to save some logging.
+ quota_max = paste_list_json['quota_max']
+ quota_remaining = paste_list_json['quota_remaining']
+
+ logger.info("Used {0} of {1} of StackExchange api quota".format(quota_remaining, quota_max))
+ # Return the pastes and update history
+ return paste_list, history
+
+ except Exception as e:
+ logger.error("Unable to parse question results: {0}".format(e))
+ return paste_list, history
\ No newline at end of file
diff --git a/pastehunter/outputs/__init__.py b/pastehunter/outputs/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/outputs/csv_output.py b/pastehunter/outputs/csv_output.py
similarity index 79%
rename from outputs/csv_output.py
rename to pastehunter/outputs/csv_output.py
index d79cf67..7cebe1b 100644
--- a/outputs/csv_output.py
+++ b/pastehunter/outputs/csv_output.py
@@ -1,11 +1,13 @@
+import logging
import os
import datetime
-from common import parse_config
+from pastehunter.common import parse_config
+logger = logging.getLogger('pastehunter')
config = parse_config()
-class CSVOutput():
+class CSVOutput(object):
def __init__(self):
base_path = config['outputs']['csv_output']['output_path']
# Get todays CSV
@@ -18,7 +20,7 @@ def __init__(self):
os.makedirs(base_path)
self.test = True
except OSError as e:
- print("Unable to create CSV Path: {0}".format(e))
+ logger.error("Unable to create CSV Path: {}".format(e))
self.test = False
else:
self.test = True
@@ -34,4 +36,4 @@ def store_paste(self, paste_data):
with open(self.csv_path, 'a') as out:
out.write('{0}\n'.format(csv_line))
else:
- print("CSV Output Error")
+ logging.error("CSV Output Error. Output path '{}' was never created.".format(self.csv_path))
diff --git a/outputs/elastic_output.py b/pastehunter/outputs/elastic_output.py
similarity index 98%
rename from outputs/elastic_output.py
rename to pastehunter/outputs/elastic_output.py
index 12eccbe..66ce467 100644
--- a/outputs/elastic_output.py
+++ b/pastehunter/outputs/elastic_output.py
@@ -1,5 +1,5 @@
from elasticsearch import Elasticsearch
-from common import parse_config
+from pastehunter.common import parse_config
from datetime import datetime
import logging
diff --git a/outputs/json_output.py b/pastehunter/outputs/json_output.py
similarity index 95%
rename from outputs/json_output.py
rename to pastehunter/outputs/json_output.py
index d98d279..e578a53 100644
--- a/outputs/json_output.py
+++ b/pastehunter/outputs/json_output.py
@@ -2,7 +2,7 @@
import logging
import os
-from common import parse_config
+from pastehunter.common import parse_config
logger = logging.getLogger('pastehunter')
diff --git a/pastehunter/outputs/slack_output.py b/pastehunter/outputs/slack_output.py
new file mode 100644
index 0000000..8e0abac
--- /dev/null
+++ b/pastehunter/outputs/slack_output.py
@@ -0,0 +1,48 @@
+import logging
+import requests
+from pastehunter.common import parse_config
+
+logger = logging.getLogger('pastehunter')
+
+config = parse_config()
+
+
+class SlackOutput():
+ def __init__(self):
+ self.valid = True
+ self.webhook_url = config['outputs']['slack_output']['webhook_url']
+ self.accepted_rules = config['outputs']['slack_output']['rule_list']
+
+ if self.webhook_url == '':
+ logger.error("Slack Webhook not configured")
+ self.valid = False
+ if len(self.accepted_rules) == 0:
+ logger.error("No Rules configured to alert")
+
+ def store_paste(self, paste_data):
+ if self.valid:
+ send = ('all' in self.accepted_rules)
+
+ for rule in self.accepted_rules:
+ if rule in paste_data['YaraRule']:
+ send = True
+
+ if send:
+ json_data = {
+ "text": "Pastehunter alert!",
+ "attachments": [
+ {
+ "fallback": "Plan a vacation",
+ "author_name": "PasteHunter",
+ "title": "Paste ID {0}".format(paste_data['pasteid']),
+ "text": "Yara Rule {0} Found on {1}\n\r{2}".format(paste_data['YaraRule'], paste_data['pastesite'], paste_data['scrape_url'])
+ }
+ ]
+ }
+
+ req = requests.post(self.webhook_url, json=json_data)
+ if req.status_code == 200 and req.text == 'ok':
+ logger.debug("Paste sent to slack")
+ else:
+ logger.error(
+ "Failed to post to slack Status Code {0}".format(req.status_code))
diff --git a/outputs/smtp_output.py b/pastehunter/outputs/smtp_output.py
similarity index 99%
rename from outputs/smtp_output.py
rename to pastehunter/outputs/smtp_output.py
index 6090511..dd549ec 100644
--- a/outputs/smtp_output.py
+++ b/pastehunter/outputs/smtp_output.py
@@ -9,7 +9,7 @@
import json
import logging
-from common import parse_config
+from pastehunter.common import parse_config
logger = logging.getLogger('pastehunter')
config = parse_config()
diff --git a/pastehunter/outputs/splunk_output.py b/pastehunter/outputs/splunk_output.py
new file mode 100644
index 0000000..9406c42
--- /dev/null
+++ b/pastehunter/outputs/splunk_output.py
@@ -0,0 +1,42 @@
+from pastehunter.common import parse_config
+import json
+import logging
+import splunklib.client as client
+
+logger = logging.getLogger('pastehunter')
+config = parse_config()
+
+class SplunkOutput():
+ def __init__(self):
+ # Set up the database connection
+ splunk_host = config['outputs']['splunk_output']['splunk_host']
+ splunk_port = config['outputs']['splunk_output']['splunk_port']
+ splunk_user = config['outputs']['splunk_output']['splunk_user']
+ splunk_pass = config['outputs']['splunk_output']['splunk_pass']
+ self.splunk_index = config['outputs']['splunk_output']['splunk_index']
+
+ try:
+ self.service = client.connect(
+ host=splunk_host,
+ port=splunk_port,
+ username=splunk_user,
+ password=splunk_pass,
+ autologin=True)
+
+ self.index = self.service.indexes[self.splunk_index]
+ except Exception as e:
+ logger.error(e)
+ raise Exception('Unable to connect or missing index') from None
+
+ def store_paste(self, paste_data):
+ # Make a copy so we don't affect any other output modules
+ local_data = dict(paste_data)
+ if not config['outputs']['splunk_output']['store_raw']:
+ del local_data['raw_paste']
+
+ try:
+ # The edit_tcp capability is required to access this API
+ sourcetype = config['outputs']['splunk_output']['splunk_sourcetype']
+ self.index.submit(json.dumps(local_data), sourcetype=sourcetype)
+ except Exception as e:
+ logger.exception('Error submitting paste_data to splunk')
diff --git a/outputs/syslog_output.py b/pastehunter/outputs/syslog_output.py
similarity index 94%
rename from outputs/syslog_output.py
rename to pastehunter/outputs/syslog_output.py
index 6618eba..88df0a4 100644
--- a/outputs/syslog_output.py
+++ b/pastehunter/outputs/syslog_output.py
@@ -1,5 +1,5 @@
import socket
-from common import parse_config
+from pastehunter.common import parse_config
config = parse_config()
diff --git a/pastehunter/outputs/twilio_output.py b/pastehunter/outputs/twilio_output.py
new file mode 100644
index 0000000..c9012af
--- /dev/null
+++ b/pastehunter/outputs/twilio_output.py
@@ -0,0 +1,66 @@
+import logging
+from twilio.rest import Client
+from pastehunter.common import parse_config
+
+logger = logging.getLogger('pastehunter')
+config = parse_config()
+
+class TwilioOutput(object):
+ def __init__(self):
+ self.account_sid = config['outputs']['twilio_output']['account_sid']
+ self.auth_token = config['outputs']['twilio_output']['auth_token']
+ self.twilio_sender = config['outputs']['twilio_output']['twilio_sender']
+ self.recipient_list = config['outputs']['twilio_output']['recipient_list']
+ self.accepted_rules = config['outputs']['twilio_output']['rule_list']
+ self.message_type = 'sms' # Whatsapp is still in beta on twilio.
+ try:
+ self.client = Client(self.account_sid, self.auth_token)
+ self.test = True
+ except Exception as e:
+ logging.error("Unable to create twilio Client: {0}".format(e))
+ self.test = False
+
+
+ def store_paste(self, paste_data):
+ if self.test:
+
+
+ send = ('all' in self.accepted_rules)
+
+ for rule in self.accepted_rules:
+ if rule in paste_data['YaraRule']:
+ send = True
+
+ if send:
+ message_body = "Yara Rule {0} Found on {1}\n\r{2}".format(
+ paste_data['YaraRule'],
+ paste_data['pastesite'],
+ paste_data['scrape_url']
+ )
+
+ logger.debug("Sending Twilio Message")
+ if self.message_type == 'sms':
+ for recipient in self.recipient_list:
+ try:
+ message = self.client.messages.create(
+ from_=self.twilio_sender,
+ body=message_body,
+ to=recipient
+ )
+ logging.debug("Sent twilio message with ID: {0}".format(message.sid))
+ except Exception as e:
+ logging.error(e)
+
+ elif self.message_type == 'whatsapp':
+ for recipient in self.recipient_list:
+ try:
+ message = self.client.messages.create(
+ from_='whatsapp:{0}'.format(self.twilio_sender),
+ body=message_body,
+ to='whatsapp:{0}'.format(recipient)
+ )
+ logging.debug("Sent twilio message with ID: {0}".format(message.sid))
+ except Exception as e:
+ logging.error(e)
+ else:
+ logging.error("No Valid twilio message type found")
diff --git a/pastehunter/postprocess/__init__.py b/pastehunter/postprocess/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pastehunter/postprocess/post_b64.py b/pastehunter/postprocess/post_b64.py
new file mode 100644
index 0000000..7a45941
--- /dev/null
+++ b/pastehunter/postprocess/post_b64.py
@@ -0,0 +1,71 @@
+import hashlib
+import importlib
+import gzip
+import logging
+from base64 import b64decode
+# This gets the raw paste and the paste_data json object
+from pastehunter.common import parse_config
+conf = parse_config()
+
+logger = logging.getLogger('pastehunter')
+
+def run(results, raw_paste_data, paste_object):
+
+ '''
+
+ ToDo: Lets look at multiple base64 streams
+ for now only accept if the entire paste is
+
+ # Figure out which b64 rule fire
+
+ # The base64 re can hang on occasion with this one
+ # b64_re = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
+
+ # This one has a few empty results i need to catch but doesn't kill pastehunter
+ b64_re = '(?:[A-Za-z0-9+/]{4}){3,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
+ b64_strings = re.findall(b64_re, raw_paste_data)
+
+
+ # Set a counter for multiple streams.
+ counter = 0
+ for b64_str in b64_strings:
+
+ '''
+
+ for rule in results:
+ if len(raw_paste_data) > 0:
+ if rule == 'b64_gzip':
+ # Lets try to decode and get a file listing.
+ # Also get the MD5 of the decoded file
+ try:
+ uncompressed = gzip.decompress(b64decode(raw_paste_data))
+ # decompress() returns bytes; decode to text before storing it on the paste object
+ paste_object["decompressed_stream"] = uncompressed.decode('utf-8')
+ except Exception as e:
+ logger.error("Unable to decompress gzip stream: {0}".format(e))
+
+ if rule == 'b64_exe':
+ try:
+ raw_exe = b64decode(raw_paste_data)
+ paste_object["exe_size"] = len(raw_exe)
+ paste_object["exe_md5"] = hashlib.md5(raw_exe).hexdigest()
+ paste_object["exe_sha256"] = hashlib.sha256(raw_exe).hexdigest()
+
+ # We are guessing that the sample has been submitted, and crafting a URL
+ paste_object["VT"] = 'https://www.virustotal.com/#/file/{0}'.format(paste_object["exe_md5"])
+
+ # If sandbox modules are enabled then submit the file
+ for sandbox, sandbox_values in conf["sandboxes"].items():
+ if sandbox_values["enabled"]:
+ logger.info("Uploading file {0} using {1}".format(paste_object["pasteid"], sandbox_values["module"]))
+ sandbox_module = importlib.import_module(sandbox_values["module"])
+ paste_object = sandbox_module.upload_file(raw_exe, paste_object)
+
+ except Exception as e:
+ logger.error("Unable to decode exe file")
+
+ # Get unique domain count
+ # Update the json
+
+ # Send the updated json back
+ return paste_object
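A short round trip showing the b64_gzip path above, and why the decompressed bytes are decoded rather than encoded before being stored (the payload is made up):

    import gzip
    from base64 import b64decode, b64encode

    raw_paste_data = b64encode(gzip.compress(b'user=admin&pass=hunter2')).decode()
    uncompressed = gzip.decompress(b64decode(raw_paste_data))  # bytes
    print(uncompressed.decode('utf-8'))  # user=admin&pass=hunter2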
diff --git a/pastehunter/postprocess/post_compress.py b/pastehunter/postprocess/post_compress.py
new file mode 100644
index 0000000..d47f9fb
--- /dev/null
+++ b/pastehunter/postprocess/post_compress.py
@@ -0,0 +1,26 @@
+import lzma
+import base64
+import logging
+from pastehunter.common import parse_config
+logger = logging.getLogger('pastehunter')
+config = parse_config()
+
+def run(results, raw_paste_data, paste_object):
+ if config['outputs']['json_output']['store_raw']:
+ original = raw_paste_data
+ orig_size = len(original.encode())
+ logger.debug("Compressing paste... Pre-compression size: {}", orig_size)
+ compressed = base64.b64encode(lzma.compress(raw_paste_data.encode()))
+ compressed_size = len(compressed)
+ logger.debug("Compressing paste... Post-compression size: {}", compressed_size)
+
+ # In some cases compressed blobs may be larger
+ # if not much data is compressed
+ if orig_size > compressed_size:
+ paste_object['raw_paste'] = compressed.decode('utf-8')
+ logger.debug("Compressed data smaller than original blob. Keeping compressed.")
+ else:
+ logger.debug("Original smaller than compressed blob. Keeping original.")
+
+ # Regardless of modification, return the paste object
+ return paste_object
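Reading a compressed raw_paste back out of the JSON output is just the reverse transformation; a minimal sketch, not part of the module:

    import base64
    import lzma

    stored = base64.b64encode(lzma.compress('a long paste body'.encode())).decode('utf-8')
    original = lzma.decompress(base64.b64decode(stored)).decode()
    print(original)  # a long paste body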
diff --git a/pastehunter/postprocess/post_email.py b/pastehunter/postprocess/post_email.py
new file mode 100644
index 0000000..53bbc79
--- /dev/null
+++ b/pastehunter/postprocess/post_email.py
@@ -0,0 +1,29 @@
+import re
+
+
+def run(results, raw_paste_data, paste_object):
+ # Use the rule name to determine what postprocess to do
+
+ # Get total unique emails.
+
+ all_emails = re.findall(r'[\w\.-]+@[\w\.-]+\.\w+', raw_paste_data)
+ domain_list = []
+ for email_address in all_emails:
+ email_domain = email_address.split("@")
+ domain_list.append(email_domain[-1])
+
+ unique_emails = set(all_emails)
+ unique_domains = set(domain_list)
+ # We can filter some of the false positives from the yara match here
+
+ if len(unique_emails) < 10:
+ paste_object["results"] = []
+
+ # Get unique domain count
+ # Update the json
+ paste_object["total_emails"] = len(all_emails)
+ paste_object["unique_emails"] = len(unique_emails)
+ paste_object["unique_domains"] = len(unique_domains)
+
+ # Send the updated json back
+ return paste_object
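On a toy input, the regex and the derived counters above behave like this:

    import re

    raw_paste_data = 'alice@example.com bob@example.com alice@example.com carol@corp.example.org'
    all_emails = re.findall(r'[\w\.-]+@[\w\.-]+\.\w+', raw_paste_data)
    unique_emails = set(all_emails)
    unique_domains = {e.split('@')[-1] for e in all_emails}
    print(len(all_emails), len(unique_emails), len(unique_domains))  # 4 3 2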
diff --git a/pastehunter/postprocess/post_entropy.py b/pastehunter/postprocess/post_entropy.py
new file mode 100644
index 0000000..ca12b67
--- /dev/null
+++ b/pastehunter/postprocess/post_entropy.py
@@ -0,0 +1,16 @@
+import re
+import math
+from collections import Counter
+
+def shannon_entropy(s):
+ # https://rosettacode.org/wiki/Entropy#Python
+ s = str(s)
+ p, lns = Counter(s), float(len(s))
+ return -sum(count / lns * math.log(count / lns, 2) for count in p.values())
+
+
+def run(results, raw_paste_data, paste_object):
+ # Calculate the Shannon Entropy for the raw paste
+ paste_object["Shannon Entropy"] = shannon_entropy(raw_paste_data)
+ # Send the updated json back
+ return paste_object
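As a sanity check, the helper gives the expected values on simple strings (1223334444 is the Rosetta Code reference input):

    import math
    from collections import Counter

    def shannon_entropy(s):
        s = str(s)
        p, lns = Counter(s), float(len(s))
        return -sum(count / lns * math.log(count / lns, 2) for count in p.values())

    print(shannon_entropy('abab'))        # 1.0 (two symbols, evenly distributed)
    print(shannon_entropy('1223334444'))  # ~1.846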
diff --git a/pastehunter/sandboxes/__init__.py b/pastehunter/sandboxes/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pastehunter/sandboxes/cuckoo.py b/pastehunter/sandboxes/cuckoo.py
new file mode 100644
index 0000000..fda3dbd
--- /dev/null
+++ b/pastehunter/sandboxes/cuckoo.py
@@ -0,0 +1,36 @@
+import io
+import logging
+import requests
+from pastehunter.common import parse_config
+conf = parse_config()
+
+logger = logging.getLogger('pastehunter')
+
+def upload_file(raw_file, paste_object):
+ try:
+ task_id = send_to_cuckoo(raw_file, paste_object["pasteid"])
+ paste_object["Cuckoo Task ID"] = task_id
+ logger.info("exe submitted to Cuckoo with task id {0}".format(task_id))
+ except Exception as e:
+ logger.error("Unabled to submit sample to cuckoo")
+
+ # Send any updated json back
+ return paste_object
+
+def send_to_cuckoo(raw_exe, pasteid):
+ cuckoo_ip = conf["sandboxes"]["cuckoo"]["api_host"]
+ cuckoo_port = conf["sandboxes"]["cuckoo"]["api_port"]
+ cuckoo_host = 'http://{0}:{1}'.format(cuckoo_ip, cuckoo_port)
+ submit_file_url = '{0}/tasks/create/file'.format(cuckoo_host)
+ files = {'file': ('{0}.exe'.format(pasteid), io.BytesIO(raw_exe))}
+ submit_file = requests.post(submit_file_url, files=files).json()
+ task_id = None
+ try:
+ task_id = submit_file['task_id']
+ except KeyError:
+ try:
+ task_id = submit_file['task_ids'][0]
+ except KeyError:
+ logger.error(submit_file)
+
+ return task_id
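A minimal sketch of the same submission call in isolation, using a dummy payload and the default host/port from settings.json.sample (not a live Cuckoo instance):

    import io
    import requests

    raw_exe = b'MZ\x90\x00'  # illustrative bytes, not a real sample
    files = {'file': ('example-pasteid.exe', io.BytesIO(raw_exe))}
    resp = requests.post('http://127.0.0.1:8080/tasks/create/file', files=files)
    print(resp.json())  # Cuckoo responds with a task_id (or task_ids) on success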
diff --git a/pastehunter/sandboxes/viper.py b/pastehunter/sandboxes/viper.py
new file mode 100644
index 0000000..f77fb2d
--- /dev/null
+++ b/pastehunter/sandboxes/viper.py
@@ -0,0 +1,19 @@
+import io
+import logging
+import requests
+from pastehunter.common import parse_config
+conf = parse_config()
+
+logger = logging.getLogger('pastehunter')
+
+def upload_file(raw_file, paste_object):
+ viper_ip = conf["sandboxes"]["viper"]["api_host"]
+ viper_port = conf["sandboxes"]["viper"]["api_port"]
+ viper_host = 'http://{0}:{1}'.format(viper_ip, viper_port)
+
+ submit_file_url = '{0}/tasks/create/file'.format(viper_host)
+ files = {'file': ('{0}.exe'.format(paste_object["pasteid"]), io.BytesIO(raw_file))}
+ submit_file = requests.post(submit_file_url, files=files).json()
+
+ # Send any updated json back
+ return paste_object
diff --git a/settings.json.sample b/settings.json.sample
index f2cc1b0..138878d 100644
--- a/settings.json.sample
+++ b/settings.json.sample
@@ -2,7 +2,7 @@
"inputs": {
"pastebin":{
"enabled": true,
- "module": "inputs.pastebin",
+ "module": "pastehunter.inputs.pastebin",
"api_scrape": "https://scrape.pastebin.com/api_scraping.php",
"api_raw": "https://scrape.pastebin.com/api_scrape_item.php?i=",
"paste_limit": 200,
@@ -11,7 +11,7 @@
"dumpz": {
"enabled": false,
"comment": "This api endpoint has been removed.",
- "module": "inputs.dumpz",
+ "module": "pastehunter.inputs.dumpz",
"api_scrape": "https://dumpz.org/api/recent",
"api_raw": "https://dumpz.org/api/dump",
"paste_limit": 200,
@@ -19,7 +19,7 @@
},
"gists": {
"enabled": true,
- "module": "inputs.gists",
+ "module": "pastehunter.inputs.gists",
"api_token": "",
"api_limit": 100,
"store_all": false,
@@ -28,16 +28,17 @@
},
"github": {
"enabled": false,
- "module": "inputs.github",
+ "module": "pastehunter.inputs.github",
"api_token": "",
"api_limit": 100,
"store_all": false,
+ "ignore_bots": false,
"user_blacklist": [],
- "file_blacklist": ["node_modules/*", "__pycache__/*", "*/grahamcofborg-eval-package-list", "*/yarn.lock", "*.3ds", "*.3g2", "*.3gp", "*.7z", "*.DS_Store", "*.a", "*.aac", "*.adp", "*.ai", "*.aif", "*.aiff", "*.alz", "*.ape", "*.apk", "*.ar", "*.arj", "*.asf", "*.au", "*.avi", "*.bak", "*.bh", "*.bin", "*.bk", "*.bmp", "*.btif", "*.bz2", "*.bzip2", "*.cab", "*.caf", "*.cgm", "*.class", "*.cmx", "*.cpio", "*.cr2", "*.csv", "*.cur", "*.dat", "*.deb", "*.dex", "*.djvu", "*.dll", "*.dmg", "*.dng", "*.doc", "*.docm", "*.docx", "*.dot", "*.dotm", "*.dra", "*.dsk", "*.dts", "*.dtshd", "*.dvb", "*.dwg", "*.dxf", "*.ecelp4800", "*.ecelp7470", "*.ecelp9600", "*.egg", "*.eol", "*.eot", "*.epub", "*.exe", "*.f4v", "*.fbs", "*.fh", "*.fla", "*.flac", "*.fli", "*.flv", "*.fpx", "*.fst", "*.fvt", "*.g3", "*.gif", "*.graffle", "*.gz", "*.gzip", "*.h261", "*.h263", "*.h264", "*.ico", "*.ief", "*.img", "*.ipa", "*.iso", "*.jar", "*.jpeg", "*.jpg", "*.jpgv", "*.jpm", "*.jxr", "*.key", "*.ktx", "*.lha", "*.lvp", "*.lz", "*.lzh", "*.lzma", "*.lzo", "*.m3u", "*.m4a", "*.m4v", "*.mar", "*.mdi", "*.mht", "*.mid", "*.midi", "*.mj2", "*.mka", "*.mkv", "*.mmr", "*.mng", "*.mobi", "*.mov", "*.movie", "*.mp3", "*.mp4", "*.mp4a", "*.mpeg", "*.mpg", "*.mpga", "*.mxu", "*.nef", "*.npx", "*.numbers", "*.o", "*.oga", "*.ogg", "*.ogv", "*.otf", "*.pages", "*.pbm", "*.pcx", "*.pdf", "*.pea", "*.pgm", "*.pic", "*.png", "*.pnm", "*.pot", "*.potm", "*.potx", "*.ppa", "*.ppam", "*.ppm", "*.pps", "*.ppsm", "*.ppsx", "*.ppt", "*.pptm", "*.pptx", "*.psd", "*.pya", "*.pyc", "*.pyo", "*.pyv", "*.qt", "*.rar", "*.ras", "*.raw", "*.rgb", "*.rip", "*.rlc", "*.rmf", "*.rmvb", "*.rtf", "*.rz", "*.s3m", "*.s7z", "*.scpt", "*.sgi", "*.shar", "*.sil", "*.sketch", "*.slk", "*.smv", "*.so", "*.sub", "*.swf", "*.tar", "*.tbz", "*.tbz2", "*.tga", "*.tgz", "*.thmx", "*.tif", "*.tiff", "*.tlz", "*.ttc", "*.ttf", "*.txz", "*.udf", "*.uvh", "*.uvi", "*.uvm", "*.uvp", "*.uvs", "*.uvu", "*.viv", "*.vob", "*.war", "*.wav", "*.wax", "*.wbmp", "*.wdp", "*.weba", "*.webm", "*.webp", "*.whl", "*.wim", "*.wm", "*.wma", "*.wmv", "*.wmx", "*.woff", "*.woff2", "*.wvx", "*.xbm", "*.xif", "*.xla", "*.xlam", "*.xls", "*.xlsb", "*.xlsm", "*.xlsx", "*.xlt", "*.xltm", "*.xltx", "*.xm", "*.xmind", "*.xpi", "*.xpm", "*.xwd", "*.xz", "*.z", "*.zip", "*.zipx"]
+ "file_blacklist": ["node_modules/*", "__pycache__/*", "*/grahamcofborg-eval-package-list", "*.lock", "*.3ds", "*.3g2", "*.3gp", "*.7z", "*.DS_Store", "*.a", "*.aac", "*.adp", "*.ai", "*.aif", "*.aiff", "*.alz", "*.ape", "*.apk", "*.ar", "*.arj", "*.asf", "*.au", "*.avi", "*.bak", "*.bh", "*.bin", "*.bk", "*.bmp", "*.btif", "*.bz2", "*.bzip2", "*.cab", "*.caf", "*.cgm", "*.class", "*.cmx", "*.cpio", "*.cr2", "*.cur", "*.dat", "*.deb", "*.dex", "*.djvu", "*.dll", "*.dmg", "*.dng", "*.doc", "*.docm", "*.docx", "*.dot", "*.dotm", "*.dra", "*.dsk", "*.dts", "*.dtshd", "*.dvb", "*.dwg", "*.dxf", "*.ecelp4800", "*.ecelp7470", "*.ecelp9600", "*.egg", "*.eol", "*.eot", "*.epub", "*.exe", "*.f4v", "*.fbs", "*.fh", "*.fla", "*.flac", "*.fli", "*.flv", "*.fpx", "*.fst", "*.fvt", "*.g3", "*.gif", "*.graffle", "*.gz", "*.gzip", "*.h261", "*.h263", "*.h264", "*.ico", "*.ief", "*.img", "*.ipa", "*.iso", "*.jar", "*.jpeg", "*.jpg", "*.jpgv", "*.jpm", "*.jxr","*.ktx", "*.lha", "*.lvp", "*.lz", "*.lzh", "*.lzma", "*.lzo", "*.m3u", "*.m4a", "*.m4v", "*.mar", "*.mdi", "*.mht", "*.mid", "*.midi", "*.mj2", "*.mka", "*.mkv", "*.mmr", "*.mng", "*.mobi", "*.mov", "*.movie", "*.mp3", "*.mp4", "*.mp4a", "*.mpeg", "*.mpg", "*.mpga", "*.mxu", "*.nef", "*.npx", "*.numbers", "*.o", "*.oga", "*.ogg", "*.ogv", "*.otf", "*.pages", "*.pbm", "*.pcx", "*.pdf", "*.pea", "*.pgm", "*.pic", "*.png", "*.pnm", "*.pot", "*.potm", "*.potx", "*.ppa", "*.ppam", "*.ppm", "*.pps", "*.ppsm", "*.ppsx", "*.ppt", "*.pptm", "*.pptx", "*.psd", "*.pya", "*.pyc", "*.pyo", "*.pyv", "*.qt", "*.rar", "*.ras", "*.raw", "*.rgb", "*.rip", "*.rlc", "*.rmf", "*.rmvb", "*.rtf", "*.rz", "*.s3m", "*.s7z", "*.scpt", "*.sgi", "*.shar", "*.sil", "*.sketch", "*.slk", "*.smv", "*.so", "*.sub", "*.swf", "*.tar", "*.tbz", "*.tbz2", "*.tga", "*.tgz", "*.thmx", "*.tif", "*.tiff", "*.tlz", "*.ttc", "*.ttf", "*.txz", "*.udf", "*.uvh", "*.uvi", "*.uvm", "*.uvp", "*.uvs", "*.uvu", "*.viv", "*.vob", "*.war", "*.wav", "*.wax", "*.wbmp", "*.wdp", "*.weba", "*.webm", "*.webp", "*.whl", "*.wim", "*.wm", "*.wma", "*.wmv", "*.wmx", "*.woff", "*.woff2", "*.wvx", "*.xbm", "*.xif", "*.xla", "*.xlam", "*.xls", "*.xlsb", "*.xlsm", "*.xlsx", "*.xlt", "*.xltm", "*.xltx", "*.xm", "*.xmind", "*.xpi", "*.xpm", "*.xwd", "*.xz", "*.z", "*.zip", "*.zipx"]
},
"slexy":{
"enabled": true,
- "module": "inputs.slexy",
+ "module": "pastehunter.inputs.slexy",
"store_all": false,
"api_scrape": "http://slexy.org/recent",
"api_raw": "http://slexy.org/raw",
@@ -45,7 +46,7 @@
},
"stackexchange":{
"enabled": false,
- "module": "inputs.stackexchange",
+ "module": "pastehunter.inputs.stackexchange",
"site_list": ["stackoverflow","serverfault", "superuser", "webapps", "webmasters", "dba"],
"api_key": "",
"store_filter": "!)r_ttsG0v3bE1vo3*8Ki",
@@ -57,7 +58,7 @@
"outputs": {
"elastic_output": {
"enabled": true,
- "module": "outputs.elastic_output",
+ "module": "pastehunter.outputs.elastic_output",
"classname": "ElasticOutput",
"elastic_index": "paste-test",
"elastic_host": "172.16.10.10",
@@ -69,7 +70,7 @@
},
"splunk_output": {
"enabled": false,
- "module": "outputs.splunk_output",
+ "module": "pastehunter.outputs.splunk_output",
"classname": "SplunkOutput",
"splunk_host": "host",
"splunk_port": 8089,
@@ -81,7 +82,7 @@
},
"json_output": {
"enabled": false,
- "module": "outputs.json_output",
+ "module": "pastehunter.outputs.json_output",
"classname": "JsonOutput",
"output_path": "logs/json/",
"store_raw": true,
@@ -89,20 +90,20 @@
},
"csv_output": {
"enabled": false,
- "module": "outputs.csv_output",
+ "module": "pastehunter.outputs.csv_output",
"classname": "CSVOutput",
"output_path": "logs/csv/"
},
"syslog_output": {
"enabled": false,
- "module": "outputs.syslog_output",
+ "module": "pastehunter.outputs.syslog_output",
"classname": "SyslogOutput",
"host": "192.168.1.1",
"port": 514
},
"smtp_output": {
"enabled": false,
- "module": "outputs.smtp_output",
+ "module": "pastehunter.outputs.smtp_output",
"classname": "SMTPOutput",
"smtp_host": "smtp.server.com",
"smtp_port": 25,
@@ -124,14 +125,14 @@
},
"slack_output": {
"enabled": false,
- "module": "outputs.slack_output",
+ "module": "pastehunter.outputs.slack_output",
"classname": "SlackOutput",
"webhook_url": "",
"rule_list": ["custom_keywords"]
},
"twilio_output": {
"enabled": false,
- "module": "outputs.twilio_output",
+ "module": "pastehunter.outputs.twilio_output",
"classname": "TwilioOutput",
"account_sid": "",
"auth_token": "",
@@ -141,7 +142,9 @@
}
},
"yara": {
- "rule_path": "YaraRules",
+ "default_rules": true,
+ "custom_rules": "none",
+ "exclude_rules": [],
"blacklist": true,
"test_rules": false
},
@@ -159,13 +162,13 @@
"sandboxes": {
"cuckoo": {
"enabled": false,
- "module": "sandboxes.cuckoo",
+ "module": "pastehunter.sandboxes.cuckoo",
"api_host": "127.0.0.1",
"api_port": 8080
},
"viper": {
"enabled": false,
- "module": "sandboxes.viper",
+ "module": "pastehunter.sandboxes.viper",
"api_host": "127.0.0.1",
"api_port": 8080
}
@@ -173,22 +176,22 @@
"post_process": {
"post_email": {
"enabled": true,
- "module": "postprocess.post_email",
+ "module": "pastehunter.postprocess.post_email",
"rule_list": ["email_list"]
},
"post_b64": {
"enabled": true,
- "module": "postprocess.post_b64",
+ "module": "pastehunter.postprocess.post_b64",
"rule_list": ["b64_exe", "b64_rar", "b64_zip", "b64_gzip"]
},
"post_entropy": {
"enabled": false,
- "module": "postprocess.post_entropy",
+ "module": "pastehunter.postprocess.post_entropy",
"rule_list": ["ALL"]
},
"post_compress": {
"enabled": false,
- "module": "postprocess.post_compress",
+ "module": "pastehunter.postprocess.post_compress",
"rule_list": ["ALL"]
}
}
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..84e2751
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+from setuptools import setup, find_packages
+
+with open("README.md", "r") as fh:
+ long_description = fh.read()
+
+setup(
+ name='pastehunter',
+ version='1.2.1',
+ author='@kevthehermit @Plazmaz',
+ author_email='info@pastehunter.com',
+ description="Pastehunter",
+ long_description=long_description,
+ long_description_content_type="text/markdown",
+ url='https://pastehunter.com',
+ license='GNU V3',
+ zip_safe=False,
+ packages=find_packages(),
+ include_package_data=True,
+ install_requires=[
+ 'yara-python',
+ 'requests',
+ 'elasticsearch',
+ 'splunk-sdk'
+ ],
+ scripts=['pastehunter-cli'],
+ package_data={'': ['*.yar', 'README.md', 'LICENSE']}
+)
\ No newline at end of file