Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added logic to check for duplicates, which I get from several of the … #10

Open
wants to merge 1 commit into
base: python3
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 42 additions & 8 deletions scripts/zeek_otx.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import datetime
import shutil
import time

from argparse import ArgumentParser
from configparser import ConfigParser
Expand Down Expand Up @@ -74,6 +75,10 @@ def parse_args():
parser.add_argument('-c', '--config',
help='configuration file path',
default='zeek_otx.conf')
parser.add_argument('--duplicates',action='store_true',
help='removes duplicate indiciators (will keep the newest based on created date)')
parser.add_argument('--scrub',action='store_true',
help='additional scrubbing of indicators (beyond the URL scrubbing)')
return parser.parse_args()

def parse_config(config_path):
Expand Down Expand Up @@ -117,26 +122,32 @@ def sync_otx_cache(api_key, days, otx_cache):
sys.exit(1)
return cache

def sanitize_url(url, scrub=False):
    '''Sanitize url for import in to Zeek intel framework.

    The Zeek intel framework does not support url scheme (http, https, etc.)
    and it must be stripped before adding the url into the intel framework.

    Only the leading "<scheme>://" is removed; any later occurrence (for
    example a URL embedded in the query string) is left untouched.

    Args:
        url: a string url
        scrub: when true, drop everything from the first null byte onwards
            (null bytes are present in some indicators). Defaults to False
            so the function can still be called with the url alone.
    Returns
        A string of the sanitized url.
    '''

    if scrub:
        # Some URL indicators carry embedded null bytes; keep only the part
        # that precedes the first one.
        url = url.split('\x00')[0]

    parsed_url = urlparse(url)
    sanitized = parsed_url.geturl()

    # urlparse lower-cases the scheme and geturl() re-emits it that way, so
    # the prefix built here matches the start of the string when one exists.
    prefix = '{0}://'.format(parsed_url.scheme)

    # Strip the scheme only at the front. A global replace would also mangle
    # "scheme://" sequences further inside the URL, and with an empty scheme
    # it would delete every bare "://" in the indicator.
    if parsed_url.scheme and sanitized.startswith(prefix):
        sanitized = sanitized[len(prefix):]
    return sanitized

def main(api_key, days, outfile, do_notice, otx_cache):
def main(api_key, days, outfile, do_notice, otx_cache, duplicates, scrub):
'''Main runtime routine.'''

cache = sync_otx_cache(api_key, days, otx_cache)

iocs = set()
duplicate_list = dict()
# iterate through pulses, building the zeek intel file
for pulse in cache.getall_iter():
pulse_name = pulse.get('name')
Expand All @@ -161,11 +172,32 @@ def main(api_key, days, outfile, do_notice, otx_cache):
indicator_type = ioc.get('type')
# special handling for URL types
if indicator_type == 'URL':
indicator = sanitize_url(indicator)

iocs.add('\t'.join([indicator,
_MAP.get(indicator_type),
metadata]))
indicator = sanitize_url(indicator, scrub)
# test for duplicates if set on CLI
if duplicates:
after = ioc.get('created')
after = after.split('T',1)
date_tuple = after[0]
date_tuple = date_tuple.split('-')
date_struct = datetime.date(int(date_tuple[0]), int(date_tuple[1]), int(date_tuple[2]))
unixtime = int(time.mktime(date_struct.timetuple()))
if indicator not in duplicate_list:
iocs.add('\t'.join([indicator,
_MAP.get(indicator_type),
metadata]))
# track duplicate entries
duplicate_list.setdefault(indicator, {})['date'] = unixtime
duplicate_list.setdefault(indicator, {})['name'] = indicator
else:
# check which indicator is newer and use that
unixtime_current = duplicate_list[indicator]['date']
if unixtime_current < unixtime:
duplicate_list[indicator]['date'] = unixtime
duplicate_list[indicator]['name'] = indicator
else:
iocs.add('\t'.join([indicator,
_MAP.get(indicator_type),
metadata]))

tf_name = ''
with NamedTemporaryFile(mode='w', delete=False) as tf:
Expand All @@ -185,6 +217,8 @@ def main(api_key, days, outfile, do_notice, otx_cache):
# Parse arguments from sys.argv
args = parse_args()
CONFIG = parse_config(args.config)
DUPLICATE = args.duplicates
SCRUB = args.scrub

# Validate configuration values
API_KEY = CONFIG.get('otx', 'api_key')
Expand All @@ -207,4 +241,4 @@ def main(api_key, days, outfile, do_notice, otx_cache):

OTX_CACHE = CONFIG.get('otx', 'otx_cache')

main(API_KEY, DAYS, OUTFILE, DO_NOTICE, OTX_CACHE)
main(API_KEY, DAYS, OUTFILE, DO_NOTICE, OTX_CACHE, DUPLICATE, SCRUB)