From 5b6750c758cc8515893c43790303cd8789d16620 Mon Sep 17 00:00:00 2001
From: garanews
Date: Thu, 12 Oct 2017 11:14:42 +0200
Subject: [PATCH] improved url detection - see #18

---
 iocp/Parser.py         | 5 ++++-
 iocp/data/patterns.ini | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/iocp/Parser.py b/iocp/Parser.py
index f8d084a..f2e9743 100644
--- a/iocp/Parser.py
+++ b/iocp/Parser.py
@@ -130,7 +130,10 @@ def load_patterns(self, fpath):
                 continue
 
             if ind_pattern:
-                ind_regex = re.compile(ind_pattern)
+                if ind_type == 'URL':
+                    ind_regex = re.compile(ind_pattern, re.IGNORECASE|re.MULTILINE|re.DOTALL)
+                else:
+                    ind_regex = re.compile(ind_pattern)
                 self.patterns[ind_type] = ind_regex
 
             try:
diff --git a/iocp/data/patterns.ini b/iocp/data/patterns.ini
index 6266da1..5cedc0d 100644
--- a/iocp/data/patterns.ini
+++ b/iocp/data/patterns.ini
@@ -1,5 +1,5 @@
 [URL]
-pattern: \b([a-z]{3,}\:\/\/[\S]{16,})\b
+pattern: \b(?:(?:https?|ftp|file):\/\/|www\.|ftp\.)(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[-A-Z0-9+&@#/%=~_|$?!:,.])*(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[A-Z0-9+&@#/%=~_|$])\b
 defang: True
 
 [Host]