From 33599eabb91af2c727bacd8c577fcebd51a5ed09 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Wed, 10 Oct 2018 21:20:48 +0200 Subject: [PATCH 01/16] Align spamscope main configuration file for ansible install test --- ansible/requirements.txt | 2 +- ansible/templates/spamscope.yml.j2 | 29 +++++++++++++++++++++++++---- conf/spamscope.example.yml | 7 ++++--- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/ansible/requirements.txt b/ansible/requirements.txt index 19df0e6..cabb1f5 100644 --- a/ansible/requirements.txt +++ b/ansible/requirements.txt @@ -1 +1 @@ -ansible==2.5.0 +ansible \ No newline at end of file diff --git a/ansible/templates/spamscope.yml.j2 b/ansible/templates/spamscope.yml.j2 index 1bc35e7..c3d3561 100644 --- a/ansible/templates/spamscope.yml.j2 +++ b/ansible/templates/spamscope.yml.j2 @@ -51,6 +51,9 @@ phishing: tokenizer: + # Persistent where store dumps of hashes. + persistent_path: /tmp + # If true mails with same hash are filtered and not analyzed. 
# Only the body will not saved filter_mails: true @@ -84,19 +87,37 @@ network: enabled: false api_key: xxxxxxxxxxxxxxxxxxxxxxxxxx - # RawMail bolt configuration raw_mail: # SpamAssassin analysis: https://spamassassin.apache.org/ spamassassin: enabled: false - + # Dialect analysis: https://sissden.eu/blog/analysis-of-smtp-dialects + dialect: + enabled: false + + # elasticsearch instance where are postfix logs + elasticsearch: + hosts: + - "node1:9200" + - "node2" + + # Prefix with dash '-' of Postfix index in Elasticsearch + # The format of indices should be postfix-2018.12.30 + index.prefix.postfix: postfix- + # Attachments bolt configuration attachments: - # The lists of all components must be under lists keyword to load them - # automatically commons: + # enable or disable filter on size + size.filter.enabled: false + + # max size to analyze in bytes + max.size: 3145728 + + # The lists of all components must be under lists keyword to load them + # automatically lists: blacklist_content_types: # All content types to remove from results diff --git a/conf/spamscope.example.yml b/conf/spamscope.example.yml index 6e72726..4c5ba0a 100644 --- a/conf/spamscope.example.yml +++ b/conf/spamscope.example.yml @@ -113,7 +113,7 @@ raw_mail: # Dialect analysis: https://sissden.eu/blog/analysis-of-smtp-dialects dialect: enabled: false - + # elasticsearch instance where are postfix logs elasticsearch: hosts: @@ -127,14 +127,15 @@ raw_mail: # Attachments bolt configuration attachments: - # The lists of all components must be under lists keyword to load them - # automatically commons: # enable or disable filter on size size.filter.enabled: false + # max size to analyze in bytes max.size: 3145728 + # The lists of all components must be under lists keyword to load them + # automatically lists: blacklist_content_types: # All content types to remove from results From 3632e3206b37e9b7bf813f3a92b518c6d4e69249 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Wed, 24 Oct 2018 22:33:44 
+0200 Subject: [PATCH 02/16] Issue BadPickleGet --- src/bolts/tokenizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bolts/tokenizer.py b/src/bolts/tokenizer.py index 84070cd..be97812 100644 --- a/src/bolts/tokenizer.py +++ b/src/bolts/tokenizer.py @@ -24,6 +24,7 @@ import random import six from collections import deque +from cPickle import BadPickleGet from streamparse import Stream import mailparser @@ -84,7 +85,7 @@ def load_filters(self): try: obj = load_obj(path) setattr(self, "analyzed_" + i, obj) - except (IOError, EOFError, ValueError): + except (IOError, EOFError, ValueError, BadPickleGet): setattr(self, "analyzed_" + i, deque( maxlen=getattr(self, "maxlen_" + i))) From c12279a4210c5aa9cde4535782eec76fb8014971 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sat, 27 Oct 2018 18:46:39 +0200 Subject: [PATCH 03/16] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..f2fd5c8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,34 @@ +--- +name: Bug report +about: Create a report to help us improve + +--- + +**Describe the bug** +A clear and concise description of what the bug is: + - which bolt/spout is in error + - Apache Storm error log + - ... + +**To Reproduce** +Steps to reproduce the behavior: +1. ... +2. ... + +Attach main configuration file of `SpamScope`. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Raw mail** +The raw mail to reproduce the behavior. +You can use a `gist` like [this](https://gist.github.com/fedelemantuano/5dd702004c25a46b2bd60de21e67458e). +The issues without raw mail will be closed. + +**Environment:** + - OS: [e.g. 
Debian, Centos] + - Docker: [yes or no] + - `SpamScope` version [e.g. 3.6.0] + +**Additional context** +Add any other context about the problem here. From 8aeda3e8853a576cd2041d4356a5d6cffe88fbbb Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sat, 27 Oct 2018 19:16:47 +0200 Subject: [PATCH 04/16] Update issue templates --- .github/ISSUE_TEMPLATE/feature_request.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..066b2d9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,17 @@ +--- +name: Feature request +about: Suggest an idea for this project + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. 
From a030de22f39cadf058c1cb8a89c7175af271e536 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sat, 27 Oct 2018 20:06:41 +0200 Subject: [PATCH 05/16] Fixed test --- tests/test_raw_mail_post_processing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_raw_mail_post_processing.py b/tests/test_raw_mail_post_processing.py index d6a58e4..73f338f 100644 --- a/tests/test_raw_mail_post_processing.py +++ b/tests/test_raw_mail_post_processing.py @@ -75,7 +75,8 @@ def test_processors(self): p_ordered = [i[0] for i in sorted(mails.processors, key=itemgetter(1))] conf = { - "spamassassin": {"enabled": True}} + "spamassassin": {"enabled": True}, + "dialect": {"enabled": False}} results = {} self.assertFalse(results) @@ -85,6 +86,7 @@ def test_processors(self): self.assertTrue(results) self.assertIn("spamassassin", results) + self.assertNotIn("dialect", results) if __name__ == '__main__': From 59fd2be013786bc3a7df8ca47fc592b0f27d61e2 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Mon, 29 Oct 2018 21:34:16 +0100 Subject: [PATCH 06/16] Fixed issue if index doesn't exist --- src/modules/mails/dialects.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/modules/mails/dialects.py b/src/modules/mails/dialects.py index 1bd0100..50aef49 100644 --- a/src/modules/mails/dialects.py +++ b/src/modules/mails/dialects.py @@ -189,7 +189,9 @@ def get_messages(message_id, elastic_server, index_prefix, max_size=100): # From message_id get code of comunication from client and server r = es.search( - index=indices, body=query_code % {"message_id": message_id}) + index=indices, + body=query_code % {"message_id": message_id}, + ignore_unavailable=True) code = r["hits"]["hits"][0]["_source"]["code"] timestamp = r["hits"]["hits"][0]["_source"]["@timestamp"] @@ -197,7 +199,9 @@ def get_messages(message_id, elastic_server, index_prefix, max_size=100): # From code get client (ip and name) r = es.search( - index=indices, 
body=query_client % {"code": code}) + index=indices, + body=query_client % {"code": code}, + ignore_unavailable=True) client_ip = r["hits"]["hits"][0]["_source"]["client_ip"] client_name = r["hits"]["hits"][0]["_source"]["client_name"] @@ -208,7 +212,8 @@ def get_messages(message_id, elastic_server, index_prefix, max_size=100): "timestamp": timestamp, "client_ip": client_ip, "client_name": client_name}, - size=max_size) + size=max_size, + ignore_unavailable=True) messages = [(i["_source"]["actor"], i["_source"]["dialect"]) for i in r["hits"]["hits"]] From 51514f077c32620b53c6f03345574db42e1b566b Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Mon, 29 Oct 2018 21:40:06 +0100 Subject: [PATCH 07/16] Number of shards --- conf/templates/commons.json | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/templates/commons.json b/conf/templates/commons.json index a62d292..619df8f 100644 --- a/conf/templates/commons.json +++ b/conf/templates/commons.json @@ -4,6 +4,7 @@ "settings": { "index.codec": "best_compression", "index.number_of_replicas": 0, + "index.number_of_shards": 1, "index.refresh_interval": "5s", "index.mapping.total_fields.limit": 100000, "index.mapping.ignore_malformed": true From 323e3af2dac0411146c3bb4ad29988e77b4f95d8 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sun, 18 Nov 2018 10:50:59 +0100 Subject: [PATCH 08/16] Added alias to elasticsearch templates --- conf/templates/spamscope_attachments.json | 94 +++++++++++++++-------- conf/templates/spamscope_mails.json | 89 +++++++++++++-------- 2 files changed, 119 insertions(+), 64 deletions(-) diff --git a/conf/templates/spamscope_attachments.json b/conf/templates/spamscope_attachments.json index 47d6eeb..2517e6c 100644 --- a/conf/templates/spamscope_attachments.json +++ b/conf/templates/spamscope_attachments.json @@ -1,38 +1,44 @@ { "order": 0, - "version": 2, - "index_patterns": "spamscope_attachments-*", + "version": 3, + "index_patterns": [ + "spamscope_attachments-*" + ], "settings": { - 
"analysis": { - "analyzer": { - "header": { - "tokenizer": "uax_url_email", - "filter": [ - "lowercase" - ] - }, - "html_body": { - "char_filter": [ - "html_strip" - ], - "tokenizer": "uax_url_email", - "filter": [ - "lowercase" - ] - }, - "path_pattern": { - "tokenizer": "path_hierarchy", - "filter": [ - "lowercase" - ] + "index": { + "codec": "best_compression", + "mapping": { + "ignore_malformed": "true" + }, + "refresh_interval": "5s", + "analysis": { + "analyzer": { + "header": { + "filter": [ + "lowercase" + ], + "tokenizer": "uax_url_email" + }, + "html_body": { + "filter": [ + "lowercase" + ], + "char_filter": [ + "html_strip" + ], + "tokenizer": "uax_url_email" + }, + "path_pattern": { + "filter": [ + "lowercase" + ], + "tokenizer": "path_hierarchy" + } } - } - }, - "index.codec": "best_compression", - "index.number_of_shards": 1, - "index.number_of_replicas": 0, - "index.refresh_interval": "5s", - "index.mapping.ignore_malformed": true + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } }, "mappings": { "_doc": { @@ -86,5 +92,29 @@ } } } + }, + "aliases": { + "attachments": {}, + "attachments_thug": { + "filter": { + "exists": { + "field": "thug" + } + } + }, + "attachments_tika": { + "filter": { + "exists": { + "field": "tika" + } + } + }, + "attachments_virustotal": { + "filter": { + "exists": { + "field": "virustotal" + } + } + } } -} +} \ No newline at end of file diff --git a/conf/templates/spamscope_mails.json b/conf/templates/spamscope_mails.json index fac8c3d..1f8c78a 100644 --- a/conf/templates/spamscope_mails.json +++ b/conf/templates/spamscope_mails.json @@ -1,38 +1,44 @@ { "order": 0, - "version": 8, - "index_patterns": "spamscope_mails-*", + "version": 9, + "index_patterns": [ + "spamscope_mails-*" + ], "settings": { - "analysis": { - "analyzer": { - "header": { - "tokenizer": "uax_url_email", - "filter": [ - "lowercase" - ] - }, - "html_body": { - "char_filter": [ - "html_strip" - ], - "tokenizer": "uax_url_email", - "filter": [ 
- "lowercase" - ] - }, - "path_pattern": { - "tokenizer": "path_hierarchy", - "filter": [ - "lowercase" - ] + "index": { + "codec": "best_compression", + "mapping": { + "ignore_malformed": "true" + }, + "refresh_interval": "5s", + "analysis": { + "analyzer": { + "header": { + "filter": [ + "lowercase" + ], + "tokenizer": "uax_url_email" + }, + "html_body": { + "filter": [ + "lowercase" + ], + "char_filter": [ + "html_strip" + ], + "tokenizer": "uax_url_email" + }, + "path_pattern": { + "filter": [ + "lowercase" + ], + "tokenizer": "path_hierarchy" + } } - } - }, - "index.codec": "best_compression", - "index.number_of_shards": 1, - "index.number_of_replicas": 0, - "index.refresh_interval": "5s", - "index.mapping.ignore_malformed": true + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } }, "mappings": { "_doc": { @@ -159,5 +165,24 @@ } } } + }, + "aliases": { + "mails": {}, + "mails_attachments": { + "filter": { + "term": { + "with_attachments": { + "value": "true" + } + } + } + }, + "mails_dialect": { + "filter": { + "exists": { + "field": "raw_mail.dialect" + } + } + } } -} +} \ No newline at end of file From 7fdd041dedbac3332b59c316549c858fa116af16 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 25 Jan 2019 23:36:52 +0100 Subject: [PATCH 09/16] Replace bitbucket --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 00c8e4d..f2abc28 100644 --- a/.travis.yml +++ b/.travis.yml @@ -48,7 +48,7 @@ install: - pip install --upgrade pip setuptools - python setup.py install - pip install -r requirements_optional.txt - - git clone https://$BITBUCKET_USER:$BITBUCKET_ROBOT_KEY@bitbucket.org/$BITBUCKET_USER/zemana-api.git $ZEMANA_PATH && cd $ZEMANA_PATH && python setup.py install && cd - + - git clone https://$GITHUB_USER:$GITHUB_TOKEN@https://github.com/fedelemantuano/zemana-api.git $ZEMANA_PATH && cd $ZEMANA_PATH && python setup.py install && cd - - src/cli/faup.sh - pip install coveralls From 
fad8d737bd14a4fe5e23931c6171473c05d412fa Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 25 Jan 2019 23:42:56 +0100 Subject: [PATCH 10/16] Fixed typo --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f2abc28..b1de5d4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -48,7 +48,7 @@ install: - pip install --upgrade pip setuptools - python setup.py install - pip install -r requirements_optional.txt - - git clone https://$GITHUB_USER:$GITHUB_TOKEN@https://github.com/fedelemantuano/zemana-api.git $ZEMANA_PATH && cd $ZEMANA_PATH && python setup.py install && cd - + - git clone https://$GITHUB_USER:$GITHUB_TOKEN@github.com/fedelemantuano/zemana-api.git $ZEMANA_PATH && cd $ZEMANA_PATH && python setup.py install && cd - - src/cli/faup.sh - pip install coveralls From d6b411469874f38c2adff06cd0e9e1942ad7c845 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 25 Jan 2019 23:57:32 +0100 Subject: [PATCH 11/16] Bugfix Zemana --- src/modules/attachments/post_processing.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/modules/attachments/post_processing.py b/src/modules/attachments/post_processing.py index b7fbdcd..f0035e2 100644 --- a/src/modules/attachments/post_processing.py +++ b/src/modules/attachments/post_processing.py @@ -224,10 +224,10 @@ def zemana(conf, attachments): log.exception( "HTTPError in Zemana query for md5 {!r}".format( a["md5"])) - - if result: - a["zemana"] = result.json - a["zemana"]["type"] = result.type + else: + if result: + a["zemana"] = result.json + a["zemana"]["type"] = result.type for i in a.get("files", []): try: @@ -236,10 +236,10 @@ def zemana(conf, attachments): log.exception( "HTTPError in Zemana query for md5 {!r}".format( i["md5"])) - - if i_result: - i["zemana"] = i_result.json - i["zemana"]["type"] = i_result.type + else: + if i_result: + i["zemana"] = i_result.json + i["zemana"]["type"] = i_result.type @register(processors, 
priority=999, active=True) From e674cc4fdcba3ea616bf1b5811339d1dda012611 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sat, 26 Jan 2019 11:19:49 +0100 Subject: [PATCH 12/16] Disabled Zemana. Added makefile + tox. lint errors fixed --- .gitignore | 2 + .travis.yml | 2 - Makefile | 82 ++++++++++++++++++++++ requirements-dev.txt | 28 ++++++++ src/modules/attachments/attachments.py | 8 ++- src/modules/attachments/post_processing.py | 4 +- tests/test_attachments.py | 15 ++-- tests/test_attachments_utils.py | 1 - tests/test_bitmap.py | 2 - tests/test_dialect.py | 2 - tests/test_network_post_processing.py | 11 +-- tests/test_phishing.py | 1 - tox.ini | 13 ++++ 13 files changed, 146 insertions(+), 25 deletions(-) create mode 100644 Makefile create mode 100644 requirements-dev.txt create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore index 110be8f..8113e42 100644 --- a/.gitignore +++ b/.gitignore @@ -6,10 +6,12 @@ _resources .env .idea/ .ropeproject +.tox/ .vscode/ *.pyc build/ dist/ logs/ +report/ SpamScope.egg-info/ venv/ \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index b1de5d4..15e2e5a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,6 @@ python: env: - TIKA_APP_JAR=/tmp/tika-app-${TIKA_VER}.jar FAUP_PATH=/tmp/faup - ZEMANA_PATH=/tmp/zemana DOCKER_ELASTICSEARCH_PATH=/tmp/docker-elasticsearch before_install: @@ -48,7 +47,6 @@ install: - pip install --upgrade pip setuptools - python setup.py install - pip install -r requirements_optional.txt - - git clone https://$GITHUB_USER:$GITHUB_TOKEN@github.com/fedelemantuano/zemana-api.git $ZEMANA_PATH && cd $ZEMANA_PATH && python setup.py install && cd - - src/cli/faup.sh - pip install coveralls diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..71bbea6 --- /dev/null +++ b/Makefile @@ -0,0 +1,82 @@ +.PHONY: clean clean-test clean-pyc clean-build docs help +.DEFAULT_GOAL := help + +define BROWSER_PYSCRIPT +import os, webbrowser, sys + +try: + from urllib import 
pathname2url +except: + from urllib.request import pathname2url + +webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) +endef +export BROWSER_PYSCRIPT + +define PRINT_HELP_PYSCRIPT +import re, sys + +for line in sys.stdin: + match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) + if match: + target, help = match.groups() + print("%-20s %s" % (target, help)) +endef +export PRINT_HELP_PYSCRIPT + +BROWSER := python -c "$$BROWSER_PYSCRIPT" + +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts + +clean-build: ## remove build artifacts + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '*.egg' -exec rm -f {} + + +clean-pyc: ## remove Python file artifacts + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . -name '__pycache__' -exec rm -fr {} + + +clean-test: ## remove test and coverage artifacts + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + rm -fr .pytest_cache + +lint: ## check style with flake8 + flake8 src tests + +test: ## run tests quickly with the default Python + python -m unittest discover -s tests -f -v + +test-all: ## run tests on every Python version with tox + tox + +# docs: ## generate Sphinx HTML documentation, including API docs +# rm -f docs/mailparser.rst +# rm -f docs/modules.rst +# sphinx-apidoc -o docs/ mailparser +# $(MAKE) -C docs clean +# $(MAKE) -C docs html +# $(BROWSER) docs/_build/html/index.html + +# servedocs: docs ## compile the docs watching for changes +# watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 
+ +release: dist ## package and upload a release + twine upload dist/* + +dist: clean ## builds source and wheel package + python setup.py sdist + python setup.py bdist_wheel + ls -l dist + +install: clean ## install the package to the active Python's site-packages + python setup.py install diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..8b6b1e1 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,28 @@ +# always +PyYAML +astropy==1.3.3 +backports.functools-lru-cache>=1.3 +chainmap +lxml +mail-parser>=3.4.1 +patool +pyparsing +python-magic +simplejson +six +ssdeep +streamparse==3.13.1 + +# optional +elasticsearch>=6.0.0, <7 +redis>=2.10.5, <3 +shodan +tika-app>=1.4.0 +virustotal-api + +# editable +git+https://github.com/stricaud/faup.git#egg=pyfaup&subdirectory=src/lib/bindings/python + +# dev +flake8 +coverage \ No newline at end of file diff --git a/src/modules/attachments/attachments.py b/src/modules/attachments/attachments.py index e6d57e0..63198d2 100644 --- a/src/modules/attachments/attachments.py +++ b/src/modules/attachments/attachments.py @@ -141,7 +141,10 @@ def popcontenttype(self, content_type): remove = [] for i in self: - if not i.get("is_filtered", False): + filtered = i.get("is_filtered", False) + m_content_type = i["mail_content_type"].lower() + + if not filtered: try: if i["Content-Type"].lower() == content_type: remove.append(i) @@ -164,8 +167,7 @@ def popcontenttype(self, content_type): # you should remove sample from results. 
# You can't use Content-Type because we don't have payload, so # we use mail_content_type - elif (i.get("is_filtered") and - i["mail_content_type"].lower() == content_type): + elif (filtered and m_content_type == content_type): remove.append(i) else: diff --git a/src/modules/attachments/post_processing.py b/src/modules/attachments/post_processing.py index f0035e2..b31fea8 100644 --- a/src/modules/attachments/post_processing.py +++ b/src/modules/attachments/post_processing.py @@ -192,8 +192,8 @@ def thug(conf, attachments): i["thug"] = thug.run(i, **conf) -@register(processors, active=True) -def zemana(conf, attachments): +@register(processors, active=False) +def zemana(conf, attachments): # pragma: no cover """This method updates the attachments results with Zemana AntiMalware reports. diff --git a/tests/test_attachments.py b/tests/test_attachments.py index 0c2ab2d..baaf5a8 100644 --- a/tests/test_attachments.py +++ b/tests/test_attachments.py @@ -305,12 +305,13 @@ def test_filtercontenttypes(self): self.assertEqual(len(t), 1) self.assertEqual(len(t[0]["files"]), 0) - @unittest.skipIf(OPTIONS["THUG_ENABLED"].capitalize() == "False" or - OPTIONS["VIRUSTOTAL_ENABLED"].capitalize() == "False" or - OPTIONS["ZEMANA_ENABLED"].capitalize() == "False", - "Complete post processing test skipped: " - "set env variables 'THUG_ENABLED', " - "'VIRUSTOTAL_ENABLED' and 'ZEMANA_ENABLED' to True") + @unittest.skipIf( + OPTIONS["THUG_ENABLED"].capitalize() == "False" or OPTIONS[ + "VIRUSTOTAL_ENABLED"].capitalize() == "False" or OPTIONS[ + "ZEMANA_ENABLED"].capitalize() == "False", + "Complete post processing test skipped: " + "set env variables 'THUG_ENABLED', " + "'VIRUSTOTAL_ENABLED' and 'ZEMANA_ENABLED' to True") def test_post_processing(self): t = MailAttachments.withhashes(self.attachments_thug) parameters = { @@ -325,7 +326,7 @@ def test_post_processing(self): "user_agents": ["win7ie90", "winxpie80"], "referer": "http://www.google.com/", "timeout": 300}, - "zemana": 
{"enabled": True, + "zemana": {"enabled": False, "PartnerId": OPTIONS["ZEMANA_PARTNERID"], "UserId": OPTIONS["ZEMANA_USERID"], "ApiKey": OPTIONS["ZEMANA_APIKEY"], diff --git a/tests/test_attachments_utils.py b/tests/test_attachments_utils.py index 58ef03c..8257d90 100644 --- a/tests/test_attachments_utils.py +++ b/tests/test_attachments_utils.py @@ -19,7 +19,6 @@ import logging import os -import sys import unittest import simplejson as json diff --git a/tests/test_bitmap.py b/tests/test_bitmap.py index d805f6c..414d3b4 100644 --- a/tests/test_bitmap.py +++ b/tests/test_bitmap.py @@ -18,8 +18,6 @@ """ import logging -import os -import sys import unittest from context import bitmap diff --git a/tests/test_dialect.py b/tests/test_dialect.py index 1f73efc..f81550f 100644 --- a/tests/test_dialect.py +++ b/tests/test_dialect.py @@ -18,9 +18,7 @@ """ -import datetime import logging -import os import unittest from context import mails diff --git a/tests/test_network_post_processing.py b/tests/test_network_post_processing.py index 6b3a1a5..7cc0f66 100644 --- a/tests/test_network_post_processing.py +++ b/tests/test_network_post_processing.py @@ -90,11 +90,12 @@ def test_shodan(self): shodan(conf, "8.8.8", results) self.assertFalse(results) - @unittest.skipIf(OPTIONS["SHODAN_ENABLED"].capitalize() == "False" or - OPTIONS["VIRUSTOTAL_ENABLED"].capitalize() == "False", - "Complete post processing test skipped: " - "set env variables 'SHODAN_ENABLED' and " - "'VIRUSTOTAL_ENABLED' to True") + @unittest.skipIf( + OPTIONS["SHODAN_ENABLED"].capitalize() == "False" or OPTIONS[ + "VIRUSTOTAL_ENABLED"].capitalize() == "False", + "Complete post processing test skipped: " + "set env variables 'SHODAN_ENABLED' and " + "'VIRUSTOTAL_ENABLED' to True") def test_processors(self): """Test all post processing.""" diff --git a/tests/test_phishing.py b/tests/test_phishing.py index c153580..0db54f3 100644 --- a/tests/test_phishing.py +++ b/tests/test_phishing.py @@ -20,7 +20,6 @@ import logging 
import copy import os -import sys import unittest import mailparser diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..97c9b9a --- /dev/null +++ b/tox.ini @@ -0,0 +1,13 @@ +[tox] +envlist = begin, py27, end + +[testenv:begin] +commands = coverage erase + +[testenv] +deps = -rrequirements-dev.txt +commands = + coverage run --append -m unittest discover -s tests -f -v + +[testenv:end] +commands = coverage html -d {toxinidir}/report From f64f3b4f93a37f53cbc7759c6d3a2c50892d1858 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sat, 26 Jan 2019 11:54:17 +0100 Subject: [PATCH 13/16] Added test_post_processing --- tests/test_attachments.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_attachments.py b/tests/test_attachments.py index baaf5a8..c705042 100644 --- a/tests/test_attachments.py +++ b/tests/test_attachments.py @@ -307,8 +307,7 @@ def test_filtercontenttypes(self): @unittest.skipIf( OPTIONS["THUG_ENABLED"].capitalize() == "False" or OPTIONS[ - "VIRUSTOTAL_ENABLED"].capitalize() == "False" or OPTIONS[ - "ZEMANA_ENABLED"].capitalize() == "False", + "VIRUSTOTAL_ENABLED"].capitalize() == "False", "Complete post processing test skipped: " "set env variables 'THUG_ENABLED', " "'VIRUSTOTAL_ENABLED' and 'ZEMANA_ENABLED' to True") @@ -337,12 +336,12 @@ def test_post_processing(self): for i in t: self.assertIn("tika", i) self.assertIn("virustotal", i) - self.assertIn("zemana", i) + self.assertNotIn("zemana", i) self.assertNotIn("thug", i) for j in i.get("files", []): self.assertIn("virustotal", j) - self.assertIn("zemana", j) + self.assertNotIn("zemana", j) self.assertIn("thug", j) def test_incorrect_padding(self): From f7a41cc9a03eb6de29c3468a59f0afb6cd53fce4 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Mon, 28 Jan 2019 21:53:21 +0100 Subject: [PATCH 14/16] Upgraded streamparse --- requirements-dev.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/requirements-dev.txt b/requirements-dev.txt index 8b6b1e1..1f1a9b9 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,7 +11,7 @@ python-magic simplejson six ssdeep -streamparse==3.13.1 +streamparse==3.15.1 # optional elasticsearch>=6.0.0, <7 diff --git a/requirements.txt b/requirements.txt index ad611fa..62b7719 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,4 @@ python-magic simplejson six ssdeep -streamparse==3.13.1 +streamparse==3.15.1 From fdb0f869fd5d5338ae4a06de235a1a10331164fb Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sun, 3 Feb 2019 01:17:36 +0100 Subject: [PATCH 15/16] Getting only main parts of mail + headers --- requirements-dev.txt | 2 +- requirements.txt | 2 +- src/bolts/tokenizer.py | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 1f1a9b9..12fb2b8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,7 +4,7 @@ astropy==1.3.3 backports.functools-lru-cache>=1.3 chainmap lxml -mail-parser>=3.4.1 +mail-parser>=3.9.0 patool pyparsing python-magic diff --git a/requirements.txt b/requirements.txt index 62b7719..0463e72 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ astropy==1.3.3 backports.functools-lru-cache>=1.3 chainmap lxml -mail-parser>=3.4.1 +mail-parser>=3.9.0 patool pyparsing python-magic diff --git a/src/bolts/tokenizer.py b/src/bolts/tokenizer.py index be97812..7c4d096 100644 --- a/src/bolts/tokenizer.py +++ b/src/bolts/tokenizer.py @@ -101,7 +101,11 @@ def _make_mail(self, tup): mail_type = tup.values[5] rand = '_' + ''.join(random.choice('0123456789') for i in range(10)) self.parser = self.mailparser[mail_type](raw_mail) - mail = self.parser.mail + + # get only the mains headers because this number can explode + # Elastic can't manage all possible headers + mail = self.parser.mail_partial + mail["headers"] = self.parser.headers_json # Data mail sources mail["mail_server"] = tup.values[1] 
From 93ab90ba36b17480ad7cccee6d75b2651d920515 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sat, 9 Feb 2019 11:43:09 +0100 Subject: [PATCH 16/16] Updated version --- project.clj | 2 +- src/options.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/project.clj b/project.clj index e4b69d0..c4df0b0 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject spamscope "2.6.0-SNAPSHOT" +(defproject spamscope "2.7.0-SNAPSHOT" :resource-paths ["_resources"] :target-path "_build" :min-lein-version "2.0.0" diff --git a/src/options.py b/src/options.py index 288fddf..31ef55e 100644 --- a/src/options.py +++ b/src/options.py @@ -19,7 +19,7 @@ from os.path import join -__version__ = "2.6.0" +__version__ = "2.7.0" __configuration_path__ = "/etc/spamscope" __defaults__ = {