Skip to content

Commit

Permalink
Merge branch 'release/2.0.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
fedelemantuano committed Nov 5, 2017
2 parents 8f0404c + 7f42dc7 commit 48e83af
Show file tree
Hide file tree
Showing 60 changed files with 2,874 additions and 778 deletions.
7 changes: 7 additions & 0 deletions .bettercodehub.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
exclude:
- /fabfile.py
- /topologies/.*
component_depth: 1
languages:
- python
- script
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
.DS_Store
.coverage
.env
.idea/
.ropeproject
SpamScope.egg-info/
_build
Expand Down
63 changes: 27 additions & 36 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,59 +18,45 @@ env:

before_install:
- sudo apt-get -qq update
- sudo apt-get install -y build-essential cmake libfuzzy-dev unrar
- sudo apt-get install -y build-essential cmake libfuzzy-dev unrar spamassassin

# Build latest images spamscope-root, spamscope-elasticsearch

# make images
- if [ "$TRAVIS_BRANCH" == "master" ]; then
git clone -b $TRAVIS_BRANCH --single-branch https://github.com/SpamScope/spamscope-dockerfile-root.git $DOCKER_ROOT_PATH;
git clone -b $TRAVIS_BRANCH --single-branch https://github.com/SpamScope/spamscope-dockerfile-elasticsearch.git $DOCKER_ELASTICSEARCH_PATH;
cd $DOCKER_ROOT_PATH;
docker build -t $DOCKER_USERNAME/spamscope-root .;
cd -;
docker run --rm -it $DOCKER_USERNAME/spamscope-root /bin/sh -c 'for f in tests/test_*.py; do python "$f"; done';
docker run --rm -it $DOCKER_USERNAME/spamscope-root /bin/sh -c 'thug -V';
docker run --rm -it $DOCKER_USERNAME/spamscope-root /bin/sh -c 'spamscope-topology -v';
docker run --rm -it $DOCKER_USERNAME/spamscope-root /bin/sh -c 'spamscope-elasticsearch -v';
cd $DOCKER_ELASTICSEARCH_PATH;
docker build -t $DOCKER_USERNAME/spamscope-elasticsearch .;
cd -;
git clone -b $TRAVIS_BRANCH --single-branch https://github.com/SpamScope/spamscope-dockerfile-root.git $DOCKER_ROOT_PATH &&
cd $DOCKER_ROOT_PATH && docker build -t $DOCKER_USERNAME/spamscope-root . && cd - &&
docker run --rm -it $DOCKER_USERNAME/spamscope-root /bin/sh -c 'python -m unittest discover -s tests -f -v' &&
docker run --rm -it $DOCKER_USERNAME/spamscope-root /bin/sh -c 'thug -V && spamscope-topology -v && spamscope-elasticsearch -v' &&
git clone -b $TRAVIS_BRANCH --single-branch https://github.com/SpamScope/spamscope-dockerfile-elasticsearch.git $DOCKER_ELASTICSEARCH_PATH &&
cd $DOCKER_ELASTICSEARCH_PATH && docker build -t $DOCKER_USERNAME/spamscope-elasticsearch . && cd -;
fi

- if [ "$TRAVIS_BRANCH" == "develop" ]; then
git clone -b $TRAVIS_BRANCH --single-branch https://github.com/SpamScope/spamscope-dockerfile-root.git $DOCKER_ROOT_PATH;
git clone -b $TRAVIS_BRANCH --single-branch https://github.com/SpamScope/spamscope-dockerfile-elasticsearch.git $DOCKER_ELASTICSEARCH_PATH;
cd $DOCKER_ROOT_PATH;
docker build -t $DOCKER_USERNAME/spamscope-root:develop .;
cd -;
docker run --rm -it $DOCKER_USERNAME/spamscope-root:develop /bin/sh -c 'for f in tests/test_*.py; do python "$f"; done';
docker run --rm -it $DOCKER_USERNAME/spamscope-root:develop /bin/sh -c 'thug -V';
docker run --rm -it $DOCKER_USERNAME/spamscope-root:develop /bin/sh -c 'spamscope-topology -v';
docker run --rm -it $DOCKER_USERNAME/spamscope-root:develop /bin/sh -c 'spamscope-elasticsearch -v';
cd $DOCKER_ELASTICSEARCH_PATH;
docker build -t $DOCKER_USERNAME/spamscope-elasticsearch:develop .;
cd -;
git clone -b $TRAVIS_BRANCH --single-branch https://github.com/SpamScope/spamscope-dockerfile-root.git $DOCKER_ROOT_PATH &&
cd $DOCKER_ROOT_PATH && docker build -t $DOCKER_USERNAME/spamscope-root:$TRAVIS_BRANCH . && cd - &&
docker run --rm -it $DOCKER_USERNAME/spamscope-root:$TRAVIS_BRANCH /bin/sh -c 'python -m unittest discover -s tests -f -v' &&
docker run --rm -it $DOCKER_USERNAME/spamscope-root:$TRAVIS_BRANCH /bin/sh -c 'thug -V && spamscope-topology -v && spamscope-elasticsearch -v' &&
git clone -b $TRAVIS_BRANCH --single-branch https://github.com/SpamScope/spamscope-dockerfile-elasticsearch.git $DOCKER_ELASTICSEARCH_PATH &&
cd $DOCKER_ELASTICSEARCH_PATH && docker build -t $DOCKER_USERNAME/spamscope-elasticsearch:$TRAVIS_BRANCH . && cd -;
fi

# command to install dependencies
install:
- pip install -r requirements.txt && python setup.py install
- git clone https://$BITBUCKET_USER:[email protected]/$BITBUCKET_USER/zemana-api.git $ZEMANA_PATH;
cd $ZEMANA_PATH && python setup.py install && cd -
- git clone https://$BITBUCKET_USER:[email protected]/$BITBUCKET_USER/zemana-api.git $ZEMANA_PATH && cd $ZEMANA_PATH && python setup.py install && cd -
- src/cli/faup.sh
- cd ${FAUP_PATH}/src/lib/bindings/python && python setup.py install && cd -
- pip install coveralls

before_script:
- curl -o ${TIKA_APP_JAR} https://archive.apache.org/dist/tika/tika-app-${TIKA_VER}.jar

# command to run tests
script:
# Unittests
- python tests/test_attachments.py
- python tests/test_attachments_post_processing.py
- python tests/test_attachments_utils.py
- python tests/test_bitmap.py
- python tests/test_utils.py
# Unittests and coverage
- coverage run --include=src/modules/* --omit=src/modules/abstracts.py -m unittest discover -s tests -f -v
# - python -m unittest discover -s tests -f -v

# cli help
- spamscope-elasticsearch -h
Expand All @@ -80,16 +66,21 @@ script:
- cd tests && python timing_test_search_keywords.py && cd -

after_success:
- coveralls

- if [ "$TRAVIS_BRANCH" == "master" ]; then
docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD";
docker push $DOCKER_USERNAME/spamscope-root;
docker push $DOCKER_USERNAME/spamscope-elasticsearch;
fi

- if [ "$TRAVIS_BRANCH" == "develop" ]; then
docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD";
docker push $DOCKER_USERNAME/spamscope-root:develop;
docker push $DOCKER_USERNAME/spamscope-elasticsearch:develop;
docker push $DOCKER_USERNAME/spamscope-root:$TRAVIS_BRANCH;
docker push $DOCKER_USERNAME/spamscope-elasticsearch:$TRAVIS_BRANCH;
fi

notifications:
slack: spamscope:$SLACK_KEY
email: false
slack:
secure: C9SiQ+mfwtLEpyn7cJRpR4e1KNTNNWVLIe+zmq78+LSCeHOpdsfbCp2Y2/C7nbcBj1yLLxmfa30PkPX2tRBady5MikXj05+tblOic3RdMoAAsGErMwAWEzysWy1z1l1bNrnviQ7rExpyRq+6lkg8zj8vIoKh2WZlcSJpQmKd4gzwITFgdawwiraioEf20I/nVon/U/S4NqINDrv6BhffngKwRcBary7GexPNfu/lTR4bg0Ow8isi4Gbu9naU24ueoiuXsUtXSuwQUPuJEAnWFKsZpJ7g053RpRNkOZAYcsdM5e3RegdPbby9+c/XSAPXGjzWfI8j+EcgTFtDZI0pAStGIilsdf7K9y7OkL0JHgS9lIYtgNLRkMBHdHyvGqEmZYExXl1gwi8RhmfoJdiOwVW0AuqmnqpvmTDaimzyBJZ9hR62u9ZLXh9XP6mNYxgWmRp1zeRfHliFv+5iPIcEiST85IWXQpt2uy+Isb7XaY8edtkfjLkEXiPAtB9jU/QEVRCsw1Ei0QzUWyY7QZxptwNemzC6FnNbVO40O3ifC3KgUfKj34TIQb892OM51yHNabeGkGdHDD91TM0d3tnjXl/6O339lMN97mtkVFtJDS5jxwyfxJtGmSqzvyZ0Xx5WVKcBcIbmDoJ7dk8+GW+p6BclUaKwWLJc4mpiYVOaD/U=
83 changes: 31 additions & 52 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,32 +1,44 @@
[![Build Status](https://travis-ci.org/SpamScope/spamscope.svg?branch=master)](https://travis-ci.org/SpamScope/spamscope)
[![Coverage Status](https://coveralls.io/repos/github/SpamScope/spamscope/badge.svg?branch=develop)](https://coveralls.io/github/SpamScope/spamscope?branch=develop)
[![BCH compliance](https://bettercodehub.com/edge/badge/SpamScope/spamscope?branch=develop)](https://bettercodehub.com/)

![SpamScope](https://raw.githubusercontent.com/SpamScope/spamscope/develop/docs/logo/spamscope.png)


## Overview
SpamScope is an advanced spam analysis tool that uses [Apache Storm](http://storm.apache.org/) with [streamparse](https://github.com/Parsely/streamparse) to process a stream of mails.

It's possible to analyze more than 5 milions of mails (without attachments post processors) for day with a 4 cores server and 4 GB of RAM.
It's possible to analyze more than 5 million mails per day with a 4-core server and 4 GB of RAM (without third party analysis).

![Schema topology](docs/images/schema_topology.png?raw=true "Schema topology")

### Why should I use SpamScope
- It's very fast: the job is split into functionalities that work in parallel.
- It's flexible: you can chose what SpamScope has to do.
- It's flexible: you can choose what SpamScope has to do.
- It's distributed: SpamScope uses Apache Storm, free and open source distributed realtime computation system.
- It makes JSON output that you can save where you want.
- It's easy to set up: there are docker images and docker-compose ready for use.
- It's integrated with Apache Tika, VirusTotal, Thug and Shodan (for now).
- It's free (for special functions you can contact me).
- It's integrated with Apache Tika, VirusTotal, Thug, Shodan and SpamAssassin (for now).
- It's free and open source (for special functions you can contact me).
- It can analyze Outlook msg.

### Distributed
SpamScope uses Apache Storm that allows you to start small and scale horizontally as you grow. Simply add more workers.

### Flexibility
You can chose your mails input sources (with spouts) and your functionalities (with bolts). SpamScope comes with a tokenizer (split mail in token: headers, body, attachments), attachments and phishing analyzer (Which is the target of mails? Is there a malware in attachment?) and JSON output.
You can choose your mails input sources (with **spouts**) and your functionalities (with **bolts**).

SpamScope comes with the following bolts:
- tokenizer splits mail into tokens like headers, body and attachments, and it can filter emails, attachments and ip addresses already seen
- phishing looks for your keywords in email and connects email to targets (bank, your customers, etc.)
- raw_mail is for all third party tools that analyze raw mails like SpamAssassin
- attachments analyzes all mail attachments and uses third party tools like VirusTotal
- network analyzes all sender ip addresses with third party tools like Shodan
- urls extracts all urls in email and attachments
- json_maker and outputs make the json report and save it

### Store where you want
You can build your custom output bolts and store your data in Elasticsearch, Mongo, filesystem, etc.
You can build your custom output bolts and store your data in Elasticsearch, MongoDB, filesystem, etc.

### Build your topology
With streamparse technology you can build your topology in Python, add and/or remove spouts and bolts.
Expand Down Expand Up @@ -55,9 +67,9 @@ SpamScope can be downloaded, used, and modified free of charge. It is available


## Output example
- [Raw mail](https://goo.gl/wMBfbF).
- [Raw example email](https://goo.gl/wMBfbF).
- [SpamScope output](https://goo.gl/MS7ugy).
- [SpamScope output with Thug](https://goo.gl/Y4kWCv).
- [SpamScope complete output](https://goo.gl/fr4i7C).



Expand Down Expand Up @@ -88,8 +100,11 @@ python setup.py install
[Faup](https://github.com/stricaud/faup) stands for Finally An Url Parser and is a library and command line tool to parse URLs and normalize fields.
To install it follow the [wiki](https://github.com/SpamScope/spamscope/wiki/Installation#faup).

### SpamAssassin (optional)
SpamScope can use [SpamAssassin](http://spamassassin.apache.org/), an open source anti-spam tool, to analyze every mail.

### Tika (optional)
SpamScope can use [Tika App](https://tika.apache.org/) to parse every attachment mail.
SpamScope can use [Tika App](https://tika.apache.org/) to parse every attachment.
The **Apache Tika** toolkit detects and extracts metadata and text from over a thousand different file types (such as PPT, XLS, and PDF).
To install it follow the [wiki](https://github.com/SpamScope/spamscope/wiki/Installation#tika-app-optional).
To enable Apache Tika analysis, you should set it in `attachments` section.
Expand Down Expand Up @@ -122,25 +137,23 @@ It's possible to store the results in Redis. In this case you should install `re
## Configuration
For more details please visit the [wiki page](https://github.com/SpamScope/spamscope/wiki/Configuration) or read the comments in the files in `conf` folder.

From SpamScope v1.1 you can decide to **filter mails and attachments** already analyzed. If you enable filter in `tokenizer` section you will enable the RAM database and
SpamScope will check on it to decide if mail/attachment is already analyzed or not. If the mail is in RAM database, SpamScope will not analyze it and will store only the hashes.
You can decide to **filter emails, attachments and ip addresses** already analyzed. All filters are in `tokenizer` bolt section.



## Usage
SpamScope comes with four topologies:
- spamscope_debug
SpamScope comes with three topologies:
- spamscope_debug (save json on file system)
- spamscope_elasticsearch
- spamscope_redis
- spamscope_testing

and a general configuration file `spamscope.example.yml` in `conf/` folder.


If you want to submit a SpamScope topology, use the `spamscope-topology submit` tool. For more details, run `spamscope-topology submit -h`:

```
$ spamscope-topology submit --topology {spamscope_debug,spamscope_elasticsearch,spamscope_redis,spamscope_testing}
$ spamscope-topology submit --topology {spamscope_debug,spamscope_elasticsearch,spamscope_redis}
```


Expand All @@ -160,52 +173,17 @@ It's possible change the default settings for all Apache Storm options. I sugges
- **topology.max.spout.pending**: Apache Storm framework will then throttle your spout as needed to meet the `topology.max.spout.pending` requirement
- **topology.sleep.spout.wait.strategy.time.ms**: max sleep for emit new tuple (mail)

If you don't enable Apache Tika, Thug and VirusTotal, you could use:

```
topology.tick.tuple.freq.secs: 60
topology.max.spout.pending: 500
topology.sleep.spout.wait.strategy.time.ms: 10
```

If **Apache Tika** is enabled:

```
topology.max.spout.pending: 200
```

To submit above options use:

```
sparse submit -f --name topology -o "topology.tick.tuple.freq.secs=60" -o "topology.max.spout.pending=200" -o "topology.sleep.spout.wait.strategy.time.ms=10"
```

**Thug** analysis can be very slow, it depends from attachment. To avoid Apache Storm timeout, you should use these two switches when submit the topology:

```
supervisor.worker.timeout.secs=600
topology.message.timeout.secs=600
```

As you can see, the timeouts are both to 600 seconds. 600 seconds is the default timeout of Thug.

The complete command is:
```
sparse submit -f --name topology -o "topology.tick.tuple.freq.secs=60" -o "topology.max.spout.pending=50" -o "topology.sleep.spout.wait.strategy.time.ms=10" -o "supervisor.worker.timeout.secs=600" -o "topology.message.timeout.secs=600"
```

For more details you can refer [here](http://streamparse.readthedocs.io/en/stable/quickstart.html).


To simplify this operation, SpamScope comes with a custom tool `spamscope-topology submit` where you can choose the values of all these parameters.



## Unittest
SpamScope comes with unittests for each its modules. In bolts and spouts there are no special features, all intelligence is in external modules.
SpamScope comes with unittests for each module. In bolts and spouts there are no special features, all intelligence is in external modules.
All unittests are in `tests` folder.

To have complete tests you should set the followings variables enviroment:
To have complete tests you should set the following environment variables:

```
$ export THUG_ENABLED=True
Expand All @@ -217,6 +195,7 @@ $ export ZEMANA_PARTNERID="your partner id"
$ export ZEMANA_USERID="your userid"
$ export SHODAN_ENABLED=True
$ export SHODAN_APIKEY="your key"
$ export SPAMASSASSIN_ENABLED=True
```


Expand Down
1 change: 1 addition & 0 deletions conf/keywords/subjects.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
- bonifico
- Fattura
- 123456
- urgent
5 changes: 5 additions & 0 deletions conf/keywords/targets.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,8 @@ Google:

Amazon:
- amazon

Test:
- payment
- hampton
- global
Loading

0 comments on commit 48e83af

Please sign in to comment.