conf/spamscope.example.yml

---
# Spouts configurations
# Spout file on file system
files-mails:

    # Reload new mails after reload.mails mails have been analyzed
    reload.mails: 1000

    # Mails that have been in processing for more than fail.after.seconds
    # are failed
    fail.after.seconds: 60

    # Post processing
    post_processing:

        # What to do with the analyzed mails: move or remove (default: remove)
        what: remove

        # Where to move the analyzed mails (default: /tmp/moved)
        where: /tmp/moved

        # Where to move the failed mails (default: /tmp/failed)
        where.failed: /tmp/failed

    # Mailboxes: one entry per mailbox (here: test and test1)
    mailboxes:
        test:
            mail_server: hostname
            # The trust string is used to get the sender IP address from the
            # mail server.
            # More details:
            # https://github.com/SpamScope/mail-parser/blob/v0.4.6/mailparser/__init__.py#L221
            trust_string: "test_trust_string"
            # NOTE(review): presumably a glob pattern matched against the file
            # names found in path_mails - confirm
            files_pattern: "*untroubled*"
            priority: 1
            path_mails: /path/mails1

            # This flag enables Outlook msg parsing for every mail in the
            # mailbox. Default value is false
            outlook: false

            # List of other headers to get
            headers:
                - custom_one
                - custom_two
        test1:
            mail_server: hostname
            trust_string: "test1_trust_string"
            files_pattern: "*"
            priority: 2
            path_mails: /path/mails2


# Spout file on file system
# Use an iterator. Safe for RAM
iter-files-mails:

    # Mails that have been in processing for more than fail.after.seconds
    # are failed
    fail.after.seconds: 60

    # Post processing
    post_processing:

        # What to do with the analyzed mails: move or remove (default: remove)
        what: remove

        # Where to move the analyzed mails (default: /tmp/moved)
        where: /tmp/moved

        # Where to move the failed mails (default: /tmp/failed)
        where.failed: /tmp/failed

    # Mailboxes: one entry per mailbox (here: test and test1)
    mailboxes:
        test:
            mail_server: hostname
            # The trust string is used to get the sender IP address from the
            # mail server.
            # More details:
            # https://github.com/SpamScope/mail-parser/blob/v0.4.6/mailparser/__init__.py#L221
            trust_string: "test_trust_string"
            # NOTE(review): presumably a glob pattern matched against the file
            # names found in path_mails - confirm
            files_pattern: "*untroubled*"
            path_mails: /path/mails1

            # This flag enables Outlook msg parsing for every mail in the
            # mailbox. Default value is false
            outlook: false

            # List of other headers to get
            headers:
                - custom_one
                - custom_two
        test1:
            mail_server: hostname
            trust_string: "test1_trust_string"
            files_pattern: "*"
            path_mails: /path/mails2

# Bolts configurations
# Phishing bolt configuration
phishing:
    # Keyword lists, given as paths to a generic and a custom file
    lists:
        subjects:
            # Suspect subjects
            # Example in conf/keywords/subjects.example.yml
            generic: /path/to/generic_subjects
            custom: /path/to/custom_subjects

        targets:
            # Keywords for every target
            # Example in conf/keywords/targets.example.yml
            generic: /path/to/generic_targets
            custom: /path/to/custom_targets


# Tokenizer configuration: filtering of already seen mails, attachments
# and IP addresses
tokenizer:
    # Persistent path where the dumps of hashes are stored.
    persistent_path: /tmp

    # If true, mails with the same hash are filtered and not analyzed.
    # Only the body will not be saved
    filter_mails: true

    # Max number of mail hashes saved for the filter function
    maxlen_mails: 1000000

    # If true, attachments with the same hash are filtered and not analyzed.
    # Only hashes will be saved
    filter_attachments: true

    # Max number of attachment hashes saved for the filter function
    maxlen_attachments: 1000000

    # If true, the same IP address is filtered and not analyzed.
    filter_network: true

    # Max number of hashes saved for the filter function
    maxlen_network: 1000000


# Network bolt configuration
network:
    # Shodan analysis https://www.shodan.io/
    shodan:
        enabled: false
        # Keep the key quoted: an all-digit key, or one starting with a YAML
        # indicator character, would otherwise not be loaded as a string
        api_key: 'xxxxxxxxxxxxxxxxxxxxxxxxxx'

    # VirusTotal analysis https://www.virustotal.com/
    virustotal:
        enabled: false
        # Keep the key quoted so it is always loaded as a string
        api_key: 'xxxxxxxxxxxxxxxxxxxxxxxxxx'


# RawMail bolt configuration
raw_mail:
    # SpamAssassin analysis: https://spamassassin.apache.org/
    spamassassin:
        enabled: false

    # Dialect analysis: https://sissden.eu/blog/analysis-of-smtp-dialects
    dialect:
        enabled: false

        # Elasticsearch instance where the Postfix logs are stored
        elasticsearch:
            hosts:
                - "node1:9200"
                - "node2"

            # Prefix (ending with a dash '-') of the Postfix indices in
            # Elasticsearch. The format of the indices should be
            # postfix-2018.12.30
            index.prefix.postfix: postfix-


# Attachments bolt configuration
attachments:
    # Settings shared by all the attachment analyses
    commons:
        # Enable or disable the filter on size
        size.filter.enabled: false

        # Max size to analyze, in bytes
        max.size: 3145728

        # The lists of all components must be under the lists keyword to be
        # loaded automatically
        lists:
            blacklist_content_types:
                # All content types to remove from results
                # Example in content_types/blacklist/generic.example.yml
                generic: /path/to/generic_content_types
                custom: /path/to/custom_content_types

            not_extract_content_types:
                # All content types that you don't want to extract from
                # archives. Example: application/java-archive (jar), you can
                # save the jar but do not extract the classes inside.
                generic: /path/to/generic_content_types
                custom: /path/to/custom_content_types

    # Apache Tika analysis: https://tika.apache.org/
    tika:
        # Tika can be enabled, but it's very slow
        enabled: false

        path_jar: /path/to/tika_jar

        # Like the -Xmx parameter of a Java application.
        # Explicit null: no value is set in this example
        memory_allocation: null

        # All content types to extract details from
        # Example in content_types/tika/generic.example.yml
        lists:
            whitelist_content_types:
                generic: /path/to/generic_content_types
                custom: /path/to/custom_content_types

    # VirusTotal analysis: https://www.virustotal.com/
    virustotal:
        enabled: false

        # Keep the key quoted so it is always loaded as a string
        api_key: 'xxxxxxxxxxxxxxxxxxxxxxxxxx'

        # All content types to analyze with VirusTotal
        # Example in content_types/virustotal/generic.example.yml
        # Currently not active
        lists:
            whitelist_content_types:
                generic: /path/to/generic_content_types
                custom: /path/to/custom_content_types

    # Thug analysis: https://github.com/buffer/thug
    thug:
        enabled: false

        # File extensions to submit to Thug
        extensions:
            - .html
            - .js
            - .jse

        # More details:
        # http://buffer.github.io/thug/doc/usage.html#basic-usage
        #
        # List of user agents to use for the analysis.
        # SpamScope starts one analysis for each user agent
        user_agents:
            - win7ie90

        referer: http://www.google.com/

        # Set the single analysis timeout in seconds
        # This value MUST be lower than supervisor.worker.timeout.secs
        # of SpamScope:
        # nr. user agents * timeout < supervisor.worker.timeout.secs
        timeout: 10

        # Set the connect timeout in seconds
        connect_timeout: 1

        disable_cert_logging: true
        disable_code_logging: true

        # Maximum pages to fetch
        # For SpamScope a good value is 1 to have a short analysis
        threshold: 1

    # Zemana Antimalware analysis: https://www.zemana.com/
    # Only for premium users
    zemana:
        enabled: false

        PartnerId: xxxxx
        UserId: xxxxx
        # Keep the key quoted so it is always loaded as a string
        ApiKey: 'xxxxx'
        useragent: SpamScope

    # This plugin stores the samples on the file system
    # in date format subfolders (YYYY-MM-DD)
    store_samples:
        enabled: false
        base_path: /tmp


# Urls
urls:
    whitelists:
        # Whitelists must contain second-level domains only
        # Example in conf/whitelists/generic.example.yml
        alexa:
            path: /path/to/alexa
            # NOTE(review): left unquoted on purpose; YAML loaders that resolve
            # timestamps return a date-time value here - confirm that the
            # consumer relies on it before quoting
            expiry: 2016-06-28T12:33:00.000Z  # date ISO 8601 only UTC
        test1:
            path: /path/to/test1
            # Explicit null: no expiry date is set for this whitelist
            expiry: null
        test2:
            # The expiry key is omitted for this whitelist
            path: /path/to/test2


# Output debug bolt configuration
output-debug:
    # NOTE(review): presumably the indentation width of the JSON written by
    # this bolt - confirm
    json.indent: 4
    # NOTE(review): presumably the path where the debug output is written -
    # confirm
    output.path: /path/to/output


# Output Redis
# hosts:
# The hostname(s) of your Redis server(s). Ports may be specified on any
# hostname, which will override the global port config.
# If the hosts list is an array, it will pick one random host to connect to,
# if that host is disconnected it will then pick another.

# For example:
# "127.0.0.1"
# ["127.0.0.1", "127.0.0.2"]
# ["127.0.0.1:6380", "127.0.0.1"]

# shuffle_hosts:
# Shuffle the host list during connection.

# port:
# The default port to connect on. Can be overridden on any host.

# db:
# The Redis database number.

# password:
# Password to authenticate with.  There is no authentication by default.

# reconnect_interval:
# Interval for reconnecting to failed Redis connections.

# max_retry:
# number of connect retries

# flush_size:
# max number of json to send in a bulk

# queue_name:
# name of Redis server list

output-redis:
    servers:
        # A port given on a host overrides the global port below
        hosts:
            - "localhost:6379"
            - "localhost:6380"
        shuffle_hosts: true
        port: 6379
        db: 0
        # Explicit null: there is no authentication by default
        password: null
        reconnect_interval: 1
        max_retry: 60
        # Max number of json to send in a bulk
        flush_size: 50
        queue_mails: spamscope_mails
        queue_attachments: spamscope_attachments


# Output Elasticsearch
output-elasticsearch:
    servers:
        hosts:
            - "node1:9200"
            - "node2"

        # Index prefixes end with a dash '-': SpamScope uses one index per day
        index.prefix.mails: spamscope_mails-
        doc.type.mails: analysis

        index.prefix.attachments: spamscope_attachments-
        doc.type.attachments: analysis

        # Max number of json to send in a bulk
        flush_size: 50