-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add inspector classes for spam checking. Updates to INSTALL
- Legacy-Id: 434
- Loading branch information
Showing
21 changed files
with
410 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
|
||
from .celeryapp import app | ||
|
||
__version__ = "1.2.6" | ||
__version__ = "1.3.0" | ||
|
||
__date__ = "$Date$" | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
'''This module contains classes which inherit from Inspector. They are used | ||
to inspect incoming messages and perform some auxiliary processing, e.g. spam | ||
checkers.''' | ||
|
||
from django.conf import settings | ||
|
||
class SpamMessage(Exception):
    '''Raised by spam inspectors to signal that a message was identified as spam.'''
|
||
|
||
class InspectorMeta(type):
    '''Metaclass that keeps a registry of the classes derived from a base class.

    The first class created with this metaclass (the base) receives an empty
    ``registry`` dict. Every class created afterwards that can already see a
    ``registry`` attribute is stored in it under its lower-cased class name.
    '''
    def __init__(cls, name, bases, dct):
        if hasattr(cls, 'registry'):
            # A registry is already visible (inherited): this is a derived
            # class, so record it under its lower-cased name.
            cls.registry[name.lower()] = cls
        else:
            # No registry yet: this is the base class, start with an empty one.
            cls.registry = {}

        super(InspectorMeta, cls).__init__(name, bases, dct)
|
||
|
||
class Inspector(object):
    '''The base class for inspector classes.

    Takes a message wrapper object (its ``listname`` attribute is read) and an
    optional ``options`` dict. Inherit from this class and implement
    has_condition(), handle_file() and raise_error(). Call inspect() to run
    the inspection.

    Options consulted by inspect():
      'includes'   -- if present, only lists named in it are inspected
      'check_only' -- if true, handle_file() is skipped
    '''
    __metaclass__ = InspectorMeta

    def __init__(self, message_wrapper, options=None):
        '''Store the wrapper and resolve the options for this inspector.

        When ``options`` is empty or not given, the entry of
        settings.INSPECTORS keyed by this class's name is used instead.
        '''
        self.message_wrapper = message_wrapper
        self.listname = message_wrapper.listname
        if options:
            self.options = options
        else:
            # .get() returns None when this class has no entry in
            # settings.INSPECTORS; fall back to an empty dict so inspect()
            # can safely use ``in`` and ``.get()`` on self.options.
            self.options = settings.INSPECTORS.get(self.__class__.__name__) or {}

    def inspect(self):
        '''Run the inspection.

        Does nothing when an 'includes' option exists and this list is not in
        it. Otherwise, if has_condition() is true, calls handle_file() (unless
        'check_only' is set) and then raise_error().
        '''
        if 'includes' in self.options and self.listname not in self.options['includes']:
            return
        if self.has_condition():
            if not self.options.get('check_only'):
                self.handle_file()
            # The condition is reported even in check-only mode.
            self.raise_error()

    def has_condition(self):
        '''Return True if the inspected condition applies. Subclasses must implement.'''
        raise NotImplementedError

    def handle_file(self):
        '''Act on the message when the condition applies. Subclasses must implement.'''
        raise NotImplementedError

    def raise_error(self):
        '''Raise the exception that reports the condition. Subclasses must implement.'''
        raise NotImplementedError
|
||
|
||
class SpamInspector(Inspector):
    '''Base spam handling class. To write a spam filter inherit from this class
    and implement has_condition(). When spam is found, handle_file() writes the
    message with subdir='_spam' and raise_error() raises SpamMessage.'''

    def has_condition(self):
        '''Return True if the message is spam. Concrete filters must implement.'''
        raise NotImplementedError

    def handle_file(self):
        '''Write the message out under the '_spam' subdirectory.'''
        wrapper = self.message_wrapper
        wrapper.write_msg(subdir='_spam')

    def raise_error(self):
        '''Raise SpamMessage carrying the Message-ID of the offending message.'''
        text = 'Spam Detected. Message-ID: {}'.format(self.message_wrapper.msgid)
        raise SpamMessage(text)
|
||
|
||
class ListIdSpamInspector(SpamInspector):
    '''Treats a message as spam when its List-Id header is missing or does not
    contain the list name.'''
    def has_condition(self):
        '''Return False only when a List-Id header exists and contains the
        list name; True otherwise.'''
        header = self.message_wrapper.email_message.get('List-Id')
        looks_genuine = bool(header) and self.listname in header
        return not looks_genuine
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#!/usr/bin/python | ||
''' | ||
This script will scan messages in the archive, identify spam and remove it (move it | ||
to the _spam directory) | ||
''' | ||
|
||
# Set PYTHONPATH and load environment variables for standalone script ----------------- | ||
# for file living in project/bin/ | ||
import os | ||
import sys | ||
# Directory three dirname() levels above this script's absolute path; put it
# at the front of sys.path (once) so the project packages can be imported.
path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
if path not in sys.path:
    sys.path.insert(0, path)
|
||
import django | ||
os.environ['DJANGO_SETTINGS_MODULE'] = 'mlarchive.settings.development' | ||
django.setup() | ||
|
||
# ------------------------------------------------------------------------------------- | ||
import argparse | ||
import email | ||
|
||
from celery_haystack.utils import get_update_task | ||
from django.conf import settings | ||
|
||
from mlarchive.archive.forms import get_list_info | ||
from mlarchive.archive.inspectors import * | ||
from mlarchive.archive.management.commands._classes import MessageWrapper | ||
from mlarchive.archive.models import * | ||
|
||
import logging | ||
# Log file lives under the configured DATA_ROOT; record everything from DEBUG up.
logpath = os.path.join(settings.DATA_ROOT, 'log/check_spam.log')
logging.basicConfig(level=logging.DEBUG, filename=logpath)
|
||
|
||
def main():
    '''Scan one email list for spam using the chosen inspector class.

    Command-line options:
      -i/--inspector  name of the inspector class to use (required)
      -l/--list       name of the email list to check (required)
      -r/--remove     remove spam; without it the run is check-only

    Every message of the list is read from its file, wrapped in a
    MessageWrapper and inspected. A SpamMessage raised by the inspector
    counts the message as spam; with --remove the Message record is deleted
    as well. The "scanned" and "spam" totals are printed at the end.

    Exits through parser.error() when the list does not exist or the
    inspector name cannot be resolved.
    '''
    # parse arguments
    parser = argparse.ArgumentParser(description='Check archive for spam')
    parser.add_argument('-i', '--inspector', required=True,
                        help="enter the inspector class to use")
    parser.add_argument('-l', '--list', required=True,
                        help="enter the email list name to check")
    parser.add_argument('-r', '--remove', action='store_true',
                        help="remove spam. default is check only")
    args = parser.parse_args()

    if not EmailList.objects.filter(name=args.list).exists():
        parser.error('List {} does not exist'.format(args.list))

    # NOTE(review): eval() executes whatever expression is given on the
    # command line. Only run this script with trusted arguments; consider
    # resolving the name through an explicit lookup instead.
    try:
        inspector_class = eval(args.inspector)
    except (NameError, SyntaxError):
        parser.error('Unknown inspector class {}'.format(args.inspector))

    messages = Message.objects.filter(email_list__name=args.list)
    stat = {'scanned': messages.count(), 'spam': 0}

    for message in messages:
        with open(message.get_file_path()) as f:
            msg = email.message_from_file(f)
        mw = MessageWrapper(msg, args.list)
        # check_only keeps the inspector from moving files unless --remove is given
        inspector = inspector_class(mw, {'check_only': not args.remove})
        try:
            inspector.inspect()
        except SpamMessage:
            stat['spam'] += 1
            if args.remove:
                message.delete()

    for k, v in stat.items():
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("{}:{}".format(k, v))
|
||
if __name__ == "__main__": | ||
main() |
File renamed without changes.
Oops, something went wrong.