-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclassify.py
58 lines (48 loc) · 1.87 KB
/
classify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#Code: Tarun Pathak
#------------------
#importing libraries
from gmail import Gmail
from random import randint
import joblib, sys, logging, datetime
from helper_functions import get_current_directory
#main
def main():
    """Classify unread Gmail messages and move predicted spam to the Spam folder.

    Steps:
      1. Connect to Gmail and fetch the unread emails.
      2. Load the persisted classifier and TF-IDF vectorizer from ``model/``.
      3. For each unread email, classify ``subject + body`` and, when the
         prediction is 1 (spam), move that email from 'Inbox' to
         '[Gmail]/Spam', printing and logging what was moved.

    Credentials are read from the ``GMAIL_USER`` and ``GMAIL_PASSWORD``
    environment variables so that no secret lives in the source file.

    Exits with status 1 when credentials are missing and with status 0
    when fetching unread emails does not return 'OK'.
    """
    # Local import: the file-level import block does not provide ``os``.
    import os

    #connecting to gmail (credentials come from the environment, never from source)
    user = os.environ.get('GMAIL_USER')
    password = os.environ.get('GMAIL_PASSWORD')
    if not user or not password:
        print('Set the GMAIL_USER and GMAIL_PASSWORD environment variables.')
        sys.exit(1)
    mail = Gmail(user, password)

    #fetching unread emails
    result, data = mail.get_unread_emails()

    #exiting (if no data found)
    if result != 'OK':
        print('No unread emails found.')
        sys.exit(0)

    #loading model and vectorizer
    # NOTE(review): joblib.load unpickles arbitrary objects; only load model
    # files that were produced by this project and are stored in a trusted place.
    path = get_current_directory()
    clf = joblib.load(os.path.join(path, 'model', 'naive_bayes.sav'))
    vectorizer = joblib.load(os.path.join(path, 'model', 'tf_idf.sav'))

    #setting up log file
    # Zero-padded date keeps names unambiguous (e.g. 20210111 vs 20211101);
    # the random suffix keeps several runs on the same day apart.
    log_dir = os.path.join(path, 'logs')
    os.makedirs(log_dir, exist_ok=True)
    stamp = datetime.datetime.now().strftime('%Y%m%d')
    log_file = os.path.join(log_dir, stamp + '-' + str(randint(0, 10000)) + '.txt')

    # A logger without a handler writes nowhere, and the root logger drops
    # INFO records by default, so attach a file handler to our own logger
    # and log through it.
    logger = logging.getLogger(log_file)
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.FileHandler(log_file, encoding='utf-8'))
    logger.info('Starting...')

    #fetching email contents
    #storing email subject along with body
    #this is to classify those emails which don't have text but
    #just an image embedded in the body
    for uid in data:
        parsed = mail.parse_email(uid)
        subject = parsed['Subject']
        body = parsed['Body']
        # Treat a missing subject/body as empty text instead of crashing.
        content = [(subject or '') + '\n' + (body or '')]

        #extracting features
        #getting prediction from classifier
        features = vectorizer.transform(content)
        pred = clf.predict(features)

        # Label 1 is treated as spam; everything else stays in the inbox.
        if pred[0] != 1:
            continue

        #moving to spam folder (if predicted as spam)
        # Move the email that was just classified (uid), not a fixed
        # element of the unread list.
        mov = mail.move_email(uid, 'Inbox', '[Gmail]/Spam')
        if mov == 'OK':
            message = 'Following email moved to spam:\nuid: ' + str(uid) + '\nSubject: ' + str(subject) + '\nContent: ' + str(body) + '\n----------------------------------'
            print(message)
            logger.info(message)
        else:
            logger.warning('Could not move email to spam, uid: %s (status: %s)', uid, mov)

    #message
    print('Program Ended')
    logger.info('Ended.')


if __name__ == '__main__':
    main()