-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnbclassify.py
60 lines (54 loc) · 1.53 KB
/
nbclassify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import pickle
import sys
import os
import math
def _load_model(path):
    """Return the model dictionary unpickled from the file at *path*."""
    # NOTE(security): pickle.load can execute arbitrary code while loading.
    # Only pass model files that come from a trusted source.
    with open(path, "rb") as modelfile:
        return pickle.load(modelfile)


def _score(words, classes, model):
    """Return the accumulated score of the token list *words* for one class.

    The score starts at ``model['Meta_Info']["P(<class>)"]`` and the stored
    value ``model[<class>][word]`` is added for every token found there.
    The values are summed, so they are presumably log-probabilities
    written by the training script -- TODO confirm against the trainer.
    """
    meta = model['Meta_Info']
    known = model[classes]
    prob = meta["P(" + classes + ")"]
    unseen = None
    for word in words:
        if word in known:
            prob += known[word]
        else:
            # Word absent from this class: add-one smoothing. The term is
            # the same for every unseen word of a class, so it is computed
            # once, and only when it is actually needed.
            if unseen is None:
                unseen = math.log(
                    1 / (meta[classes + "_words"] + meta['size_vocab'])
                )
            prob += unseen
    return prob


def classify(words, model):
    """Return the name of the best-scoring class for the tokens in *words*.

    Classes are taken from ``model['Meta_Info']['names_of_classes']``; on a
    tie the class listed first wins.

    Raises:
        ValueError: if the model lists no classes.
    """
    best_name = None
    # Start from -inf instead of a finite magic number: the summed score of
    # a long document can be lower than any fixed sentinel, which used to
    # leave the result unset (or stale from the previous document).
    best_prob = -math.inf
    for classes in model['Meta_Info']['names_of_classes']:
        prob = _score(words, classes, model)
        if best_name is None or prob > best_prob:
            best_name = classes
            best_prob = prob
    if best_name is None:
        raise ValueError("model does not list any classes")
    return best_name


def main(argv=None):
    """Classify every line of the test file and print one class per line.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the pickled model file
    and ``argv[2]`` is the test file, where each line represents one
    document made of whitespace-separated tokens.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        sys.exit("usage: nbclassify.py MODELFILE TESTFILE")
    model = _load_model(args[1])
    with open(args[2], "r") as testfile:
        # Read the test file one line at a time, each representing one doc.
        for line in testfile:
            print(classify(line.split(), model))


# Evaluation reminders (this script itself performs no evaluation):
#   precision = docs of a class identified correctly / docs identified as that class
#   recall    = docs of a class identified correctly / total docs of that class
if __name__ == "__main__":
    main()