-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpercepclassify.py
71 lines (52 loc) · 1.96 KB
/
percepclassify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Perceptron classifier: labels each input line using a pickled weight vector.
import sys
import re
import pickle
import codecs
class perceptron_classify:
    """Label whitespace-tokenised text with a multi-class perceptron model.

    The model (``weight_vector``) is a mapping of ``label -> {word: weight}``.
    A line's score for a label is the sum of the weights of its words under
    that label; the label with the highest score wins.
    """

    def check_dev_error(self, dev_file, weight_vector):
        """Return the fraction of lines in *dev_file* that are misclassified.

        Each line is split on whitespace: the first token is taken as the
        true label and the remaining tokens are classified with
        :meth:`classify`. Every line is counted, including blank ones (whose
        label is the empty string).

        Args:
            dev_file: Path of the labelled file, read as latin-1 with
                undecodable bytes ignored.
            weight_vector: Mapping ``label -> {word: weight}``.

        Returns:
            ``misclassified / total`` as a float, or ``0.0`` when the file
            has no lines.
        """
        total = 0.0
        count = 0.0
        # Open read-only: evaluation never writes, and a read/write mode
        # would fail on files the user is only allowed to read. The context
        # manager closes the file even if classification raises.
        with codecs.open(dev_file, 'r', encoding='latin-1',
                         errors='ignore') as dev_text:
            for line in dev_text:
                total += 1
                words = re.split(r'\s+', line.rstrip())
                actual_label = words[0]
                w_line = ' '.join(words[1:])
                if self.classify(w_line, weight_vector) != actual_label:
                    count += 1
        if total != 0:
            return count / total
        return 0.0

    def classify(self, line, weight_vector):
        """Return the label whose weights give *line* the highest score.

        Args:
            line: Text to classify; trailing whitespace is stripped and the
                rest is split on runs of whitespace.
            weight_vector: Mapping ``label -> {word: weight}``. Words missing
                from a label's weights contribute nothing to its score.

        Returns:
            The best-scoring label. On a tie, the label that comes first in
            the iteration order of *weight_vector* is returned.

        Raises:
            IndexError: If *weight_vector* contains no labels.
        """
        words = re.split(r'\s+', line.rstrip())
        best_label = None
        best_score = None
        for label, weights in weight_vector.items():
            score = 0.0
            for word in words:
                if word in weights:
                    score += weights[word]
            # Strict comparison keeps the earliest label on ties, matching a
            # stable descending sort of the scores.
            if best_score is None or score > best_score:
                best_label = label
                best_score = score
        if best_score is None:
            raise IndexError("weight_vector contains no labels")
        return best_label
def main():
    """Classify every line of standard input and print one label per line.

    Usage: ``python3 percepclassify.py weightsfile < input_file``

    ``sys.argv[1]`` names a pickle file holding the weight vector that is
    passed to ``perceptron_classify.classify``. Input and output are treated
    as latin-1. When no weights file is given, a usage message is printed
    and the function returns without reading any input.
    """
    if len(sys.argv) < 2:
        print("Usage : python3 percepclassify.py weightsfile < input_file")
        return
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only load weight files that come from a trusted source.
    with open(sys.argv[1], 'rb') as weights_file:
        feature_weights = pickle.load(weights_file)
    perceptron = perceptron_classify()
    # The codecs reader/writer operate on bytes. On Python 3 the standard
    # streams are text streams, so wrap their underlying byte buffers; fall
    # back to the stream itself where no ``buffer`` attribute exists.
    sys.stdin = codecs.getreader('latin-1')(
        getattr(sys.stdin, 'buffer', sys.stdin))
    sys.stdout = codecs.getwriter('latin-1')(
        getattr(sys.stdout, 'buffer', sys.stdout))
    for line in sys.stdin:
        classified_label = perceptron.classify(line, feature_weights)
        sys.stdout.write(classified_label + "\n")
    # Actually call flush so buffered labels reach the output.
    sys.stdout.flush()
# Run the classifier only when executed as a script, not when imported.
if __name__ == "__main__":
    main()