-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsentiment.py
69 lines (53 loc) · 2.18 KB
/
sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import functools
import pickle
from statistics import mode

from nltk.classify import ClassifierI
from nltk.tokenize import word_tokenize
class VoteClassifier(ClassifierI):
    """Combine several classifiers into one by majority vote.

    Every wrapped classifier labels the same feature set; the label that
    occurs most often among those votes (as chosen by ``statistics.mode``)
    is the ensemble's answer.
    """

    def __init__(self, *classifiers):
        """Store the classifiers whose votes will be polled."""
        self._classifiers = classifiers

    def _poll(self, features):
        """Return one label per wrapped classifier, in constructor order."""
        return [member.classify(features) for member in self._classifiers]

    def classify(self, features):
        """Return the most common label the classifiers assign to ``features``."""
        return mode(self._poll(features))

    def confidence(self, features):
        """Return the share of classifiers that voted for the winning label.

        The result is ``winning_votes / total_votes``.
        """
        ballots = self._poll(features)
        winner = mode(ballots)
        return ballots.count(winner) / len(ballots)
def find_features(document, word_features):
    """Build a word-presence feature map for ``document``.

    Args:
        document: Text to split into tokens with ``word_tokenize``.
        word_features: Iterable of words to look for among the tokens.

    Returns:
        A dict mapping each entry of ``word_features`` to ``True`` when it
        is exactly equal to one of the document's tokens, else ``False``.
    """
    # Build the token set once: membership tests against a set are O(1),
    # whereas scanning the token list for every feature word is
    # O(len(word_features) * len(tokens)).
    tokens = set(word_tokenize(document))
    return {w: (w in tokens) for w in word_features}
@functools.lru_cache(maxsize=None)
def _load_sentiment_model():
    """Load the pickled classifiers and vocabulary once and memoise them.

    Returns:
        A ``(voted_clf, word_features)`` tuple: a ``VoteClassifier`` wrapping
        the five unpickled classifiers, and the unpickled word-feature
        collection from ``pickled/data/word_features.pickle``.

    Raises:
        OSError: If any of the pickle files cannot be opened.
    """
    classifier_names = (
        "NB_classifier",
        "MNB_classifier",
        "BernoulliNB_classifier",
        "LogisticRegression_classifier",
        "LinearSVC_classifier",
    )

    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # these paths must only ever point at trusted, locally produced pickles.
    classifiers = []
    for name in classifier_names:
        path = "pickled/algorithms/" + name + ".pickle"
        with open(path, "rb") as classifier_file:
            classifiers.append(pickle.load(classifier_file))

    with open("pickled/data/word_features.pickle", "rb") as word_features_file:
        word_features = pickle.load(word_features_file)

    return VoteClassifier(*classifiers), word_features


def sentiment(text):
    """Classify ``text`` with the voting ensemble.

    The pickled models are read from disk on the first call only; later
    calls reuse the in-memory objects instead of unpickling six files again.

    Args:
        text: The text to classify.

    Returns:
        A ``(label, confidence)`` tuple, where ``label`` is the ensemble's
        majority-vote label and ``confidence`` is the fraction of
        classifiers that voted for it.
    """
    voted_clf, word_features = _load_sentiment_model()
    features = find_features(text, word_features)
    return voted_clf.classify(features), voted_clf.confidence(features)