-
Notifications
You must be signed in to change notification settings - Fork 0
/
repl.py
65 lines (43 loc) · 1.65 KB
/
repl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# The REPL where you write down your review and get the sentiment.
import os
import pickle
import numpy as np
from scipy import sparse
from utils import nlp_ize
from sklearn.preprocessing import LabelBinarizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelBinarizer
from sklearn.naive_bayes import MultinomialNB
# Make sure every pre-generated dataset file is present before loading
# anything (the message below points to gends.py as their generator).
dataset = [
    'dataset/ds_review.npz',
    'dataset/ds_sentiment.npy',
    'dataset/feature_names.pkl',
]
for item in dataset:
    if not os.path.exists(item):
        print("Dataset not found! Try running 'python gends.py' to generate the datasets.")
        # Raise SystemExit directly instead of calling exit(): that helper is
        # injected by the `site` module for interactive use and is absent
        # under `python -S` or in frozen builds. The exit status is unchanged.
        raise SystemExit(-1)
# Load the pre-built training set: a sparse review matrix and its
# sentiment labels.
train_review = sparse.load_npz('dataset/ds_review.npz')
train_sentiment = np.load('dataset/ds_sentiment.npy')

# Get the feature names (vocabulary) the training matrix was built with.
# NOTE(review): unpickling runs whatever the file encodes, so only load a
# feature_names.pkl that you generated yourself.
with open('dataset/feature_names.pkl', mode='rb') as vocab_file:
    feature_names = pickle.load(vocab_file)

# Train the Bayes classifier; the labels are flattened to 1-D first.
print('Training Naive Bayes...')
mnb = MultinomialNB().fit(train_review, np.ravel(train_sentiment))

# Pin the vectorizer to the stored vocabulary so new text is counted
# against the same feature names.
print('Initializing vectorizer...')
cv = CountVectorizer(vocabulary=feature_names)

# We only have to deal with the 'positive' and 'negative' labels.
print('Preparing binarizer...')
lb = LabelBinarizer().fit(['positive', 'negative'])
print('Entering REPL...', end='\n\n')

# Read-classify-print loop: each line typed is treated as one review.
# Type '.exit' (or send EOF / Ctrl-C) to leave.
while True:
    print('>>> ', end='')
    try:
        review = input()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D, Ctrl-C or exhausted piped input: finish the prompt line
        # and leave cleanly instead of dying with a traceback.
        print()
        break

    command = review.strip()
    if command == '.exit':
        break
    if not command:
        # Nothing to classify on a blank line; just prompt again.
        continue

    # nlp_ize comes from utils; presumably it applies the same text
    # normalisation used when the dataset was generated (confirm in utils).
    features = cv.transform([nlp_ize(review)])

    # Map the predicted label back to its string form.
    # NOTE(review): assumes the stored labels use the same encoding that
    # LabelBinarizer derives from these two strings; confirm in gends.py.
    response = lb.inverse_transform(mnb.predict(features))
    if response[0] == 'positive':
        print('-> Your review is positive! Good job.')
    else:
        print('-> Your review is negative! Good luck with that.')