-
Notifications
You must be signed in to change notification settings - Fork 6
/
multi_NB.py
104 lines (93 loc) · 3.22 KB
/
multi_NB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
'''
使用机器学习库sklearn处理多分类问题
'''
import random
import os
import json
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import zero_one_loss
from sklearn.naive_bayes import MultinomialNB
import Bayes as bayes
# Directory that holds this script; train.txt is looked up next to it.
base_dir = os.path.dirname(__file__)

# Settings passed to the AdaBoost ensemble constructed later in this file.
n_estimators = 500
learning_rate = 1.0

# The same train.txt feeds both the vocabulary builder and the data loader.
_train_path = os.path.join(base_dir, "train.txt")
vocabList = bayes.build_key_word(_train_path)
line_cut, label = bayes.loadDataSet(_train_path)
# NOTE(review): presumably one feature vector per entry of line_cut, indexed
# by vocabList -- confirm against Bayes.setOfWordsListToVecTor.
train_mood_array = bayes.setOfWordsListToVecTor(vocabList, line_cut)
# Hold out randomly chosen samples for evaluation. Each selected sample and
# its label are removed from the training data so the two sets stay disjoint.
# NOTE(review): assumes train_mood_array and label are parallel Python lists
# (the original code already deleted from both by index) -- confirm.
test_word_array = []
test_word_arrayLabel = []
testCount = 100  # number of samples to move from the training set to the test set

# Never try to draw more samples than are available.
for _ in range(min(testCount, len(train_mood_array))):
    # randrange() never returns the upper bound, whereas
    # int(random.uniform(0, n)) may yield n because of float rounding.
    randomIndex = random.randrange(len(train_mood_array))
    # Pop sample and label with the same index to keep the lists aligned.
    test_word_array.append(train_mood_array.pop(randomIndex))
    test_word_arrayLabel.append(label.pop(randomIndex))
# Boost a multinomial naive-Bayes learner with AdaBoost and fit it on the
# samples that remain after the hold-out split.
# NOTE(review): newer scikit-learn releases rename `base_estimator` to
# `estimator` and deprecate "SAMME.R" -- confirm against the installed version.
multi = MultinomialNB()
ada_real = AdaBoostClassifier(
    base_estimator=multi,
    n_estimators=n_estimators,
    learning_rate=learning_rate,
    algorithm="SAMME.R",
)
ada_real.fit(train_mood_array, label)


def _staged_errors(samples, targets):
    """Return the zero-one loss of ``ada_real`` after each boosting stage.

    The result is a 1-D array of length ``n_estimators``; slots for stages
    that ``staged_predict`` does not yield keep their initial value of 0.
    """
    errors = np.zeros((n_estimators,))
    for stage, staged_pred in enumerate(ada_real.staged_predict(samples)):
        # Fraction of predictions that differ from the targets at this stage.
        errors[stage] = zero_one_loss(staged_pred, targets)
    return errors


# Error curve on the held-out samples.
ada_real_err = _staged_errors(test_word_array, test_word_arrayLabel)
# Error curve on the training samples themselves.
ada_real_err_train = _staged_errors(train_mood_array, label)
def test(word):
    """Return the class predicted by ``ada_real`` for the text *word*.

    The text is converted with ``bayes.build_word_array`` and then mapped
    onto ``vocabList`` with ``bayes.setOfWordsListToVecTor``; the first
    element of the classifier's prediction is returned.
    """
    tokens = bayes.build_word_array(word)
    features = bayes.setOfWordsListToVecTor(vocabList, tokens)
    predictions = ada_real.predict(features)
    return predictions[0]
def _round5(value):
    """Round *value* to five decimal places using fixed-point formatting."""
    return float('%.5f' % value)


def _stretch_scores(scores):
    """Amplify the differences between nearly identical scores.

    Every score is rounded to five decimals. The leading characters shared
    by the decimal text of the largest and smallest rounded score are then
    subtracted (as a number) from each score, and the remainder is scaled by
    ``10 ** (len(prefix) - 2)``. Example: 0.33412 / 0.33301 / 0.33287 share
    the prefix "0.33" and become 0.412 / 0.301 / 0.287.

    When the two extremes share no numeric prefix (for example 1.0 versus
    0.0, or a value printed in scientific notation) there is nothing to
    strip, and the rounded scores are returned unchanged.
    """
    rounded = [_round5(score) for score in scores]
    # commonprefix() compares character by character, so it can be used on
    # plain strings and never indexes past the shorter one.
    shared = os.path.commonprefix([str(max(rounded)), str(min(rounded))])
    try:
        offset = float(shared)
    except ValueError:
        # Empty or non-numeric prefix: keep the scores as they are.
        return rounded
    # Clamp the exponent so a one-character prefix cannot shrink the scores.
    scale = pow(10, max(len(shared) - 2, 0))
    return [_round5((score - offset) * scale) for score in rounded]


def testandscore(word):
    """Classify the text *word* and return the label with per-class scores.

    Returns a JSON-serialisable dict::

        {"type": <predicted class as int>,
         "data": [{"key": "正向", "value": ...},   # positive
                  {"key": "负向", "value": ...},   # negative
                  {"key": "客观", "value": ...}]}  # objective

    The values are the first three entries of ``ada_real.predict_proba``
    after the rescaling done by ``_stretch_scores``.
    NOTE(review): the key order assumes the classifier's probability columns
    are ordered positive, negative, objective -- confirm against the labels
    in train.txt.

    Raises:
        IndexError: if the classifier reports fewer than three classes.
    """
    word_array = bayes.build_word_array(word)
    features = bayes.setOfWordsListToVecTor(vocabList, word_array)
    predicted = ada_real.predict(features)[0]
    probabilities = ada_real.predict_proba(features)[0]

    labels = ("正向", "负向", "客观")
    # Index explicitly so a model with fewer classes fails loudly instead of
    # silently returning a shorter list.
    raw_scores = [probabilities[position] for position in range(len(labels))]
    scores = _stretch_scores(raw_scores)

    return {
        # numpy integer types (e.g. int32) cannot be serialised by json,
        # so convert to a builtin int.
        "type": int(predicted),
        "data": [
            {"key": key, "value": value}
            for key, value in zip(labels, scores)
        ],
    }
if __name__ == '__main__':
    # Manual smoke run: score one sample sentence and print the result as JSON.
    sample_text = "高兴,开心,非常开心,愉快"
    print(json.dumps(testandscore(sample_text)))