-
Notifications
You must be signed in to change notification settings - Fork 0
/
model_traits.py
53 lines (39 loc) · 1.74 KB
/
model_traits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os

import keras
import pandas as pd
from keras import preprocessing as pp
from keras.layers import Embedding, Dense, LSTM
from keras.models import Sequential
from keras.preprocessing.text import Tokenizer
def model_personalitytraits(df1, txt):
    """Classify *txt* into one of four sentiment/personality classes.

    Re-fits the tokenizer on the training corpus, rebuilds the LSTM
    architecture used at training time, loads pre-trained weights from
    ``./model.h5``, and returns the predicted label for the input text.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Must contain a 'content' column (training texts) and a
        'sentiment' column (labels). NOTE(review): assumes this is the
        same corpus the saved weights were trained on, otherwise the
        tokenizer's word indices will not match the embedding — confirm.
    txt : str
        The text to classify.

    Returns
    -------
    The matching label from ``df1['sentiment']``, or ``None`` if the
    prediction matches no known one-hot row (should not occur with a
    4-class softmax and 4 labels).
    """
    MAX_NB_WORDS = 2000
    MAX_SEQUENCE_LENGTH = 250
    EMBEDDING_DIM = 100

    # Re-fit the tokenizer exactly as during training so word indices
    # line up with the saved embedding weights.
    tokenizer = Tokenizer(
        num_words=MAX_NB_WORDS,
        filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~',  # escaped ']' — same chars as before
        lower=True,
    )
    tokenizer.fit_on_texts(df1['content'].values)

    # Map each unique label to its one-hot row so the model's argmax
    # output can be decoded back into a label. get_dummies on the unique
    # values yields an identity-like matrix in a stable column order.
    sentiment_tensors = pd.get_dummies(df1['sentiment'].unique()).values
    res_dict = {}
    for j, label in enumerate(df1['sentiment'].unique()):
        res_dict[label] = sentiment_tensors[j]

    # Rebuild the training-time architecture; weights are loaded below,
    # never trained here. input_length equals the padded sequence width.
    model = Sequential()
    model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM,
                        input_length=MAX_SEQUENCE_LENGTH))
    model.add(keras.layers.core.SpatialDropout1D(0.2))
    model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(4, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    # BUG FIX: original referenced os without importing it (NameError).
    model.load_weights(os.path.join(os.getcwd(), 'model.h5'))

    # Vectorize the input text with the re-fitted tokenizer.
    tweet = tokenizer.texts_to_sequences([txt])
    tweet = pp.sequence.pad_sequences(tweet, maxlen=MAX_SEQUENCE_LENGTH)

    # Turn the softmax output into a one-hot row (1 at the argmax),
    # then decode it via the label -> one-hot mapping built above.
    res = model.predict(tweet, verbose=0)
    res = (res == res.max(axis=1)[:, None]).astype('uint8')
    for key, value in res_dict.items():
        if (res == value).all():
            return key
    return None  # explicit: no one-hot row matched the prediction