# cnn_test.py
"""Inference helpers for the CNN implicit / non-explicit classifiers:
build the trained models, load saved weights, and predict sense
distributions for preprocessed (Arg1, Arg2) instances."""
import sys
sys.path.append("./")
# Prefer the patched Keras build used when the models were trained.
sys.path = ['/home/tao0920/libs/keras-fix'] + sys.path

import json
import numpy as np
from keras.utils import np_utils
from keras.preprocessing import sequence

from model_trainer.cnn_implicit_classifier import cnn_config
from model_trainer.cnn_implicit_classifier.trainer import build_model
import config

# Both argument sequences are padded/truncated to this length.
maxlen = 120
def test_imp(instances):
    """Run the implicit-relation CNN over preprocessed instances."""
    # Hyper-parameters matching the trained implicit model.
    nb_filter, filter_length1, filter_length2, filter_length3 = 1024, 3, 3, 3
    lr = 0.001
    activation = 'tanh'
    batch_size = 128

    # Load the word/POS vocabularies used at training time.
    with open(config.CNN_IMP_DICT, 'r') as rf:
        vocab = json.load(rf)
    w2i_dic = vocab[0]
    p2i_dic = vocab[1]

    Arg1_word, Arg2_word, Arg1_pos, Arg2_pos = embedding_process(instances, w2i_dic, p2i_dic)

    print("Implicit cnn is building...")
    model = build_model(lr, activation, nb_filter, filter_length1, filter_length2, filter_length3, train=False)
    print("Implicit cnn finished building!")

    model.load_weights(config.CNN_IMP_MODEL)
    dev_output = model.predict({'arg1': Arg1_word, 'arg2': Arg2_word, 'pos1': Arg1_pos, 'pos2': Arg2_pos},
                               batch_size=batch_size)['output']
    return dev_output
def test_nop(instances):
    """Run the non-explicit-relation CNN over preprocessed instances."""
    # Hyper-parameters matching the trained non-explicit model.
    nb_filter, filter_length1, filter_length2, filter_length3 = 512, 4, 8, 12
    lr = 0.001
    activation = 'tanh'
    batch_size = 64

    # Load the word/POS vocabularies used at training time.
    with open(config.CNN_NOEXP_DICT, 'r') as rf:
        vocab = json.load(rf)
    w2i_dic = vocab[0]
    p2i_dic = vocab[1]

    Arg1_word, Arg2_word, Arg1_pos, Arg2_pos = embedding_process(instances, w2i_dic, p2i_dic)

    print("Non_Explicit cnn is building...")
    model = build_model(lr, activation, nb_filter, filter_length1, filter_length2, filter_length3,
                        word_dim=36821, train=False)
    print("Non_Explicit cnn finished building!")

    model.load_weights(config.CNN_NOEXP_MODEL)
    dev_output = model.predict({'arg1': Arg1_word, 'arg2': Arg2_word, 'pos1': Arg1_pos, 'pos2': Arg2_pos},
                               batch_size=batch_size)['output']
    return dev_output
def embedding_process(instances, w2i_dic, p2i_dic):
    """Map tokens/POS tags to vocabulary indices and pad each sequence.

    Each instance is (arg1 tokens, arg2 tokens, arg1 POS tags, arg2 POS tags, ...);
    out-of-vocabulary items are mapped to index 0.
    """
    tmp = []
    for x in instances:
        arg1 = []
        arg2 = []
        pos1 = []
        pos2 = []
        sense = []
        for w in x[0]:
            arg1.append(w2i_dic[w] if w in w2i_dic else 0)
        for w in x[1]:
            arg2.append(w2i_dic[w] if w in w2i_dic else 0)
        for w in x[2]:
            pos1.append(p2i_dic[w] if w in p2i_dic else 0)
        for w in x[3]:
            pos2.append(p2i_dic[w] if w in p2i_dic else 0)
        tmp.append((arg1, arg2, pos1, pos2, sense))

    data = tmp
    X_1 = np.array([x[0] for x in data])
    X_2 = np.array([x[1] for x in data])
    X_pos_1 = np.array([x[2] for x in data])
    X_pos_2 = np.array([x[3] for x in data])

    # Arg1 is padded/truncated from the front, Arg2 from the back.
    X_1 = sequence.pad_sequences(X_1, maxlen=maxlen, padding='pre', truncating='pre')
    X_2 = sequence.pad_sequences(X_2, maxlen=maxlen, padding='post', truncating='post')
    X_pos_1 = sequence.pad_sequences(X_pos_1, maxlen=maxlen, padding='pre', truncating='pre')
    X_pos_2 = sequence.pad_sequences(X_pos_2, maxlen=maxlen, padding='post', truncating='post')

    print((X_1.shape, X_pos_1.shape))
    return (X_1, X_2, X_pos_1, X_pos_2)
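

# --- Usage sketch (not part of the original script) ---
# A minimal example of how these helpers might be invoked, assuming the
# paths in config point at the trained dictionary and weight files. The
# sample tokens/POS tags below are made up for illustration and are not
# from the repository's actual pipeline.
if __name__ == '__main__':
    # Each instance: (arg1 tokens, arg2 tokens, arg1 POS tags, arg2 POS tags)
    sample_instances = [
        (['the', 'market', 'fell'], ['investors', 'were', 'nervous'],
         ['DT', 'NN', 'VBD'], ['NNS', 'VBD', 'JJ']),
    ]
    probs = test_imp(sample_instances)   # assumed shape: (n_instances, n_senses)
    pred = np.argmax(probs, axis=1)      # most probable sense index per instance
    print(pred)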