# GatedRNN.py
import logging
import time

import tensorflow as tf

from basic_rnn import dot_attention, dense, cudnn_gru


class GatedRNN(object):
    """Gated-attention RNN (R-Net style) that encodes a question-answer pair
    and classifies it into one of three answer types."""

    def __init__(self, args, batch, token_embeddings=None, trainable=True, opt=True):
# logging
self.logger = logging.getLogger("brc")
# basic config
self.batch_size = args.batch_size
self.hidden_size = args.hidden_size
self.output_size = 3
        # hyper-parameters for the optional self-attentive annotation layer
        # (_annotation below); uncomment them together with that call
        # self.d_a = 300
        # self.r = 64
        # self.p_coef = 1
self.layer_num = args.layer_num
self.optim_type = args.optim
        self.weight_decay = args.weight_decay  # stored but no L2 penalty is applied below
self.dropout_keep_prob = args.dropout_keep_prob
self.trainable = trainable
# length limit
self.max_q_len = args.max_q_len
self.max_a_len = args.max_a_len
# session info
sess_config = tf.ConfigProto()
sess_config.gpu_options.allow_growth = True
self.sess = tf.Session(config=sess_config)
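        # the input pipeline yields answer token ids, question token ids,
        # one-hot answer-type labels, and example ids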
self.a, self.q, self.answers_type, self.qa_id = batch.get_next()
self.lr = tf.get_variable('lr', shape=[], dtype=tf.float32, trainable=False)
self.is_train = tf.get_variable('is_train', shape=[], dtype=tf.bool, trainable=False)
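        # token id 0 is assumed to be padding: casting ids to bool yields the
        # sequence masks, and the row sums of the masks give the true lengths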
self.a_mask = tf.cast(self.a, tf.bool)
self.q_mask = tf.cast(self.q, tf.bool)
self.a_len = tf.reduce_sum(tf.cast(self.a_mask, tf.int32), axis=1)
self.q_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1)
self.N = tf.shape(self.qa_id)[0]
self._build_graph(token_embeddings)
def _build_graph(self, token_embeddings):
start_t = time.time()
self._embed(token_embeddings)
self._encode()
self._gated_attention()
self._self_attention()
        # self._annotation()  # optional; needs self.d_a / self.r in __init__
self._predict()
self._compute_loss()
if self.trainable:
self._create_train_op()
self.logger.info('Time to build graph: {} s'.format(time.time() - start_t))
def _embed(self, token_embeddings):
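        # look up frozen pre-trained embeddings; they are pinned to the CPU to
        # avoid copying the full matrix onto the GPU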
with tf.device('/cpu:0'), tf.variable_scope('word_embedding', reuse=tf.AUTO_REUSE):
word_embeddings = tf.get_variable('word_embeddings',
initializer=tf.constant(token_embeddings, dtype=tf.float32),
trainable=False)
self.a_emb = tf.nn.embedding_lookup(word_embeddings, self.a)
self.q_emb = tf.nn.embedding_lookup(word_embeddings, self.q)
def _encode(self):
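        # encode answer and question with separate stacked GRU encoders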
with tf.variable_scope('answer_encoding', reuse=tf.AUTO_REUSE):
a_rnn = cudnn_gru(num_layers=2 * self.layer_num, num_units=self.hidden_size, batch_size=self.N,
input_size=self.a_emb.get_shape().as_list()[-1],
keep_prob=self.dropout_keep_prob, is_train=self.is_train)
self.a_encodes = a_rnn(self.a_emb, seq_len=self.a_len)
with tf.variable_scope('question_encoding', reuse=tf.AUTO_REUSE):
q_rnn = cudnn_gru(num_layers=2 * self.layer_num, num_units=self.hidden_size, batch_size=self.N,
input_size=self.q_emb.get_shape().as_list()[-1],
keep_prob=self.dropout_keep_prob, is_train=self.is_train)
self.q_encodes = q_rnn(self.q_emb, seq_len=self.q_len)
def _gated_attention(self):
with tf.variable_scope('gated_attention', reuse=tf.AUTO_REUSE):
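            # attend from each answer position to the question (gated
            # question-answer attention), then re-encode the attended sequence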
self.qa_att = dot_attention(self.a_encodes, self.q_encodes, mask=self.q_mask,
hidden=self.hidden_size, keep_prob=self.dropout_keep_prob,
is_train=self.is_train)
gated_rnn = cudnn_gru(num_layers=self.layer_num, num_units=self.hidden_size, batch_size=self.N,
input_size=self.qa_att.get_shape().as_list()[-1], keep_prob=self.dropout_keep_prob,
is_train=self.is_train)
self.gated_att = gated_rnn(self.qa_att, self.a_len)
def _self_attention(self):
with tf.variable_scope('self_attention', reuse=tf.AUTO_REUSE):
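            # match the answer representation against itself to capture
            # long-range dependencies within the answer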
self.aa_att = dot_attention(self.gated_att, self.gated_att, mask=self.a_mask,
hidden=self.hidden_size, keep_prob=self.dropout_keep_prob,
is_train=self.is_train)
self_rnn = cudnn_gru(num_layers=self.layer_num, num_units=self.hidden_size, batch_size=self.N,
input_size=self.aa_att.get_shape().as_list()[-1], keep_prob=self.dropout_keep_prob,
is_train=self.is_train)
self.self_att = self_rnn(self.aa_att, self.a_len)
def _annotation(self):
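        # Structured self-attention in the style of Lin et al. (2017),
        # "A Structured Self-attentive Sentence Embedding":
        #   A = softmax(W_s2 * tanh(W_s1 * H^T)),  M = A * H,
        # with the penalty P = ||A A^T - I||_F^2 pushing attention rows apart.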
# shape(W_s1) = d_a * 2u
self.W_s1 = tf.get_variable('W_s1', shape=[self.d_a, 2 * self.hidden_size],
initializer=tf.contrib.layers.xavier_initializer())
# shape(W_s2) = r * d_a
self.W_s2 = tf.get_variable('W_s2', shape=[self.r, self.d_a],
initializer=tf.contrib.layers.xavier_initializer())
self.A = tf.nn.softmax(tf.map_fn(
lambda x: tf.matmul(self.W_s2, x),
tf.tanh(tf.map_fn(lambda x: tf.matmul(self.W_s1, tf.transpose(x)),
self.gated_att))))
self.M = tf.matmul(self.A, self.gated_att)
self.A_T = tf.transpose(self.A, perm=[0, 2, 1])
tile_eye = tf.tile(tf.eye(self.r), [self.N, 1])
tile_eye = tf.reshape(tile_eye, [-1, self.r, self.r])
self.AA_T = tf.matmul(self.A, self.A_T) - tile_eye
self.P = tf.square(tf.norm(self.AA_T, axis=[-2, -1], ord='fro'))
    def _predict(self):
        with tf.variable_scope('predict', reuse=tf.AUTO_REUSE):
            # flatten to [N, max_a_len * 2 * hidden_size]; the factor of 2
            # assumes the bidirectional cudnn_gru from basic_rnn
            self.att = tf.reshape(self.self_att, shape=[self.N, 2 * self.max_a_len * self.hidden_size])
            self.mlp = tf.nn.relu(dense(self.att, hidden=4 * self.hidden_size, scope='dense_0'))
            # self.is_train is a tf.bool tensor, so it cannot drive a Python
            # `if`; gate the dropout through tf.cond instead
            self.mlp = tf.cond(self.is_train,
                               lambda: tf.nn.dropout(self.mlp, self.dropout_keep_prob),
                               lambda: self.mlp)
            self.mlp = tf.nn.relu(dense(self.mlp, hidden=2 * self.hidden_size, scope='dense_1'))
            self.mlp = tf.cond(self.is_train,
                               lambda: tf.nn.dropout(self.mlp, self.dropout_keep_prob),
                               lambda: self.mlp)
            self.outputs = dense(self.mlp, hidden=self.output_size, scope='output')
    def _compute_loss(self):
        self.pre_labels = tf.argmax(self.outputs, axis=1)
        # mean cross-entropy over the batch; a reduce_sum over all elements
        # followed by reduce_mean would collapse to a plain batch sum
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.outputs, labels=tf.stop_gradient(self.answers_type)))
def _create_train_op(self):
"""
Selects the training algorithm and creates a train operation with it
"""
with tf.variable_scope('optimizer', reuse=tf.AUTO_REUSE):
if self.optim_type == 'adadelta':
self.optimizer = tf.train.AdadeltaOptimizer(self.lr)
elif self.optim_type == 'adam':
self.optimizer = tf.train.AdamOptimizer(self.lr)
            elif self.optim_type == 'rprop':
                # note: the 'rprop' key selects RMSProp, not classic Rprop
                self.optimizer = tf.train.RMSPropOptimizer(self.lr)
elif self.optim_type == 'sgd':
self.optimizer = tf.train.GradientDescentOptimizer(self.lr)
else:
raise NotImplementedError('Unsupported optimizer: {}'.format(self.optim_type))
self.train_op = self.optimizer.minimize(self.loss)
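

if __name__ == '__main__':
    # Minimal usage sketch, not part of the original training pipeline: it
    # builds the graph on random data to illustrate the expected input shapes.
    # All hyper-parameter values and the tf.data pipeline below are
    # illustrative assumptions; cudnn_gru requires a CUDA-capable GPU.
    import numpy as np
    from types import SimpleNamespace

    args = SimpleNamespace(batch_size=4, hidden_size=75, layer_num=1,
                           optim='adam', weight_decay=0.0,
                           dropout_keep_prob=0.7, max_q_len=20, max_a_len=50)
    vocab_size, embed_dim, num_examples = 1000, 300, 16
    token_embeddings = np.random.randn(vocab_size, embed_dim).astype(np.float32)

    # ids are drawn from [1, vocab_size) because id 0 is treated as padding;
    # labels are one-hot over the three answer types
    dataset = tf.data.Dataset.from_tensor_slices((
        np.random.randint(1, vocab_size, (num_examples, args.max_a_len)).astype(np.int32),
        np.random.randint(1, vocab_size, (num_examples, args.max_q_len)).astype(np.int32),
        np.eye(3, dtype=np.float32)[np.random.randint(0, 3, num_examples)],
        np.arange(num_examples, dtype=np.int32),
    )).batch(args.batch_size)
    batch = dataset.make_one_shot_iterator()

    model = GatedRNN(args, batch, token_embeddings=token_embeddings)
    model.sess.run(tf.global_variables_initializer())
    model.sess.run([tf.assign(model.lr, 1e-3), tf.assign(model.is_train, True)])
    loss, _ = model.sess.run([model.loss, model.train_op])
    print('loss: {}'.format(loss))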