forked from cgpotts/cs224u
-
Notifications
You must be signed in to change notification settings - Fork 0
/
np_tree_nn.py
247 lines (201 loc) · 8.32 KB
/
np_tree_nn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import numpy as np
from np_model_base import NNModelBase
from utils import softmax
__author__ = "Christopher Potts"
__version__ = "CS224u, Stanford, Spring 2020"
class TreeNN(NNModelBase):
def __init__(self, vocab, embedding=None, embed_dim=50, **kwargs):
self.vocab = vocab
self.vocab_lookup = dict(zip(self.vocab, range(len(self.vocab))))
if embedding is None:
embedding = self._define_embedding_matrix(
len(self.vocab), embed_dim)
self.embedding = embedding
self.embed_dim = self.embedding.shape[1]
super(TreeNN, self).__init__(**kwargs)
self.hidden_dim = self.embed_dim * 2
def fit(self, X, y=None):
"""Fairly standard `fit` method except that, if `y=None`,
then the labels `y` are presumed to come from the root nodes
of the trees in `X`. We retain the option of giving them
as a separate argument for consistency with the other model
interfaces, and so that we can use sklearn cross-validation
methods with this class.
Parameters
----------
X : list of `nltk.Tree` instances
y : iterable of labels, or None
Returns
-------
self
"""
if y is None:
y = [t.label() for t in X]
return super(TreeNN, self).fit(X, y)
def initialize_parameters(self):
# Hidden parameters for semantic composition:
self.W = self.weight_init(self.hidden_dim, self.embed_dim)
self.b = np.zeros(self.embed_dim)
# Output classifier:
self.W_hy = self.weight_init(self.embed_dim, self.output_dim)
self.b_y = np.zeros(self.output_dim)
def forward_propagation(self, subtree):
"""Forward propagation through the tree and through the
softmax prediction layer on top. For each subtree
[parent left right]
we compute
p = tanh([x_l; x_r]W + b)
where x_l and x_r are the representations on the root of
left and right, and [x_l; x_r] is their concatenation.
The representation on the root is then fed to a softmax
classifier.
Returns
----------
vectree : np.array or tuple of tuples (of tuples ...) of np.array
Predicted vector representation of the entire tree
y : np.array
The predictions made for this example, dimension
`self.output_dim`.
"""
vectree = self._interpret(subtree)
root = self._get_vector_tree_root(vectree)
y = softmax(root.dot(self.W_hy) + self.b_y)
return vectree, y
def _interpret(self, subtree):
"""The forward propagation through the tree itself (excluding
the softmax prediction layer on top of this).
Given an NLTK Tree instance `subtree`, this returns a vector
if `subtree` is just a leaf node, else a tuple of tuples (of
tuples ...) of vectors with the same shape as `subtree`,
with each node now represented by vector.
Parameters
----------
subtree : nltk.tree.Tree
Returns
-------
np.array or tuple-based representation of `subtree`.
"""
# For NLTK `Tree` objects, this identifies leaf nodes:
if isinstance(subtree, str):
return self.get_word_rep(subtree)
elif len(subtree) == 1:
return self._interpret(subtree[0])
else:
left_subtree, right_subtree = subtree[0], subtree[1]
# Recursive interpretation of the child trees:
left_vectree = self._interpret(left_subtree)
right_vectree = self._interpret(right_subtree)
# Top representations of each child tree:
left_rep = self._get_vector_tree_root(left_vectree)
right_rep = self._get_vector_tree_root(right_vectree)
# Concatenate and create the hidden representation:
combined = np.concatenate((left_rep, right_rep))
root_rep = self.hidden_activation(combined.dot(self.W) + self.b)
# Return the full subtree of vectors:
return (root_rep, left_vectree, right_vectree)
@staticmethod
def _get_vector_tree_root(vectree):
"""Returns `tree` if it represents only a lexical item, else
the root (first member) of `tree`.
Parameters
----------
vectree : np.array or tuple of tuples (of tuples ...) of np.array
Returns
-------
np.array
"""
if isinstance(vectree, tuple):
return vectree[0]
else:
return vectree
def backward_propagation(self, vectree, predictions, ex, labels):
root = self._get_vector_tree_root(vectree)
# Output errors:
y_err = predictions.copy()
y_err[np.argmax(labels)] -= 1.0
d_W_hy = np.outer(root, y_err)
d_b_y = y_err
# Internal error accumulation:
d_W = np.zeros_like(self.W)
d_b = np.zeros_like(self.b)
h_err = y_err.dot(self.W_hy.T) * self.d_hidden_activation(root)
d_W, d_b = self._tree_backprop(vectree, h_err, d_W, d_b)
return d_W_hy, d_b_y, d_W, d_b
def _tree_backprop(self, deep_tree, h_err, d_W, d_b):
# This is the leaf-node condition for vector trees:
if isinstance(deep_tree, np.ndarray):
return d_W, d_b
else:
left_subtree, right_subtree = deep_tree[1], deep_tree[2]
left_rep = self._get_vector_tree_root(left_subtree)
right_rep = self._get_vector_tree_root(right_subtree)
combined = np.concatenate((left_rep, right_rep))
d_b += h_err
d_W += np.outer(combined, h_err)
err = h_err.dot(self.W.T) * self.d_hidden_activation(combined)
l_err = err[: self.embed_dim]
r_err = err[self.embed_dim: ]
d_W, d_b = self._tree_backprop(left_subtree, l_err, d_W, d_b)
d_W, d_b = self._tree_backprop(right_subtree, r_err, d_W, d_b)
return d_W, d_b
def update_parameters(self, gradients):
d_W_hy, d_b_y, d_W, d_b = gradients
self.W_hy -= self.eta * d_W_hy
self.b_y -= self.eta * d_b_y
self.W -= self.eta * d_W
self.b -= self.eta * d_b
def set_params(self, **params):
super(TreeNN, self).set_params(**params)
self.hidden_dim = self.embed_dim * 2
def simple_example(initial_embedding=False, separate_y=False):
from nltk.tree import Tree
import utils
train = [
"(odd 1)",
"(even 2)",
"(even (odd 1) (neutral (neutral +) (odd 1)))",
"(odd (odd 1) (neutral (neutral +) (even 2)))",
"(odd (even 2) (neutral (neutral +) (odd 1)))",
"(even (even 2) (neutral (neutral +) (even 2)))",
"(even (odd 1) (neutral (neutral +) (odd (odd 1) (neutral (neutral +) (even 2)))))"
]
test = [
"(odd (odd 1))",
"(even (even 2))",
"(odd (odd 1) (neutral (neutral +) (even (odd 1) (neutral (neutral +) (odd 1)))))",
"(even (even 2) (neutral (neutral +) (even (even 2) (neutral (neutral +) (even 2)))))",
"(odd (even 2) (neutral (neutral +) (odd (even 2) (neutral (neutral +) (odd 1)))))",
"(even (odd 1) (neutral (neutral +) (odd (even 2) (neutral (neutral +) (odd 1)))))",
"(odd (even 2) (neutral (neutral +) (odd (odd 1) (neutral (neutral +) (even 2)))))"
]
vocab = ["1", "+", "2"]
X_train = [Tree.fromstring(x) for x in train]
X_test = [Tree.fromstring(x) for x in test]
if initial_embedding:
import numpy as np
embedding = np.random.uniform(
low=-1.0, high=1.0, size=(len(vocab), 50))
else:
embedding = None
model = TreeNN(
vocab,
embed_dim=50,
hidden_dim=50,
max_iter=100,
embedding=embedding)
if not separate_y:
y = [t.label() for t in X_train]
else:
y = None
model.fit(X_train, y=y)
print("\nTest predictions:")
preds = model.predict(X_test)
y_test = [t.label() for t in X_test]
correct = 0
for tree, label, pred in zip(X_test, y_test, preds):
if pred == label:
correct += 1
print("{}\n\tPredicted: {}\n\tActual: {}".format(tree, pred, label))
print("{}/{} correct".format(correct, len(X_test)))
if __name__ == '__main__':
simple_example(initial_embedding=False, separate_y=False)