# tutorial_5_LSTM_ATIS.py
import math
import numpy as np
import os
import cntk as C
import cntk.tests.test_utils
import input_ATIS
cntk.tests.test_utils.set_device_from_pytest_env() # (only needed for our build system)
C.cntk_py.set_fixed_random_seed(1) # fix a random seed for CNTK components
# number of words in vocab, slot labels, and intent labels
vocab_size = 943 ; num_labels = 129 ; num_intents = 26
# model dimensions
input_dim = vocab_size
label_dim = num_labels
emb_dim = 150
hidden_dim = 300
# Create the containers for input feature (x) and the label (y)
x = C.sequence.input_variable(vocab_size)
y = C.sequence.input_variable(num_labels)
print('input dimension {}'.format(x.shape))
print('label dimension {}'.format(y.shape))
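# A minimal illustration (not part of the original script) of what one feed to
# x looks like: a query is a sequence of one-hot vectors over the vocabulary.
# The word indices below are made up purely for illustration.
example = C.Value.one_hot([[172, 401, 19]], vocab_size)  # batch of 1 sequence, 3 words
print('example value shape {}'.format(example.shape))    # batch x steps x vocab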
def create_model():
    with C.layers.default_options(initial_state=0.1):
        return C.layers.Sequential([
            C.layers.Embedding(emb_dim, name='embed'),
            C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False),
            C.layers.Dense(num_labels, name='classify')
        ])
# peek
z = create_model()
print(z.embed.E.shape)
print(z.classify.b.value)
z = create_model()
print(z(x).embed.E.shape)
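# Binding the model to x infers the free input dimension; a quick check (not in
# the original script) of the per-step output shape, one score per slot label:
print(z(x).shape)  # expected: (num_labels,)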
def create_criterion_function(model):
    labels = C.placeholder(name='labels')
    ce = C.cross_entropy_with_softmax(model, labels)
    errs = C.classification_error(model, labels)
    return C.combine([ce, errs])  # (features, labels) -> (loss, metric)
criterion = create_criterion_function(create_model())
print(criterion.replace_placeholders({criterion.placeholders[0]: C.sequence.input_variable(num_labels)}))
def create_criterion_function_preferred(model, labels):
    ce = C.cross_entropy_with_softmax(model, labels)
    errs = C.classification_error(model, labels)
    return ce, errs  # (model, labels) -> (loss, error metric)
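# Usage sketch: the (loss, metric) pair returned above feeds a Trainer
# directly, e.g. trainer = C.Trainer(model, (ce, errs), learner), as done
# in train() below.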
def train(reader, model_func, max_epochs=10):
    # Instantiate the model function; x is the input (feature) variable
    model = model_func(x)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function_preferred(model, y)

    # training config
    epoch_size = 18000  # 18000 samples is half the dataset size
    minibatch_size = 70

    # LR schedule over epochs
    # In CNTK, an epoch is how often we get out of the minibatch loop to
    # do other stuff (e.g. checkpointing, adjusting the learning rate)
    lr_per_sample = [3e-4]*4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
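    # e.g. 3e-4 per sample * 70 samples per minibatch = 0.021 per minibatch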
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch, C.UnitType.minibatch, epoch_size)

    # Momentum schedule
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(700)
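    # A time constant of 700 samples corresponds to a per-sample momentum of
    # exp(-1/700), roughly 0.9986.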
    # We use the Adam optimizer, which is known to work well on this dataset.
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.adam(parameters=model.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    # Set up the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    # Uncomment below for more detailed logging
    #progress_printer = C.logging.ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs)

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)

    t = 0
    for epoch in range(max_epochs):          # loop over epochs
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:                 # loop over minibatches in the epoch
            data = reader.next_minibatch(minibatch_size, input_map={  # fetch minibatch
                x: reader.streams.query,
                y: reader.streams.slot_labels
            })
            trainer.train_minibatch(data)    # update model with it
            t += data[y].num_samples         # samples so far
        trainer.summarize_training_progress()
def do_train():
    global z
    z = create_model()
    reader = input_ATIS.create_reader(os.path.join('./data/ATIS/', input_ATIS.data['train']['file']), is_training=True)
    train(reader, z)
do_train()
def evaluate(reader, model_func):
    # Instantiate the model function; x is the input (feature) variable
    model = model_func(x)

    # Create the loss and error functions
    loss, label_error = create_criterion_function_preferred(model, y)

    # process minibatches and perform evaluation
    progress_printer = C.logging.ProgressPrinter(tag='Evaluation', num_epochs=0)
    evaluator = C.eval.Evaluator(loss, progress_printer)  # create once so stats aggregate across minibatches

    while True:
        minibatch_size = 500
        data = reader.next_minibatch(minibatch_size, input_map={  # fetch minibatch
            x: reader.streams.query,
            y: reader.streams.slot_labels
        })
        if not data:  # until we hit the end
            break
        evaluator.test_minibatch(data)

    evaluator.summarize_test_progress()
def do_test():
    reader = input_ATIS.create_reader(os.path.join('./data/ATIS/', input_ATIS.data['test']['file']), is_training=False)
    evaluate(reader, z)
do_test()
print(z.classify.b.value)  # peek at the trained classifier bias
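# A hedged sketch of tagging one query with the trained model. It assumes
# input_ATIS.data also lists vocabulary files under 'query' and 'slots' keys
# (as in the original CNTK ATIS tutorial); adjust if your input_ATIS differs.
query_wl = [line.rstrip('\n') for line in open(os.path.join('./data/ATIS/', input_ATIS.data['query']['file']))]
slots_wl = [line.rstrip('\n') for line in open(os.path.join('./data/ATIS/', input_ATIS.data['slots']['file']))]
query_dict = {query_wl[i]: i for i in range(len(query_wl))}

seq = 'BOS flights from new york to seattle EOS'
w = [query_dict[word] for word in seq.split()]

# encode the query as a sequence of one-hot vectors
onehot = np.zeros([len(w), vocab_size], np.float32)
for t in range(len(w)):
    onehot[t, w[t]] = 1

pred = z(x).eval({x: [onehot]})[0]  # forward pass over the sequence
best = np.argmax(pred, axis=1)      # best slot label per word
print(list(zip(seq.split(), [slots_wl[s] for s in best])))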