
Commit

remove dropout because it isn't used in these examples and wasn't recently tested
Bethany Lusch authored and committed Oct 13, 2019
1 parent db3abf9 commit 1a5cc56
Showing 3 changed files with 40 additions and 65 deletions.
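The net effect for callers: the phase and keep_prob placeholders are gone from every network-building and loss function, so only the data placeholder gets fed. A minimal sketch of the call pattern after this commit (not repo code; it assumes a params dictionary and a data_val_tensor array prepared as in training.py):

import networkarch as net
from training import define_loss

# signatures after this commit: no phase/keep_prob arguments
x, y, g_list, weights, biases = net.create_koopman_net(params)
loss1, loss2, loss3, loss_Linf, loss = define_loss(x, y, g_list, weights, biases, params)
feed_dict_val = {x: data_val_tensor}  # only the data placeholder remains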
3 changes: 0 additions & 3 deletions helperfns.py
@@ -435,9 +435,6 @@ def set_defaults(params):
if 'batch_size' not in params:
print("setting default: no batches (use whole training file at once)")
params['batch_size'] = 0
- if 'dropout_rate' not in params:
- print("setting default: no dropout")
- params['dropout_rate'] = 1.0

# setting defaults related to keeping track of training time and progress
if 'max_time' not in params:
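With the dropout_rate default removed, set_defaults keeps the same fill-in-missing-keys pattern for the remaining options. A standalone sketch of that pattern (hypothetical params, not repo code):

params = {}  # hypothetical experiment dictionary
if 'batch_size' not in params:
    print("setting default: no batches (use whole training file at once)")
    params['batch_size'] = 0
# after this commit nothing adds a 'dropout_rate' key
assert 'dropout_rate' not in params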
81 changes: 31 additions & 50 deletions networkarch.py
@@ -105,8 +105,7 @@ def encoder(widths, dist_weights, dist_biases, scale, num_shifts_max, first_gues
return x, weights, biases


- def encoder_apply(x, weights, biases, act_type, batch_flag, phase, shifts_middle, keep_prob, name='E',
- num_encoder_weights=1):
+ def encoder_apply(x, weights, biases, act_type, batch_flag, shifts_middle, name='E', num_encoder_weights=1):
"""Apply an encoder to data x.
Arguments:
@@ -115,9 +114,7 @@ def encoder_apply(x, weights, biases, act_type, batch_flag, phase, shifts_middle
biases -- dictionary of biases
act_type -- string for activation type for nonlinear layers (i.e. sigmoid, relu, or elu)
batch_flag -- 0 if no batch_normalization, 1 if batch_normalization
- phase -- boolean placeholder for dropout: training phase or not training phase
shifts_middle -- number of shifts (steps) in x to apply encoder to for linearity loss
- keep_prob -- probability that weight is kept during dropout
name -- string for prefix on weight matrices (default 'E' for encoder)
num_encoder_weights -- number of weight matrices (layers) in encoder network (default 1)
@@ -139,13 +136,11 @@ def encoder_apply(x, weights, biases, act_type, batch_flag, phase, shifts_middle
else:
x_shift = tf.squeeze(x[shift, :, :])
y.append(
- encoder_apply_one_shift(x_shift, weights, biases, act_type, batch_flag, phase, keep_prob, name,
- num_encoder_weights))
+ encoder_apply_one_shift(x_shift, weights, biases, act_type, batch_flag, name, num_encoder_weights))
return y


- def encoder_apply_one_shift(prev_layer, weights, biases, act_type, batch_flag, phase, keep_prob, name='E',
- num_encoder_weights=1):
+ def encoder_apply_one_shift(prev_layer, weights, biases, act_type, batch_flag, name='E', num_encoder_weights=1):
"""Apply an encoder to data for only one time step (shift).
Arguments:
@@ -154,8 +149,6 @@ def encoder_apply_one_shift(prev_layer, weights, biases, act_type, batch_flag, p
biases -- dictionary of biases
act_type -- string for activation type for nonlinear layers (i.e. sigmoid, relu, or elu)
batch_flag -- 0 if no batch_normalization, 1 if batch_normalization
- phase -- boolean placeholder for dropout: training phase or not training phase
- keep_prob -- probability that weight is kept during dropout
name -- string for prefix on weight matrices (default 'E' for encoder)
num_encoder_weights -- number of weight matrices (layers) in encoder network (default 1)
@@ -166,23 +159,22 @@ def encoder_apply_one_shift(prev_layer, weights, biases, act_type, batch_flag, p
None
"""
for i in np.arange(num_encoder_weights - 1):
- h1 = tf.matmul(prev_layer, weights['W%s%d' % (name, i + 1)]) + biases['b%s%d' % (name, i + 1)]
+ prev_layer = tf.matmul(prev_layer, weights['W%s%d' % (name, i + 1)]) + biases['b%s%d' % (name, i + 1)]
if batch_flag:
- h1 = tf.contrib.layers.batch_norm(h1, is_training=phase)
+ prev_layer = tf.contrib.layers.batch_norm(prev_layer)
if act_type == 'sigmoid':
- h1 = tf.sigmoid(h1)
+ prev_layer = tf.sigmoid(prev_layer)
elif act_type == 'relu':
- h1 = tf.nn.relu(h1)
+ prev_layer = tf.nn.relu(prev_layer)
elif act_type == 'elu':
- h1 = tf.nn.elu(h1)
- prev_layer = tf.cond(keep_prob < 1.0, lambda: tf.nn.dropout(h1, keep_prob), lambda: h1)
+ prev_layer = tf.nn.elu(prev_layer)

# apply last layer without any nonlinearity
final = tf.matmul(prev_layer, weights['W%s%d' % (name, num_encoder_weights)]) + biases[
'b%s%d' % (name, num_encoder_weights)]

if batch_flag:
- final = tf.contrib.layers.batch_norm(final, is_training=phase)
+ final = tf.contrib.layers.batch_norm(final)

return final

@@ -218,7 +210,7 @@ def decoder(widths, dist_weights, dist_biases, scale, name='D', first_guess=0):
return weights, biases


- def decoder_apply(prev_layer, weights, biases, act_type, batch_flag, phase, keep_prob, num_decoder_weights):
+ def decoder_apply(prev_layer, weights, biases, act_type, batch_flag, num_decoder_weights):
"""Apply a decoder to data prev_layer
Arguments:
@@ -227,8 +219,6 @@ def decoder_apply(prev_layer, weights, biases, act_type, batch_flag, phase, keep
biases -- dictionary of biases
act_type -- string for activation type for nonlinear layers (i.e. sigmoid, relu, or elu)
batch_flag -- 0 if no batch_normalization, 1 if batch_normalization
- phase -- boolean placeholder for dropout: training phase or not training phase
- keep_prob -- probability that weight is kept during dropout
num_decoder_weights -- number of weight matrices (layers) in decoder network
Returns:
@@ -238,16 +228,15 @@ def decoder_apply(prev_layer, weights, biases, act_type, batch_flag, phase, keep
None
"""
for i in np.arange(num_decoder_weights - 1):
- h1 = tf.matmul(prev_layer, weights['WD%d' % (i + 1)]) + biases['bD%d' % (i + 1)]
+ prev_layer = tf.matmul(prev_layer, weights['WD%d' % (i + 1)]) + biases['bD%d' % (i + 1)]
if batch_flag:
- h1 = tf.contrib.layers.batch_norm(h1, is_training=phase)
+ prev_layer = tf.contrib.layers.batch_norm(prev_layer)
if act_type == 'sigmoid':
- h1 = tf.sigmoid(h1)
+ prev_layer = tf.sigmoid(prev_layer)
elif act_type == 'relu':
- h1 = tf.nn.relu(h1)
+ prev_layer = tf.nn.relu(prev_layer)
elif act_type == 'elu':
- h1 = tf.nn.elu(h1)
- prev_layer = tf.cond(keep_prob < 1.0, lambda: tf.nn.dropout(h1, keep_prob), lambda: h1)
+ prev_layer = tf.nn.elu(prev_layer)

# apply last layer without any nonlinearity
return tf.matmul(prev_layer, weights['WD%d' % num_decoder_weights]) + biases['bD%d' % num_decoder_weights]
@@ -326,12 +315,10 @@ def varying_multiply(y, omegas, delta_t, num_real, num_complex_pairs):
return real_part


- def create_omega_net(phase, keep_prob, params, ycoords):
+ def create_omega_net(params, ycoords):
"""Create the auxiliary (omega) network(s), which have ycoords as input and output omegas (parameters for L).
Arguments:
- phase -- boolean placeholder for dropout: training phase or not training phase
- keep_prob -- probability that weight is kept during dropout
params -- dictionary of parameters for experiment
ycoords -- array of shape [None, k] of y-coordinates, where L will be k x k
@@ -356,7 +343,7 @@ def create_omega_net(phase, keep_prob, params, ycoords):

params['num_omega_weights'] = len(params['widths_omega_real']) - 1

- omegas = omega_net_apply(phase, keep_prob, params, ycoords, weights, biases)
+ omegas = omega_net_apply(params, ycoords, weights, biases)

return omegas, weights, biases

@@ -384,12 +371,10 @@ def create_one_omega_net(params, temp_name, weights, biases, widths):
biases.update(biasesO)


- def omega_net_apply(phase, keep_prob, params, ycoords, weights, biases):
+ def omega_net_apply(params, ycoords, weights, biases):
"""Apply the omega (auxiliary) network(s) to the y-coordinates.
Arguments:
- phase -- boolean placeholder for dropout: training phase or not training phase
- keep_prob -- probability that weight is kept during dropout
params -- dictionary of parameters for experiment
ycoords -- array of shape [None, k] of y-coordinates, where L will be k x k
weights -- dictionary of weights
@@ -408,22 +393,21 @@ def omega_net_apply(phase, keep_prob, params, ycoords, weights, biases):
pair_of_columns = ycoords[:, ind:ind + 2]
radius_of_pair = tf.reduce_sum(tf.square(pair_of_columns), axis=1, keep_dims=True)
omegas.append(
- omega_net_apply_one(phase, keep_prob, params, radius_of_pair, weights, biases, temp_name))
+ omega_net_apply_one(params, radius_of_pair, weights, biases, temp_name))
for j in np.arange(params['num_real']):
temp_name = 'OR%d_' % (j + 1)
ind = 2 * params['num_complex_pairs'] + j
one_column = ycoords[:, ind]
- omegas.append(omega_net_apply_one(phase, keep_prob, params, one_column[:, np.newaxis], weights, biases, temp_name))
+ omegas.append(
+ omega_net_apply_one(params, one_column[:, np.newaxis], weights, biases, temp_name))

return omegas


- def omega_net_apply_one(phase, keep_prob, params, ycoords, weights, biases, name):
+ def omega_net_apply_one(params, ycoords, weights, biases, name):
"""Apply one auxiliary (omega) network for one real eigenvalue or a pair of complex conj. eigenvalues.
Arguments:
- phase -- boolean placeholder for dropout: training phase or not training phase
- keep_prob -- probability that weight is kept during dropout
params -- dictionary of parameters for experiment
ycoords -- array of shape [None, k] of y-coordinates, where L will be k x k
weights -- dictionary of weights
@@ -437,18 +421,15 @@ def omega_net_apply_one(phase, keep_prob, params, ycoords, weights, biases, name
None
"""

- omegas = encoder_apply_one_shift(ycoords, weights, biases, params['act_type'], params['batch_flag'], phase,
- keep_prob=keep_prob, name=name,
+ omegas = encoder_apply_one_shift(ycoords, weights, biases, params['act_type'], params['batch_flag'], name=name,
num_encoder_weights=params['num_omega_weights'])
return omegas


- def create_koopman_net(phase, keep_prob, params):
+ def create_koopman_net(params):
"""Create a Koopman network that encodes, advances in time, and decodes.
Arguments:
- phase -- boolean placeholder for dropout: training phase or not training phase
- keep_prob -- probability that weight is kept during dropout
params -- dictionary of parameters for experiment
Returns:
@@ -472,12 +453,12 @@ def create_koopman_net(phase, keep_prob, params):
dist_biases=params['dist_biases'][0:depth + 1], scale=params['scale'],
num_shifts_max=max_shifts_to_stack, first_guess=params['first_guess'])
params['num_encoder_weights'] = len(weights)
- g_list = encoder_apply(x, weights, biases, params['act_type'], params['batch_flag'], phase,
- shifts_middle=params['shifts_middle'], keep_prob=keep_prob,
+ g_list = encoder_apply(x, weights, biases, params['act_type'], params['batch_flag'],
+ shifts_middle=params['shifts_middle'],
num_encoder_weights=params['num_encoder_weights'])

# g_list_omega is list of omegas, one entry for each middle_shift of x (like g_list)
- omegas, weights_omega, biases_omega = create_omega_net(phase, keep_prob, params, g_list[0])
+ omegas, weights_omega, biases_omega = create_omega_net(params, g_list[0])
# params['num_omega_weights'] = len(weights_omega) already done inside create_omega_net
weights.update(weights_omega)
biases.update(biases_omega)
@@ -494,7 +475,7 @@ def create_koopman_net(phase, keep_prob, params):
# y[0] is x[0,:,:] encoded and then decoded (no stepping forward)
encoded_layer = g_list[0]
params['num_decoder_weights'] = depth + 1
- y.append(decoder_apply(encoded_layer, weights, biases, params['act_type'], params['batch_flag'], phase, keep_prob,
+ y.append(decoder_apply(encoded_layer, weights, biases, params['act_type'], params['batch_flag'],
params['num_decoder_weights']))

# g_list_omega[0] is for x[0,:,:], pairs with g_list[0]=encoded_layer
@@ -504,10 +485,10 @@ def create_koopman_net(phase, keep_prob, params):
for j in np.arange(max(params['shifts'])):
# considering penalty on subset of yk+1, yk+2, yk+3, ...
if (j + 1) in params['shifts']:
- y.append(decoder_apply(advanced_layer, weights, biases, params['act_type'], params['batch_flag'], phase,
- keep_prob, params['num_decoder_weights']))
+ y.append(decoder_apply(advanced_layer, weights, biases, params['act_type'], params['batch_flag'],
+ params['num_decoder_weights']))

- omegas = omega_net_apply(phase, keep_prob, params, advanced_layer, weights, biases)
+ omegas = omega_net_apply(params, advanced_layer, weights, biases)
advanced_layer = varying_multiply(advanced_layer, omegas, params['delta_t'], params['num_real'],
params['num_complex_pairs'])

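After these changes each layer in networkarch.py is just a linear map, optional batch normalization, then the chosen activation, with no dropout branch. If dropout were ever wanted again, the deleted lines above show the shape of the change; a self-contained TensorFlow 1.x sketch (hypothetical, not part of the repo after this commit):

import tensorflow as tf

keep_prob = tf.placeholder(tf.float64, shape=[], name='keep_prob')  # would have to be re-threaded through the call chain
h = tf.placeholder(tf.float64, shape=[None, 10], name='hidden')     # stand-in for a hidden-layer output
h = tf.nn.elu(h)
h = tf.cond(keep_prob < 1.0, lambda: tf.nn.dropout(h, keep_prob), lambda: h)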
21 changes: 9 additions & 12 deletions training.py
@@ -8,7 +8,7 @@
import networkarch as net


- def define_loss(x, y, g_list, weights, biases, params, phase, keep_prob):
+ def define_loss(x, y, g_list, weights, biases, params):
"""Define the (unregularized) loss functions for the training.
Arguments:
@@ -18,8 +18,6 @@ def define_loss(x, y, g_list, weights, biases, params, phase, keep_prob):
weights -- dictionary of weights for all networks
biases -- dictionary of biases for all networks
params -- dictionary of parameters for experiment
- phase -- boolean placeholder for dropout: training phase or not training phase
- keep_prob -- probability that weight is kept during dropout
Returns:
loss1 -- autoencoder loss function
@@ -66,7 +64,7 @@ def define_loss(x, y, g_list, weights, biases, params, phase, keep_prob):
count_shifts_middle = 0
if params['num_shifts_middle'] > 0:
# generalization of: next_step = tf.matmul(g_list[0], L_pow)
- omegas = net.omega_net_apply(phase, keep_prob, params, g_list[0], weights, biases)
+ omegas = net.omega_net_apply(params, g_list[0], weights, biases)
next_step = net.varying_multiply(g_list[0], omegas, params['delta_t'], params['num_real'],
params['num_complex_pairs'])
# multiply g_list[0] by L (j+1) times
@@ -81,7 +79,7 @@ def define_loss(x, y, g_list, weights, biases, params, phase, keep_prob):
tf.reduce_mean(tf.reduce_mean(tf.square(next_step - g_list[count_shifts_middle + 1]), 1)),
loss3_denominator)
count_shifts_middle += 1
- omegas = net.omega_net_apply(phase, keep_prob, params, next_step, weights, biases)
+ omegas = net.omega_net_apply(params, next_step, weights, biases)
next_step = net.varying_multiply(next_step, omegas, params['delta_t'], params['num_real'],
params['num_complex_pairs'])

@@ -157,15 +155,13 @@ def try_net(data_val, params):
Builds TensorFlow graph (reset in main_exp)
"""
# SET UP NETWORK
- phase = tf.placeholder(tf.bool, name='phase')
- keep_prob = tf.placeholder(tf.float64, shape=[], name='keep_prob')
- x, y, g_list, weights, biases = net.create_koopman_net(phase, keep_prob, params)
+ x, y, g_list, weights, biases = net.create_koopman_net(params)

max_shifts_to_stack = helperfns.num_shifts_in_stack(params)

# DEFINE LOSS FUNCTION
trainable_var = tf.trainable_variables()
- loss1, loss2, loss3, loss_Linf, loss = define_loss(x, y, g_list, weights, biases, params, phase, keep_prob)
+ loss1, loss2, loss3, loss_Linf, loss = define_loss(x, y, g_list, weights, biases, params)
loss_L1, loss_L2, regularized_loss, regularized_loss1 = define_regularization(params, trainable_var, loss, loss1)

# CHOOSE OPTIMIZATION ALGORITHM
@@ -225,9 +221,9 @@ def try_net(data_val, params):

batch_data_train = data_train_tensor[:, offset:(offset + params['batch_size']), :]

- feed_dict_train = {x: batch_data_train, phase: 1, keep_prob: params['dropout_rate']}
- feed_dict_train_loss = {x: batch_data_train, phase: 1, keep_prob: 1.0}
- feed_dict_val = {x: data_val_tensor, phase: 0, keep_prob: 1.0}
+ feed_dict_train = {x: batch_data_train}
+ feed_dict_train_loss = {x: batch_data_train}
+ feed_dict_val = {x: data_val_tensor}

if (not params['been5min']) and params['auto_first']:
sess.run(optimizer_autoencoder, feed_dict=feed_dict_train)
@@ -288,6 +284,7 @@ def try_net(data_val, params):
helperfns.save_files(sess, csv_path, train_val_error, params, weights, biases)
tf.reset_default_graph()


def main_exp(params):
"""Set up and run one random experiment.
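One side effect: an experiment script that still sets params['dropout_rate'] now has no effect, since nothing in the diff above reads that key any more. A hypothetical guard (not in the repo) that would make this explicit:

if params.get('dropout_rate', 1.0) != 1.0:
    print("warning: dropout was removed from the networks; params['dropout_rate'] is ignored")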
