From 5a59ff9c709626c560cd5a8272fe33a195afc617 Mon Sep 17 00:00:00 2001
From: Bethany Lusch
Date: Mon, 16 Apr 2018 22:00:02 -0700
Subject: [PATCH] improved docstrings and removed unused out_flag

---
 helperfns.py   |  53 +++++++++++++---
 networkarch.py | 166 ++++++++++++++++++++++++++++++++++++++++++-------
 training.py    |  35 +++++++++--
 3 files changed, 222 insertions(+), 32 deletions(-)

diff --git a/helperfns.py b/helperfns.py
index e74c718..e6a7f59 100644
--- a/helperfns.py
+++ b/helperfns.py
@@ -7,7 +7,13 @@
 
 
 def stack_data(data, num_shifts, len_time):
-    """Stack data from a matrix into a tensor."""
+    """Stack data from a 2D array into a 3D array.
+
+    Arguments:
+        data -- data matrix to be reshaped
+        num_shifts -- number of shifts (time steps) that losses will use (maximum is len_time - 1)
+        len_time -- number of time steps in each trajectory in data
+    """
     nd = data.ndim
     if nd > 1:
         n = data.shape[1]
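For reference, a minimal NumPy sketch of the reshaping that stack_data's new docstring describes (an illustrative stand-in, not code from this patch): rows holding num_traj trajectories of len_time steps each are stacked into a 3D array indexed by shift.

    import numpy as np

    # Illustrative stand-in for stack_data: stacked[j] holds the data advanced by j steps.
    num_traj, len_time, n, num_shifts = 2, 5, 3, 2
    data = np.random.rand(num_traj * len_time, n)

    new_len_time = len_time - num_shifts
    stacked = np.zeros([num_shifts + 1, num_traj * new_len_time, n])
    for j in range(num_shifts + 1):
        for traj in range(num_traj):
            rows = np.arange(traj * new_len_time, (traj + 1) * new_len_time)
            stacked[j, rows, :] = data[traj * len_time + j: traj * len_time + j + new_len_time, :]
    print(stacked.shape)  # (3, 6, 3): shift index, stacked samples, state dimension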
@@ -29,7 +35,13 @@
 
 
 def choose_optimizer(params, regularized_loss, trainable_var):
-    """Choose which optimizer to use for the network training."""
+    """Choose which optimizer to use for the network training.
+
+    Arguments:
+        params -- dictionary of parameters for experiment
+        regularized_loss -- loss, including regularization
+        trainable_var -- list of trainable TensorFlow variables
+    """
     if params['opt_alg'] == 'adam':
         optimizer = tf.train.AdamOptimizer(params['learning_rate']).minimize(regularized_loss, var_list=trainable_var)
     elif params['opt_alg'] == 'adadelta':
@@ -79,7 +91,13 @@
 
 
 def check_progress(start, best_error, params):
-    """Check on the progress of the network training and decide if it's time to stop."""
+    """Check on the progress of the network training and decide if it's time to stop.
+
+    Arguments:
+        start -- time that experiment started
+        best_error -- best error so far in training
+        params -- dictionary of parameters for experiment
+    """
     finished = 0
     save_now = 0
 
@@ -188,7 +206,16 @@
 
 
 def save_files(sess, csv_path, train_val_error, params, weights, biases):
-    """Save error files, weights, biases, and parameters"""
+    """Save error files, weights, biases, and parameters.
+
+    Arguments:
+        sess -- TensorFlow session
+        csv_path -- string for path to save error file as csv
+        train_val_error -- table of training and validation errors
+        params -- dictionary of parameters for experiment
+        weights -- dictionary of weights for all networks
+        biases -- dictionary of biases for all networks
+    """
     np.savetxt(csv_path, train_val_error, delimiter=',')
 
     for key, value in weights.items():
@@ -205,13 +232,21 @@
 
 
 def save_params(params):
-    """Save parameter dictionary to file."""
+    """Save parameter dictionary to file.
+
+    Arguments:
+        params -- dictionary of parameters for experiment
+    """
     with open(params['model_path'].replace('ckpt', 'pkl'), 'wb') as f:
         pickle.dump(params, f, pickle.HIGHEST_PROTOCOL)
 
 
 def set_defaults(params):
-    """Set defaults and make some checks in parameters dictionary."""
+    """Set defaults and make some checks in parameters dictionary.
+
+    Arguments:
+        params -- dictionary of parameters for experiment
+    """
     # defaults related to dataset
     if 'data_name' not in params:
         raise KeyError("Error: must give data_name as input to main")
@@ -399,7 +434,11 @@
 
 
 def num_shifts_in_stack(params):
-    """Calculate how many time points (shifts) will be used in loss functions."""
+    """Calculate how many time points (shifts) will be used in loss functions.
+
+    Arguments:
+        params -- dictionary of parameters for experiment
+    """
     max_shifts_to_stack = 1
     if params['num_shifts']:
         max_shifts_to_stack = max(max_shifts_to_stack, max(params['shifts']))
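A small sketch of the rule num_shifts_in_stack implements (params values here are hypothetical, and the shifts_middle check is assumed by analogy with the prediction loss, as the encoder docstring's "max of num_shifts and num_shifts_middle" suggests): the stack depth is the deepest shift referenced by either loss, with a floor of 1.

    # Hypothetical params subset; mirrors num_shifts_in_stack's logic.
    params = {'num_shifts': 3, 'shifts': [1, 2, 3],
              'num_shifts_middle': 2, 'shifts_middle': [1, 2]}
    max_shifts_to_stack = 1
    if params['num_shifts']:
        max_shifts_to_stack = max(max_shifts_to_stack, max(params['shifts']))
    if params['num_shifts_middle']:
        max_shifts_to_stack = max(max_shifts_to_stack, max(params['shifts_middle']))
    print(max_shifts_to_stack)  # 3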
diff --git a/networkarch.py b/networkarch.py
index a935817..3addd4d 100644
--- a/networkarch.py
+++ b/networkarch.py
@@ -5,7 +5,15 @@
 
 
 def weight_variable(shape, var_name, distribution='tn', scale=0.1, first_guess=0):
-    """Create a variable for a weight matrix."""
+    """Create a variable for a weight matrix.
+
+    Arguments:
+        shape -- array giving shape of output weight variable
+        var_name -- string naming weight variable
+        distribution -- string for which distribution to use for random initialization
+        scale -- (for tn distribution): standard deviation of normal distribution before truncation
+        first_guess -- (for tn distribution): array of first guess for weight matrix, added to tn dist.
+    """
     if distribution == 'tn':
         initial = tf.truncated_normal(shape, stddev=scale, dtype=tf.float64) + first_guess
     elif distribution == 'xavier':
@@ -35,7 +43,13 @@
 
 
 def bias_variable(shape, var_name, distribution=''):
-    """Create a variable for a bias vector."""
+    """Create a variable for a bias vector.
+
+    Arguments:
+        shape -- array giving shape of output bias variable
+        var_name -- string naming bias variable
+        distribution -- string for which distribution to use for random initialization (file name)
+    """
     if distribution:
         initial = np.genfromtxt(distribution, delimiter=',', dtype=np.float64)
     else:
@@ -44,7 +58,16 @@
 
 
 def encoder(widths, dist_weights, dist_biases, scale, num_shifts_max, first_guess):
-    """Create an encoder network: an input placeholder x, dictionary of weights, and dictionary of biases."""
+    """Create an encoder network: an input placeholder x, dictionary of weights, and dictionary of biases.
+
+    Arguments:
+        widths -- array or list of widths for layers of network
+        dist_weights -- array or list of strings for distributions of weight matrices
+        dist_biases -- array or list of strings for distributions of bias vectors
+        scale -- (for tn distribution of weight matrices): standard deviation of normal distribution before truncation
+        num_shifts_max -- number of shifts (time steps) that losses will use (max of num_shifts and num_shifts_middle)
+        first_guess -- (for tn dist. of weight matrices): array of first guess for weight matrix, added to tn dist.
+    """
     x = tf.placeholder(tf.float64, [num_shifts_max + 1, None, widths[0]])
 
     weights = dict()
@@ -60,9 +83,22 @@
     return x, weights, biases
 
 
-def encoder_apply(x, weights, biases, act_type, batch_flag, phase, out_flag, shifts_middle, keep_prob, name='E',
+def encoder_apply(x, weights, biases, act_type, batch_flag, phase, shifts_middle, keep_prob, name='E',
                   num_encoder_weights=1):
-    """Apply an encoder to data x."""
+    """Apply an encoder to data x.
+
+    Arguments:
+        x -- placeholder for input
+        weights -- dictionary of weights
+        biases -- dictionary of biases
+        act_type -- string for activation type for nonlinear layers (i.e. sigmoid, relu, or elu)
+        batch_flag -- 0 if no batch_normalization, 1 if batch_normalization
+        phase -- boolean placeholder for dropout: training phase or not training phase
+        shifts_middle -- list of shifts (steps) in x to apply encoder to for linearity loss
+        keep_prob -- probability that weight is kept during dropout
+        name -- string for prefix on weight matrices, default 'E' for encoder
+        num_encoder_weights -- number of weight matrices (layers) in encoder network, default 1
+    """
     y = []
     num_shifts_middle = len(shifts_middle)
     for j in np.arange(num_shifts_middle + 1):
@@ -75,14 +111,26 @@
         else:
             x_shift = tf.squeeze(x[shift, :, :])
         y.append(
-            encoder_apply_one_shift(x_shift, weights, biases, act_type, batch_flag, phase, out_flag, keep_prob, name,
+            encoder_apply_one_shift(x_shift, weights, biases, act_type, batch_flag, phase, keep_prob, name,
                                     num_encoder_weights))
     return y
 
 
-def encoder_apply_one_shift(prev_layer, weights, biases, act_type, batch_flag, phase, out_flag, keep_prob, name='E',
+def encoder_apply_one_shift(prev_layer, weights, biases, act_type, batch_flag, phase, keep_prob, name='E',
                             num_encoder_weights=1):
-    """Apply an encoder to data for only one time step (shift)."""
+    """Apply an encoder to data for only one time step (shift).
+
+    Arguments:
+        prev_layer -- input for a particular time step (shift)
+        weights -- dictionary of weights
+        biases -- dictionary of biases
+        act_type -- string for activation type for nonlinear layers (i.e. sigmoid, relu, or elu)
+        batch_flag -- 0 if no batch_normalization, 1 if batch_normalization
+        phase -- boolean placeholder for dropout: training phase or not training phase
+        keep_prob -- probability that weight is kept during dropout
+        name -- string for prefix on weight matrices, default 'E' (for "encoder")
+        num_encoder_weights -- number of weight matrices (layers) in encoder network, default 1
+    """
     for i in np.arange(num_encoder_weights - 1):
         h1 = tf.matmul(prev_layer, weights['W%s%d' % (name, i + 1)]) + biases['b%s%d' % (name, i + 1)]
         if batch_flag:
@@ -99,14 +147,23 @@
     final = tf.matmul(prev_layer, weights['W%s%d' % (name, num_encoder_weights)]) + biases[
         'b%s%d' % (name, num_encoder_weights)]
 
-    if (not out_flag) and batch_flag:
+    if batch_flag:
         final = tf.contrib.layers.batch_norm(final, is_training=phase)
 
     return final
 
 
 def decoder(widths, dist_weights, dist_biases, scale, name='D', first_guess=0):
-    """Create a decoder network: a dictionary of weights and a dictionary of biases."""
+    """Create a decoder network: a dictionary of weights and a dictionary of biases.
+
+    Arguments:
+        widths -- array or list of widths for layers of network
+        dist_weights -- array or list of strings for distributions of weight matrices
+        dist_biases -- array or list of strings for distributions of bias vectors
+        scale -- (for tn distribution of weight matrices): standard deviation of normal distribution before truncation
+        name -- string for prefix on weight matrices, default 'D' (for "decoder")
+        first_guess -- (for tn dist. of weight matrices): array of first guess for weight matrix, added to tn dist.
+    """
     weights = dict()
     biases = dict()
     for i in np.arange(len(widths) - 1):
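To make the weight-naming convention concrete, here is a NumPy stand-in (not the patch's TensorFlow code) for one pass of encoder_apply_one_shift with batch_flag = 0 and dropout off: nonlinear layers use keys 'WE1'/'bE1' and so on, and the final layer is linear. Shapes and values are made up for illustration.

    import numpy as np

    def relu(x):
        return np.maximum(x, 0)

    # Toy shapes: batch of 10 states with n = 3, one hidden layer of width 8,
    # mapped to k = 2 y-coordinates; num_encoder_weights = 2.
    weights = {'WE1': np.random.randn(3, 8), 'WE2': np.random.randn(8, 2)}
    biases = {'bE1': np.zeros(8), 'bE2': np.zeros(2)}
    prev_layer = np.random.rand(10, 3)

    h1 = relu(prev_layer.dot(weights['WE1']) + biases['bE1'])  # nonlinear layer
    final = h1.dot(weights['WE2']) + biases['bE2']             # linear output layer
    print(final.shape)  # (10, 2)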
+ """ weights = dict() biases = dict() for i in np.arange(len(widths) - 1): @@ -120,7 +177,18 @@ def decoder(widths, dist_weights, dist_biases, scale, name='D', first_guess=0): def decoder_apply(prev_layer, weights, biases, act_type, batch_flag, phase, keep_prob, num_decoder_weights): - """Apply a decoder to data prev_layer""" + """Apply a decoder to data prev_layer + + Arguments: + prev_layer -- input to decoder network + weights -- dictionary of weights + biases -- dictionary of biases + act_type -- string for activation type for nonlinear layers (i.e. sigmoid, relu, or elu) + batch_flag -- 0 if no batch_normalization, 1 if batch_normalization + phase -- boolean placeholder for dropout: training phase or not training phase + keep_prob -- probability that weight is kept during dropout + num_decoder_weights -- number of weight matrices (layers) in decoder network + """ for i in np.arange(num_decoder_weights - 1): h1 = tf.matmul(prev_layer, weights['WD%d' % (i + 1)]) + biases['bD%d' % (i + 1)] if batch_flag: @@ -138,7 +206,16 @@ def decoder_apply(prev_layer, weights, biases, act_type, batch_flag, phase, keep def form_complex_conjugate_block(omegas, delta_t): - """Form a 2x2 block for a complex conj. pair of eigenvalues, but for each example: dimension [None, 2, 2]""" + """Form a 2x2 block for a complex conj. pair of eigenvalues, but for each example, so dimension [None, 2, 2] + + 2x2 Block is + exp(mu * delta_t) * [cos(omega * delta_t), -sin(omega * delta_t) + sin(omega * delta_t), cos(omega * delta_t)] + + Arguments: + omegas -- array of parameters for blocks. first column is freq. (omega) and 2nd is scaling (mu), size [None, 2] + delta_t -- time step in trajectories from input data + """ scale = tf.exp(omegas[:, 1] * delta_t) entry11 = tf.multiply(scale, tf.cos(omegas[:, 0] * delta_t)) entry12 = tf.multiply(scale, tf.sin(omegas[:, 0] * delta_t)) @@ -148,7 +225,15 @@ def form_complex_conjugate_block(omegas, delta_t): def varying_multiply(y, omegas, delta_t, num_real, num_complex_pairs): - """Multiply y-coordinates on the left by matrix L, but let matrix vary.""" + """Multiply y-coordinates on the left by matrix L, but let matrix vary. + + Arguments: + y -- array of shape [None, k] of y-coordinates, where L will be k x k + omegas -- list of arrays of parameters for the L matrices + delta_t -- time step in trajectories from input data + num_real -- number of real eigenvalues + num_complex_pairs -- number of pairs of complex conjugate eigenvalues + """ k = y.shape[1] complex_list = [] @@ -183,7 +268,14 @@ def varying_multiply(y, omegas, delta_t, num_real, num_complex_pairs): def create_omega_net(phase, keep_prob, params, ycoords): - """Create the auxiliary (omega) network(s), which have ycoords as input and output omegas (parameters for L).""" + """Create the auxiliary (omega) network(s), which have ycoords as input and output omegas (parameters for L). + + Arguments: + phase -- boolean placeholder for dropout: training phase or not training phase + keep_prob -- probability that weight is kept during dropout + params -- dictionary of parameters for experiment + ycoords -- array of shape [None, k] of y-coordinates, where L will be k x k + """ weights = dict() biases = dict() @@ -203,7 +295,15 @@ def create_omega_net(phase, keep_prob, params, ycoords): def create_one_omega_net(params, temp_name, weights, biases, widths): - """Create one auxiliary (omega) network for one real eigenvalue or a pair of complex conj. 
eigenvalues.""" + """Create one auxiliary (omega) network for one real eigenvalue or a pair of complex conj. eigenvalues. + + Arguments: + params -- dictionary of parameters for experiment + temp_name -- string for prefix on weight matrices, i.e. OC1 or OR1 + weights -- dictionary of weights + biases -- dictionary of biases + widths -- array or list of widths for layers of network + """ weightsO, biasesO = decoder(widths, dist_weights=params['dist_weights_omega'], dist_biases=params['dist_biases_omega'], scale=params['scale_omega'], name=temp_name, first_guess=params['first_guess_omega']) @@ -212,8 +312,16 @@ def create_one_omega_net(params, temp_name, weights, biases, widths): def omega_net_apply(phase, keep_prob, params, ycoords, weights, biases): - """Apply the omega (auxiliary) network(s) to the y-coordinates.""" - """""" + """Apply the omega (auxiliary) network(s) to the y-coordinates. + + Arguments: + phase -- boolean placeholder for dropout: training phase or not training phase + keep_prob -- probability that weight is kept during dropout + params -- dictionary of parameters for experiment + ycoords -- array of shape [None, k] of y-coordinates, where L will be k x k + weights -- dictionary of weights + biases -- dictionary of biases + """ omegas = [] for j in np.arange(params['num_complex_pairs']): temp_name = 'OC%d_' % (j + 1) @@ -229,7 +337,17 @@ def omega_net_apply(phase, keep_prob, params, ycoords, weights, biases): def omega_net_apply_one(phase, keep_prob, params, ycoords, weights, biases, name): - """Apply one auxiliary (omega) network for one real eigenvalue or a pair of complex conj. eigenvalues.""" + """Apply one auxiliary (omega) network for one real eigenvalue or a pair of complex conj. eigenvalues. + + Arguments: + phase -- boolean placeholder for dropout: training phase or not training phase + keep_prob -- probability that weight is kept during dropout + params -- dictionary of parameters for experiment + ycoords -- array of shape [None, k] of y-coordinates, where L will be k x k + weights -- dictionary of weights + biases -- dictionary of biases + name -- string for prefix on weight matrices, i.e. OC1 or OR1 + """ if len(ycoords.shape) == 1: ycoords = ycoords[:, np.newaxis] @@ -241,13 +359,19 @@ def omega_net_apply_one(phase, keep_prob, params, ycoords, weights, biases, name input = ycoords omegas = encoder_apply_one_shift(input, weights, biases, params['act_type'], params['batch_flag'], phase, - out_flag=0, keep_prob=keep_prob, name=name, + keep_prob=keep_prob, name=name, num_encoder_weights=params['num_omega_weights']) return omegas def create_koopman_net(phase, keep_prob, params): - """Create a Koopman network that encodes, advances in time, and decodes.""" + """Create a Koopman network that encodes, advances in time, and decodes. 
@@ -241,13 +359,19 @@
         input = ycoords
 
     omegas = encoder_apply_one_shift(input, weights, biases, params['act_type'], params['batch_flag'], phase,
-                                     out_flag=0, keep_prob=keep_prob, name=name,
+                                     keep_prob=keep_prob, name=name,
                                      num_encoder_weights=params['num_omega_weights'])
     return omegas
 
 
 def create_koopman_net(phase, keep_prob, params):
-    """Create a Koopman network that encodes, advances in time, and decodes."""
+    """Create a Koopman network that encodes, advances in time, and decodes.
+
+    Arguments:
+        phase -- boolean placeholder for dropout: training phase or not training phase
+        keep_prob -- probability that weight is kept during dropout
+        params -- dictionary of parameters for experiment
+    """
     depth = int((params['d'] - 4) / 2)
 
     max_shifts_to_stack = helperfns.num_shifts_in_stack(params)
@@ -257,7 +381,7 @@
                       dist_biases=params['dist_biases'][0:depth + 1], scale=params['scale'],
                       num_shifts_max=max_shifts_to_stack, first_guess=params['first_guess'])
     params['num_encoder_weights'] = len(weights)
-    g_list = encoder_apply(x, weights, biases, params['act_type'], params['batch_flag'], phase, out_flag=0,
+    g_list = encoder_apply(x, weights, biases, params['act_type'], params['batch_flag'], phase,
                            shifts_middle=params['shifts_middle'], keep_prob=keep_prob,
                            num_encoder_weights=params['num_encoder_weights'])
 
diff --git a/training.py b/training.py
index 054a89d..29748f5 100644
--- a/training.py
+++ b/training.py
@@ -9,7 +9,18 @@
 
 
 def define_loss(x, y, g_list, weights, biases, params, phase, keep_prob):
-    """Define the (unregularized) loss functions for the training."""
+    """Define the (unregularized) loss functions for the training.
+
+    Arguments:
+        x -- placeholder for input
+        y -- list of outputs of network for each shift (each prediction step)
+        g_list -- list of output of encoder for each shift (encoding each step in x)
+        weights -- dictionary of weights for all networks
+        biases -- dictionary of biases for all networks
+        params -- dictionary of parameters for experiment
+        phase -- boolean placeholder for dropout: training phase or not training phase
+        keep_prob -- probability that weight is kept during dropout
+    """
     # Minimize the mean squared errors.
     # subtraction and squaring element-wise, then average over both dimensions
     # n columns
@@ -86,7 +97,14 @@
 
 
 def define_regularization(params, trainable_var, loss, loss1):
-    """Define the regularization and add to loss."""
+    """Define the regularization and add to loss.
+
+    Arguments:
+        params -- dictionary of parameters for experiment
+        trainable_var -- list of trainable TensorFlow variables
+        loss -- the unregularized loss
+        loss1 -- the autoencoder component of the loss
+    """
     if params['L1_lam']:
         l1_regularizer = tf.contrib.layers.l1_regularizer(scale=params['L1_lam'], scope=None)
         # TODO: don't include biases? use weights dict instead?
@@ -105,7 +123,12 @@
 
 
 def try_net(data_val, params):
-    """Run a random experiment for particular params and data."""
+    """Run a random experiment for particular params and data.
+
+    Arguments:
+        data_val -- array containing validation dataset
+        params -- dictionary of parameters for experiment
+    """
     # SET UP NETWORK
     phase = tf.placeholder(tf.bool, name='phase')
     keep_prob = tf.placeholder(tf.float64, shape=[], name='keep_prob')
@@ -239,7 +262,11 @@
 
 
 def main_exp(params):
-    """Set up and run one random experiment."""
+    """Set up and run one random experiment.
+
+    Arguments:
+        params -- dictionary of parameters for experiment
+    """
     helperfns.set_defaults(params)
 
     if not os.path.exists(params['folder_name']):
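Finally, a hypothetical driver sketch showing how the documented params dictionary flows into main_exp. Only keys referenced in this patch are set; the values are placeholders, set_defaults fills in everything else, and the data files implied by data_name are assumed to already exist on disk.

    import training

    # Hypothetical experiment setup; each key below is referenced in the patch.
    params = {}
    params['data_name'] = 'Pendulum'        # required: set_defaults raises KeyError without it
    params['folder_name'] = 'exp1_results'  # output directory, created by main_exp if missing
    params['opt_alg'] = 'adam'              # optimizer branch in choose_optimizer
    params['learning_rate'] = 1e-3          # passed to tf.train.AdamOptimizer

    training.main_exp(params)               # set_defaults, then one random experiment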