diff --git a/helperfns.py b/helperfns.py
index e6a7f59..1992197 100644
--- a/helperfns.py
+++ b/helperfns.py
@@ -10,9 +10,12 @@ def stack_data(data, num_shifts, len_time):
     """Stack data from a 2D array into a 3D array.
 
     Arguments:
-        data -- data matrix to be reshaped
+        data -- 2D data array to be reshaped
         num_shifts -- number of shifts (time steps) that losses will use (maximum is len_time - 1)
         len_time -- number of time steps in each trajectory in data
+
+    Returns:
+        data_tensor -- data reshaped into 3D array, shape: num_shifts + 1, num_traj * (len_time - num_shifts), n
     """
     nd = data.ndim
     if nd > 1:
@@ -41,6 +44,9 @@ def choose_optimizer(params, regularized_loss, trainable_var):
         params -- dictionary of parameters for experiment
         regularized_loss -- loss, including regularization
         trainable_var -- list of trainable TensorFlow variables
+
+    Returns:
+        optimizer -- optimizer from TensorFlow Class optimizer
     """
     if params['opt_alg'] == 'adam':
         optimizer = tf.train.AdamOptimizer(params['learning_rate']).minimize(regularized_loss, var_list=trainable_var)
@@ -97,6 +103,10 @@ def check_progress(start, best_error, params):
         start -- time that experiment started
         best_error -- best error so far in training
         params -- dictionary of parameters for experiment
+
+    Returns:
+        finished -- 0 if should continue training, 1 if should stop training
+        save_now -- 0 if don't need to save results, 1 if should save results
     """
     finished = 0
     save_now = 0
@@ -215,6 +225,9 @@ def save_files(sess, csv_path, train_val_error, params, weights, biases):
         params -- dictionary of parameters for experiment
         weights -- dictionary of weights for all networks
         biases -- dictionary of biases for all networks
+
+    Returns:
+        None (but side effect of saving files and updating params dict.)
     """
     np.savetxt(csv_path, train_val_error, delimiter=',')
 
@@ -236,6 +249,9 @@ def save_params(params):
 
     Arguments:
         params -- dictionary of parameters for experiment
+
+    Returns:
+        None (but side effect of saving params dict to pkl file)
     """
     with open(params['model_path'].replace('ckpt', 'pkl'), 'wb') as f:
         pickle.dump(params, f, pickle.HIGHEST_PROTOCOL)
@@ -246,6 +262,9 @@ def set_defaults(params):
 
     Arguments:
        params -- dictionary of parameters for experiment
+
+    Returns:
+        None (but side effect of updating params dict)
     """
     # defaults related to dataset
     if 'data_name' not in params:
@@ -438,6 +457,9 @@ def num_shifts_in_stack(params):
 
     Arguments:
         params -- dictionary of parameters for experiment
+
+    Returns:
+        max_shifts_to_stack -- max number of shifts to use in loss functions
     """
     max_shifts_to_stack = 1
     if params['num_shifts']:
diff --git a/networkarch.py b/networkarch.py
index 3addd4d..eb57b26 100644
--- a/networkarch.py
+++ b/networkarch.py
@@ -13,6 +13,9 @@ def weight_variable(shape, var_name, distribution='tn', scale=0.1, first_guess=0
         distribution -- string for which distribution to use for random initialization
         scale -- (for tn distribution): standard deviation of normal distribution before truncation
         first_guess -- (for tn distribution): array of first guess for weight matrix, added to tn dist.
+
+    Returns:
+        a TensorFlow variable for a weight matrix
     """
     if distribution == 'tn':
         initial = tf.truncated_normal(shape, stddev=scale, dtype=tf.float64) + first_guess
@@ -49,6 +52,9 @@ def bias_variable(shape, var_name, distribution=''):
         shape -- array giving shape of output bias variable
         var_name -- string naming bias variable
         distribution -- string for which distribution to use for random initialization (file name)
+
+    Returns:
+        a TensorFlow variable for a bias vector
     """
     if distribution:
         initial = np.genfromtxt(distribution, delimiter=',', dtype=np.float64)
@@ -67,6 +73,11 @@ def encoder(widths, dist_weights, dist_biases, scale, num_shifts_max, first_gues
         scale -- (for tn distribution of weight matrices): standard deviation of normal distribution before truncation
         num_shifts_max -- number of shifts (time steps) that losses will use (max of num_shifts and num_shifts_middle)
         first_guess -- (for tn dist. of weight matrices): array of first guess for weight matrix, added to tn dist.
+
+    Returns:
+        x -- placeholder for input
+        weights -- dictionary of weights
+        biases -- dictionary of biases
     """
     x = tf.placeholder(tf.float64, [num_shifts_max + 1, None, widths[0]])
 
@@ -98,6 +109,9 @@ def encoder_apply(x, weights, biases, act_type, batch_flag, phase, shifts_middle
         keep_prob -- probability that weight is kept during dropout
         name -- string for prefix on weight matrices, default 'E' for encoder
         num_encoder_weights -- number of weight matrices (layers) in encoder network, default 1
+
+    Returns:
+        y -- list, output of encoder network applied to each time shift in input x
     """
     y = []
     num_shifts_middle = len(shifts_middle)
@@ -130,6 +144,9 @@ def encoder_apply_one_shift(prev_layer, weights, biases, act_type, batch_flag, p
         keep_prob -- probability that weight is kept during dropout
         name -- string for prefix on weight matrices, default 'E' (for "encoder")
         num_encoder_weights -- number of weight matrices (layers) in encoder network, default 1
+
+    Returns:
+        final -- output of encoder network applied to input prev_layer (a particular time step / shift)
     """
     for i in np.arange(num_encoder_weights - 1):
         h1 = tf.matmul(prev_layer, weights['W%s%d' % (name, i + 1)]) + biases['b%s%d' % (name, i + 1)]
@@ -163,6 +180,10 @@ def decoder(widths, dist_weights, dist_biases, scale, name='D', first_guess=0):
         scale -- (for tn distribution of weight matrices): standard deviation of normal distribution before truncation
         name -- string for prefix on weight matrices, default 'D' (for "decoder")
         first_guess -- (for tn dist. of weight matrices): array of first guess for weight matrix, added to tn dist.
+
+    Returns:
+        weights -- dictionary of weights
+        biases -- dictionary of biases
     """
     weights = dict()
     biases = dict()
@@ -188,6 +209,9 @@ def decoder_apply(prev_layer, weights, biases, act_type, batch_flag, phase, keep
         phase -- boolean placeholder for dropout: training phase or not training phase
         keep_prob -- probability that weight is kept during dropout
         num_decoder_weights -- number of weight matrices (layers) in decoder network
+
+    Returns:
+        output of decoder network applied to input prev_layer
     """
     for i in np.arange(num_decoder_weights - 1):
         h1 = tf.matmul(prev_layer, weights['WD%d' % (i + 1)]) + biases['bD%d' % (i + 1)]
@@ -215,6 +239,9 @@ def form_complex_conjugate_block(omegas, delta_t):
     Arguments:
         omegas -- array of parameters for blocks. first column is freq. (omega) and 2nd is scaling (mu), size [None, 2]
         delta_t -- time step in trajectories from input data
+
+    Returns:
+        stack of 2x2 blocks, size [None, 2, 2], where first dimension matches first dimension of omegas
     """
     scale = tf.exp(omegas[:, 1] * delta_t)
     entry11 = tf.multiply(scale, tf.cos(omegas[:, 0] * delta_t))
@@ -233,6 +260,9 @@ def varying_multiply(y, omegas, delta_t, num_real, num_complex_pairs):
         delta_t -- time step in trajectories from input data
         num_real -- number of real eigenvalues
         num_complex_pairs -- number of pairs of complex conjugate eigenvalues
+
+    Returns:
+        array same size as input y, but advanced to next time step
     """
     k = y.shape[1]
     complex_list = []
@@ -275,6 +305,11 @@ def create_omega_net(phase, keep_prob, params, ycoords):
         keep_prob -- probability that weight is kept during dropout
         params -- dictionary of parameters for experiment
         ycoords -- array of shape [None, k] of y-coordinates, where L will be k x k
+
+    Returns:
+        omegas -- list, output of omega (auxiliary) network(s) applied to input ycoords
+        weights -- dictionary of weights
+        biases -- dictionary of biases
     """
     weights = dict()
     biases = dict()
@@ -303,6 +338,9 @@ def create_one_omega_net(params, temp_name, weights, biases, widths):
         weights -- dictionary of weights
         biases -- dictionary of biases
         widths -- array or list of widths for layers of network
+
+    Returns:
+        None (but side effect of updating weights and biases dictionaries)
     """
     weightsO, biasesO = decoder(widths, dist_weights=params['dist_weights_omega'],
                                 dist_biases=params['dist_biases_omega'], scale=params['scale_omega'], name=temp_name,
@@ -321,6 +359,9 @@ def omega_net_apply(phase, keep_prob, params, ycoords, weights, biases):
         ycoords -- array of shape [None, k] of y-coordinates, where L will be k x k
         weights -- dictionary of weights
         biases -- dictionary of biases
+
+    Returns:
+        omegas -- list, output of omega (auxiliary) network(s) applied to input ycoords
     """
     omegas = []
     for j in np.arange(params['num_complex_pairs']):
@@ -347,6 +388,9 @@ def omega_net_apply_one(phase, keep_prob, params, ycoords, weights, biases, name
         weights -- dictionary of weights
         biases -- dictionary of biases
         name -- string for prefix on weight matrices, i.e. OC1 or OR1
+
+    Returns:
+        omegas - output of one auxiliary (omega) network to input ycoords
     """
     if len(ycoords.shape) == 1:
         ycoords = ycoords[:, np.newaxis]
@@ -371,6 +415,13 @@ def create_koopman_net(phase, keep_prob, params):
         phase -- boolean placeholder for dropout: training phase or not training phase
         keep_prob -- probability that weight is kept during dropout
         params -- dictionary of parameters for experiment
+
+    Returns:
+        x -- placeholder for input
+        y -- list, output of decoder applied to each shift: g_list[0], K*g_list[0], K^2*g_list[0], ..., length num_shifts + 1
+        g_list -- list, output of encoder applied to each shift in input x, length num_shifts_middle + 1
+        weights -- dictionary of weights
+        biases -- dictionary of biases
     """
     depth = int((params['d'] - 4) / 2)
 
diff --git a/training.py b/training.py
index 29748f5..2d86fa5 100644
--- a/training.py
+++ b/training.py
@@ -20,6 +20,13 @@ def define_loss(x, y, g_list, weights, biases, params, phase, keep_prob):
         params -- dictionary of parameters for experiment
         phase -- boolean placeholder for dropout: training phase or not training phase
         keep_prob -- probability that weight is kept during dropout
+
+    Returns:
+        loss1 -- autoencoder loss function
+        loss2 -- dynamics/prediction loss function
+        loss3 -- linearity loss function
+        loss_Linf -- inf norm on autoencoder loss and one-step prediction loss
+        loss -- sum of above four losses
     """
     # Minimize the mean squared errors.
     # subtraction and squaring element-wise, then average over both dimensions
@@ -36,7 +43,7 @@ def define_loss(x, y, g_list, weights, biases, params, phase, keep_prob):
     mean_squared_error = tf.reduce_mean(tf.reduce_mean(tf.square(y[0] - tf.squeeze(x[0, :, :])), 1))
     loss1 = params['recon_lam'] * tf.truediv(mean_squared_error, loss1_denominator)
 
-    # gets dynamics
+    # gets dynamics/prediction
     loss2 = tf.zeros([1, ], dtype=tf.float64)
     if params['num_shifts'] > 0:
         for j in np.arange(params['num_shifts']):
@@ -77,7 +84,7 @@ def define_loss(x, y, g_list, weights, biases, params, phase, keep_prob):
 
         loss3 = loss3 / params['num_shifts_middle']
 
-    # inf norm on autoencoder error
+    # inf norm on autoencoder error and one prediction step
     if params['relative_loss']:
         Linf1_den = tf.norm(tf.norm(tf.squeeze(x[0, :, :]), axis=1, ord=np.inf), ord=np.inf) + denominator_nonzero
         Linf2_den = tf.norm(tf.norm(tf.squeeze(x[1, :, :]), axis=1, ord=np.inf), ord=np.inf) + denominator_nonzero
@@ -104,6 +111,12 @@ def define_regularization(params, trainable_var, loss, loss1):
         trainable_var -- list of trainable TensorFlow variables
         loss -- the unregularized loss
         loss1 -- the autoenocder component of the loss
+
+    Returns:
+        loss_L1 -- L1 regularization on weights W and b
+        loss_L2 -- L2 regularization on weights W
+        regularized_loss -- loss + regularization
+        regularized_loss1 -- loss1 (autoencoder loss) + regularization
     """
     if params['L1_lam']:
         l1_regularizer = tf.contrib.layers.l1_regularizer(scale=params['L1_lam'], scope=None)
@@ -128,6 +141,9 @@ def try_net(data_val, params):
     Arguments:
         data_val -- array containing validation dataset
         params -- dictionary of parameters for experiment
+
+    Returns:
+        None
     """
     # SET UP NETWORK
     phase = tf.placeholder(tf.bool, name='phase')
@@ -266,6 +282,9 @@ def main_exp(params):
 
     Arguments:
         params -- dictionary of parameters for experiment
+
+    Returns:
+        None
     """
     helperfns.set_defaults(params)
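
Note (not part of the patch): to make the new `Returns` entry for `stack_data` concrete, here is a minimal NumPy sketch of the documented reshaping. The helper name `stack_data_sketch`, the trajectory ordering, and the example sizes are hypothetical illustrations of the docstring's shape `(num_shifts + 1, num_traj * (len_time - num_shifts), n)`, not the repo's implementation.

```python
import numpy as np

def stack_data_sketch(data, num_shifts, len_time):
    """Illustrative only: reshape 2D data of shape (num_traj * len_time, n)
    into the documented 3D shape (num_shifts + 1, num_traj * (len_time - num_shifts), n)."""
    n = data.shape[1]
    num_traj = data.shape[0] // len_time
    data_tensor = np.zeros((num_shifts + 1, num_traj * (len_time - num_shifts), n))
    for shift in range(num_shifts + 1):
        for traj in range(num_traj):
            start = traj * len_time + shift
            # each trajectory contributes its (len_time - num_shifts) windows at this shift
            rows = slice(traj * (len_time - num_shifts), (traj + 1) * (len_time - num_shifts))
            data_tensor[shift, rows, :] = data[start:start + len_time - num_shifts, :]
    return data_tensor

# e.g. 5 trajectories of 50 steps in R^2, stacked with 3 shifts -> (4, 235, 2)
example = stack_data_sketch(np.random.randn(5 * 50, 2), num_shifts=3, len_time=50)
assert example.shape == (4, 5 * (50 - 3), 2)
```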
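
Similarly, the `Returns` text for `form_complex_conjugate_block` (a stack of 2x2 blocks of size [None, 2, 2] built from exp(mu * delta_t) and cos/sin of omega * delta_t, as the visible `entry11` context line suggests) can be sanity-checked with a small NumPy sketch. The placement of the off-diagonal sin terms is an assumption here; the hunk only shows `entry11`.

```python
import numpy as np

def complex_conjugate_block_sketch(omegas, delta_t):
    """Illustrative only: build one 2x2 block per row of omegas.
    omegas[:, 0] is the frequency omega, omegas[:, 1] is the scaling mu,
    matching the docstring in the diff; off-diagonal signs are assumed."""
    scale = np.exp(omegas[:, 1] * delta_t)              # exp(mu * dt)
    cos_term = scale * np.cos(omegas[:, 0] * delta_t)   # matches entry11 in the hunk
    sin_term = scale * np.sin(omegas[:, 0] * delta_t)
    blocks = np.stack([np.stack([cos_term, -sin_term], axis=1),
                       np.stack([sin_term, cos_term], axis=1)], axis=1)
    return blocks                                        # shape [None, 2, 2]

omegas = np.array([[2.0 * np.pi, -0.1]])                 # one pair: omega = 2*pi, mu = -0.1
print(complex_conjugate_block_sketch(omegas, delta_t=0.01).shape)  # (1, 2, 2)
```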
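
Finally, the two training.py docstrings together describe how the objective is assembled: `define_loss` returns `loss` as the sum of the four component losses, and `define_regularization` adds the L1/L2 penalties on top. A tiny sketch with hypothetical scalar values, only to show how the documented pieces combine:

```python
# Hypothetical scalar values, purely to illustrate the documented composition.
loss1, loss2, loss3, loss_Linf = 0.12, 0.08, 0.05, 0.02   # as returned by define_loss
loss = loss1 + loss2 + loss3 + loss_Linf                   # "sum of above four losses"

loss_L1, loss_L2 = 0.01, 0.03                               # as returned by define_regularization
regularized_loss = loss + loss_L1 + loss_L2                 # "loss + regularization"
regularized_loss1 = loss1 + loss_L1 + loss_L2               # "loss1 (autoencoder loss) + regularization"
print(regularized_loss, regularized_loss1)
```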