diff --git a/helperfns.py b/helperfns.py
index e6a7f59..1992197 100644
--- a/helperfns.py
+++ b/helperfns.py
@@ -10,9 +10,12 @@ def stack_data(data, num_shifts, len_time):
     """Stack data from a 2D array into a 3D array.
 
     Arguments:
-        data -- data matrix to be reshaped
+        data -- 2D data array to be reshaped
         num_shifts -- number of shifts (time steps) that losses will use (maximum is len_time - 1)
         len_time -- number of time steps in each trajectory in data
+
+    Returns:
+        data_tensor -- data reshaped into 3D array, shape: num_shifts + 1, num_traj * (len_time - num_shifts), n
     """
     nd = data.ndim
     if nd > 1:
@@ -41,6 +44,9 @@ def choose_optimizer(params, regularized_loss, trainable_var):
         params -- dictionary of parameters for experiment
         regularized_loss -- loss, including regularization
         trainable_var -- list of trainable TensorFlow variables
+
+    Returns:
+        optimizer -- optimizer from TensorFlow Class optimizer
     """
     if params['opt_alg'] == 'adam':
         optimizer = tf.train.AdamOptimizer(params['learning_rate']).minimize(regularized_loss, var_list=trainable_var)
@@ -97,6 +103,10 @@ def check_progress(start, best_error, params):
         start -- time that experiment started
         best_error -- best error so far in training
         params -- dictionary of parameters for experiment
+
+    Returns:
+        finished -- 0 if should continue training, 1 if should stop training
+        save_now -- 0 if don't need to save results, 1 if should save results
     """
     finished = 0
     save_now = 0
@@ -215,6 +225,9 @@ def save_files(sess, csv_path, train_val_error, params, weights, biases):
         params -- dictionary of parameters for experiment
         weights -- dictionary of weights for all networks
         biases -- dictionary of biases for all networks
+
+    Returns:
+        None (but side effect of saving files and updating params dict.)
     """
     np.savetxt(csv_path, train_val_error, delimiter=',')
 
@@ -236,6 +249,9 @@ def save_params(params):
 
     Arguments:
         params -- dictionary of parameters for experiment
+
+    Returns:
+        None (but side effect of saving params dict to pkl file)
     """
     with open(params['model_path'].replace('ckpt', 'pkl'), 'wb') as f:
         pickle.dump(params, f, pickle.HIGHEST_PROTOCOL)
@@ -246,6 +262,9 @@ def set_defaults(params):
 
     Arguments:
        params -- dictionary of parameters for experiment
+
+    Returns:
+        None (but side effect of updating params dict)
     """
     # defaults related to dataset
     if 'data_name' not in params:
@@ -438,6 +457,9 @@ def num_shifts_in_stack(params):
 
     Arguments:
         params -- dictionary of parameters for experiment
+
+    Returns:
+        max_shifts_to_stack -- max number of shifts to use in loss functions
     """
     max_shifts_to_stack = 1
     if params['num_shifts']:
diff --git a/networkarch.py b/networkarch.py
index 3addd4d..eb57b26 100644
--- a/networkarch.py
+++ b/networkarch.py
@@ -13,6 +13,9 @@ def weight_variable(shape, var_name, distribution='tn', scale=0.1, first_guess=0
         distribution -- string for which distribution to use for random initialization
         scale -- (for tn distribution): standard deviation of normal distribution before truncation
         first_guess -- (for tn distribution): array of first guess for weight matrix, added to tn dist.
+
+    Returns:
+        a TensorFlow variable for a weight matrix
     """
     if distribution == 'tn':
         initial = tf.truncated_normal(shape, stddev=scale, dtype=tf.float64) + first_guess
@@ -49,6 +52,9 @@ def bias_variable(shape, var_name, distribution=''):
         shape -- array giving shape of output bias variable
         var_name -- string naming bias variable
         distribution -- string for which distribution to use for random initialization (file name)
+
+    Returns:
+        a TensorFlow variable for a bias vector
     """
     if distribution:
         initial = np.genfromtxt(distribution, delimiter=',', dtype=np.float64)
@@ -67,6 +73,11 @@ def encoder(widths, dist_weights, dist_biases, scale, num_shifts_max, first_gues
         scale -- (for tn distribution of weight matrices): standard deviation of normal distribution before truncation
         num_shifts_max -- number of shifts (time steps) that losses will use (max of num_shifts and num_shifts_middle)
         first_guess -- (for tn dist. of weight matrices): array of first guess for weight matrix, added to tn dist.
+
+    Returns:
+        x -- placeholder for input
+        weights -- dictionary of weights
+        biases -- dictionary of biases
     """
     x = tf.placeholder(tf.float64, [num_shifts_max + 1, None, widths[0]])
 
@@ -98,6 +109,9 @@ def encoder_apply(x, weights, biases, act_type, batch_flag, phase, shifts_middle
         keep_prob -- probability that weight is kept during dropout
         name -- string for prefix on weight matrices, default 'E' for encoder
         num_encoder_weights -- number of weight matrices (layers) in encoder network, default 1
+
+    Returns:
+        y -- list, output of encoder network applied to each time shift in input x
     """
     y = []
     num_shifts_middle = len(shifts_middle)
@@ -130,6 +144,9 @@ def encoder_apply_one_shift(prev_layer, weights, biases, act_type, batch_flag, p
         keep_prob -- probability that weight is kept during dropout
         name -- string for prefix on weight matrices, default 'E' (for "encoder")
         num_encoder_weights -- number of weight matrices (layers) in encoder network, default 1
+
+    Returns:
+        final -- output of encoder network applied to input prev_layer (a particular time step / shift)
     """
     for i in np.arange(num_encoder_weights - 1):
         h1 = tf.matmul(prev_layer, weights['W%s%d' % (name, i + 1)]) + biases['b%s%d' % (name, i + 1)]
@@ -163,6 +180,10 @@ def decoder(widths, dist_weights, dist_biases, scale, name='D', first_guess=0):
         scale -- (for tn distribution of weight matrices): standard deviation of normal distribution before truncation
         name -- string for prefix on weight matrices, default 'D' (for "decoder")
         first_guess -- (for tn dist. of weight matrices): array of first guess for weight matrix, added to tn dist.
+
+    Returns:
+        weights -- dictionary of weights
+        biases -- dictionary of biases
     """
     weights = dict()
     biases = dict()
@@ -188,6 +209,9 @@ def decoder_apply(prev_layer, weights, biases, act_type, batch_flag, phase, keep
         phase -- boolean placeholder for dropout: training phase or not training phase
         keep_prob -- probability that weight is kept during dropout
         num_decoder_weights -- number of weight matrices (layers) in decoder network
+
+    Returns:
+        output of decoder network applied to input prev_layer
     """
     for i in np.arange(num_decoder_weights - 1):
         h1 = tf.matmul(prev_layer, weights['WD%d' % (i + 1)]) + biases['bD%d' % (i + 1)]
@@ -215,6 +239,9 @@ def form_complex_conjugate_block(omegas, delta_t):
     Arguments:
         omegas -- array of parameters for blocks. first column is freq. (omega) and 2nd is scaling (mu), size [None, 2]
         delta_t -- time step in trajectories from input data
+
+    Returns:
+        stack of 2x2 blocks, size [None, 2, 2], where first dimension matches first dimension of omegas
     """
     scale = tf.exp(omegas[:, 1] * delta_t)
     entry11 = tf.multiply(scale, tf.cos(omegas[:, 0] * delta_t))
@@ -233,6 +260,9 @@ def varying_multiply(y, omegas, delta_t, num_real, num_complex_pairs):
         delta_t -- time step in trajectories from input data
         num_real -- number of real eigenvalues
         num_complex_pairs -- number of pairs of complex conjugate eigenvalues
+
+    Returns:
+        array same size as input y, but advanced to next time step
     """
     k = y.shape[1]
     complex_list = []
@@ -275,6 +305,11 @@ def create_omega_net(phase, keep_prob, params, ycoords):
         keep_prob -- probability that weight is kept during dropout
         params -- dictionary of parameters for experiment
         ycoords -- array of shape [None, k] of y-coordinates, where L will be k x k
+
+    Returns:
+        omegas -- list, output of omega (auxiliary) network(s) applied to input ycoords
+        weights -- dictionary of weights
+        biases -- dictionary of biases
     """
     weights = dict()
     biases = dict()
@@ -303,6 +338,9 @@ def create_one_omega_net(params, temp_name, weights, biases, widths):
         weights -- dictionary of weights
         biases -- dictionary of biases
         widths -- array or list of widths for layers of network
+
+    Returns:
+        None (but side effect of updating weights and biases dictionaries)
     """
     weightsO, biasesO = decoder(widths, dist_weights=params['dist_weights_omega'],
                                 dist_biases=params['dist_biases_omega'], scale=params['scale_omega'], name=temp_name,
@@ -321,6 +359,9 @@ def omega_net_apply(phase, keep_prob, params, ycoords, weights, biases):
         ycoords -- array of shape [None, k] of y-coordinates, where L will be k x k
         weights -- dictionary of weights
         biases -- dictionary of biases
+
+    Returns:
+        omegas -- list, output of omega (auxiliary) network(s) applied to input ycoords
     """
     omegas = []
     for j in np.arange(params['num_complex_pairs']):
@@ -347,6 +388,9 @@ def omega_net_apply_one(phase, keep_prob, params, ycoords, weights, biases, name
         weights -- dictionary of weights
         biases -- dictionary of biases
         name -- string for prefix on weight matrices, i.e. OC1 or OR1
+
+    Returns:
+        omegas - output of one auxiliary (omega) network to input ycoords
     """
     if len(ycoords.shape) == 1:
         ycoords = ycoords[:, np.newaxis]
@@ -371,6 +415,13 @@ def create_koopman_net(phase, keep_prob, params):
         phase -- boolean placeholder for dropout: training phase or not training phase
         keep_prob -- probability that weight is kept during dropout
         params -- dictionary of parameters for experiment
+
+    Returns:
+        x -- placeholder for input
+        y -- list, output of decoder applied to each shift: g_list[0], K*g_list[0], K^2*g_list[0], ..., length num_shifts + 1
+        g_list -- list, output of encoder applied to each shift in input x, length num_shifts_middle + 1
+        weights -- dictionary of weights
+        biases -- dictionary of biases
     """
     depth = int((params['d'] - 4) / 2)
 
diff --git a/training.py b/training.py
index 29748f5..2d86fa5 100644
--- a/training.py
+++ b/training.py
@@ -20,6 +20,13 @@ def define_loss(x, y, g_list, weights, biases, params, phase, keep_prob):
         params -- dictionary of parameters for experiment
         phase -- boolean placeholder for dropout: training phase or not training phase
         keep_prob -- probability that weight is kept during dropout
+
+    Returns:
+        loss1 -- autoencoder loss function
+        loss2 -- dynamics/prediction loss function
+        loss3 -- linearity loss function
+        loss_Linf -- inf norm on autoencoder loss and one-step prediction loss
+        loss -- sum of above four losses
     """
     # Minimize the mean squared errors.
     # subtraction and squaring element-wise, then average over both dimensions
@@ -36,7 +43,7 @@ def define_loss(x, y, g_list, weights, biases, params, phase, keep_prob):
     mean_squared_error = tf.reduce_mean(tf.reduce_mean(tf.square(y[0] - tf.squeeze(x[0, :, :])), 1))
     loss1 = params['recon_lam'] * tf.truediv(mean_squared_error, loss1_denominator)
 
-    # gets dynamics
+    # gets dynamics/prediction
     loss2 = tf.zeros([1, ], dtype=tf.float64)
     if params['num_shifts'] > 0:
         for j in np.arange(params['num_shifts']):
@@ -77,7 +84,7 @@ def define_loss(x, y, g_list, weights, biases, params, phase, keep_prob):
 
         loss3 = loss3 / params['num_shifts_middle']
 
-    # inf norm on autoencoder error
+    # inf norm on autoencoder error and one prediction step
     if params['relative_loss']:
         Linf1_den = tf.norm(tf.norm(tf.squeeze(x[0, :, :]), axis=1, ord=np.inf), ord=np.inf) + denominator_nonzero
         Linf2_den = tf.norm(tf.norm(tf.squeeze(x[1, :, :]), axis=1, ord=np.inf), ord=np.inf) + denominator_nonzero
@@ -104,6 +111,12 @@ def define_regularization(params, trainable_var, loss, loss1):
         trainable_var -- list of trainable TensorFlow variables
         loss -- the unregularized loss
         loss1 -- the autoenocder component of the loss
+
+    Returns:
+        loss_L1 -- L1 regularization on weights W and b
+        loss_L2 -- L2 regularization on weights W
+        regularized_loss -- loss + regularization
+        regularized_loss1 -- loss1 (autoencoder loss) + regularization
     """
     if params['L1_lam']:
         l1_regularizer = tf.contrib.layers.l1_regularizer(scale=params['L1_lam'], scope=None)
@@ -128,6 +141,9 @@ def try_net(data_val, params):
     Arguments:
         data_val -- array containing validation dataset
         params -- dictionary of parameters for experiment
+
+    Returns:
+        None
     """
     # SET UP NETWORK
     phase = tf.placeholder(tf.bool, name='phase')
@@ -266,6 +282,9 @@ def main_exp(params):
 
     Arguments:
         params -- dictionary of parameters for experiment
+
+    Returns:
+        None
     """
     helperfns.set_defaults(params)
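
Note (not part of the patch): to make the new `Returns` entry for `stack_data` concrete, here is a minimal NumPy sketch of the documented reshaping. The helper name `stack_data_sketch`, the trajectory ordering, and the example sizes are hypothetical illustrations of the docstring's shape `(num_shifts + 1, num_traj * (len_time - num_shifts), n)`, not the repo's implementation.

```python
import numpy as np

def stack_data_sketch(data, num_shifts, len_time):
    """Illustrative only: reshape 2D data of shape (num_traj * len_time, n)
    into the documented 3D shape (num_shifts + 1, num_traj * (len_time - num_shifts), n)."""
    n = data.shape[1]
    num_traj = data.shape[0] // len_time
    data_tensor = np.zeros((num_shifts + 1, num_traj * (len_time - num_shifts), n))
    for shift in range(num_shifts + 1):
        for traj in range(num_traj):
            start = traj * len_time + shift
            # each trajectory contributes its (len_time - num_shifts) windows at this shift
            rows = slice(traj * (len_time - num_shifts), (traj + 1) * (len_time - num_shifts))
            data_tensor[shift, rows, :] = data[start:start + len_time - num_shifts, :]
    return data_tensor

# e.g. 5 trajectories of 50 steps in R^2, stacked with 3 shifts -> (4, 235, 2)
example = stack_data_sketch(np.random.randn(5 * 50, 2), num_shifts=3, len_time=50)
assert example.shape == (4, 5 * (50 - 3), 2)
```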
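
Similarly, the `Returns` text for `form_complex_conjugate_block` (a stack of 2x2 blocks of size [None, 2, 2] built from exp(mu * delta_t) and cos/sin of omega * delta_t, as the visible `entry11` context line suggests) can be sanity-checked with a small NumPy sketch. The placement of the off-diagonal sin terms is an assumption here; the hunk only shows `entry11`.

```python
import numpy as np

def complex_conjugate_block_sketch(omegas, delta_t):
    """Illustrative only: build one 2x2 block per row of omegas.
    omegas[:, 0] is the frequency omega, omegas[:, 1] is the scaling mu,
    matching the docstring in the diff; off-diagonal signs are assumed."""
    scale = np.exp(omegas[:, 1] * delta_t)              # exp(mu * dt)
    cos_term = scale * np.cos(omegas[:, 0] * delta_t)   # matches entry11 in the hunk
    sin_term = scale * np.sin(omegas[:, 0] * delta_t)
    blocks = np.stack([np.stack([cos_term, -sin_term], axis=1),
                       np.stack([sin_term, cos_term], axis=1)], axis=1)
    return blocks                                        # shape [None, 2, 2]

omegas = np.array([[2.0 * np.pi, -0.1]])                 # one pair: omega = 2*pi, mu = -0.1
print(complex_conjugate_block_sketch(omegas, delta_t=0.01).shape)  # (1, 2, 2)
```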
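
Finally, the two training.py docstrings together describe how the objective is assembled: `define_loss` returns `loss` as the sum of the four component losses, and `define_regularization` adds the L1/L2 penalties on top. A tiny sketch with hypothetical scalar values, only to show how the documented pieces combine:

```python
# Hypothetical scalar values, purely to illustrate the documented composition.
loss1, loss2, loss3, loss_Linf = 0.12, 0.08, 0.05, 0.02   # as returned by define_loss
loss = loss1 + loss2 + loss3 + loss_Linf                   # "sum of above four losses"

loss_L1, loss_L2 = 0.01, 0.03                               # as returned by define_regularization
regularized_loss = loss + loss_L1 + loss_L2                 # "loss + regularization"
regularized_loss1 = loss1 + loss_L1 + loss_L2               # "loss1 (autoencoder loss) + regularization"
print(regularized_loss, regularized_loss1)
```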