diff --git a/lxmls/deep_learning/numpy_models/log_linear.py b/lxmls/deep_learning/numpy_models/log_linear.py
index 7714d585..3422d78c 100644
--- a/lxmls/deep_learning/numpy_models/log_linear.py
+++ b/lxmls/deep_learning/numpy_models/log_linear.py
@@ -18,36 +18,36 @@ def __init__(self, **config):
         self.bias = np.zeros((1, config['num_classes']))
         self.learning_rate = config['learning_rate']
 
-    def log_forward(self, input=None):
+    def log_forward(self, X):
         """Forward pass of the computation graph"""
 
         # Linear transformation
-        z = np.dot(input, self.weight.T) + self.bias
+        z = np.dot(X, self.weight.T) + self.bias
 
         # Softmax implemented in log domain
         log_tilde_z = z - logsumexp(z, axis=1, keepdims=True)
 
         return log_tilde_z
 
-    def predict(self, input=None):
+    def predict(self, X):
         """Most probable class index"""
-        return np.argmax(np.exp(self.log_forward(input)), axis=1)
+        return np.argmax(self.log_forward(X), axis=1)
 
-    def update(self, input=None, output=None):
+    def update(self, X, y):
         """Stochastic Gradient Descent update"""
 
         # Probabilities of each class
-        class_probabilities = np.exp(self.log_forward(input))
+        class_probabilities = np.exp(self.log_forward(X))
         batch_size, num_classes = class_probabilities.shape
 
         # Error derivative at softmax layer
-        I = index2onehot(output, num_classes)
+        I = index2onehot(y, num_classes)
         error = - (I - class_probabilities) / batch_size
 
         # Weight gradient
         gradient_weight = np.zeros(self.weight.shape)
         for l in np.arange(batch_size):
-            gradient_weight += np.outer(error[l, :], input[l, :])
+            gradient_weight += np.outer(error[l, :], X[l, :])
 
         # Bias gradient
         gradient_bias = np.sum(error, axis=0, keepdims=True)
diff --git a/lxmls/deep_learning/numpy_models/mlp.py b/lxmls/deep_learning/numpy_models/mlp.py
index ea19f452..f2de6cd4 100755
--- a/lxmls/deep_learning/numpy_models/mlp.py
+++ b/lxmls/deep_learning/numpy_models/mlp.py
@@ -15,19 +15,19 @@ def __init__(self, **config):
         # self.parameters
         MLP.__init__(self, **config)
 
-    def predict(self, input=None):
+    def predict(self, X):
         """
         Predict model outputs given input
         """
-        log_class_probabilities, _ = self.log_forward(input)
-        return np.argmax(np.exp(log_class_probabilities), axis=1)
+        log_class_probabilities, _ = self.log_forward(X)
+        return np.argmax(log_class_probabilities, axis=1)
 
-    def update(self, input=None, output=None):
+    def update(self, X, y):
         """
         Update model parameters given batch of data
         """
-        gradients = self.backpropagation(input, output)
+        gradients = self.backpropagation(X, y)
 
         learning_rate = self.config['learning_rate']
         num_parameters = len(self.parameters)
@@ -39,11 +39,11 @@ def update(self, input=None, output=None):
             # Update bias
             self.parameters[m][1] -= learning_rate * gradients[m][1]
 
-    def log_forward(self, input):
+    def log_forward(self, X):
         """Forward pass for sigmoid hidden layers and output softmax"""
 
         # Input
-        tilde_z = input
+        tilde_z = X
         layer_inputs = []
 
         # Hidden layers
@@ -72,17 +72,17 @@ def log_forward(self, input):
 
         return log_tilde_z, layer_inputs
 
-    def cross_entropy_loss(self, input, output):
+    def cross_entropy_loss(self, X, y):
         """Cross entropy loss"""
-        num_examples = input.shape[0]
-        log_probability, _ = self.log_forward(input)
-        return -log_probability[range(num_examples), output].mean()
+        num_examples = X.shape[0]
+        log_probability, _ = self.log_forward(X)
+        return -log_probability[range(num_examples), y].mean()
 
-    def backpropagation(self, input, output):
+    def backpropagation(self, X, y):
         """Gradients for sigmoid hidden layers and output softmax"""
 
         # Run forward and store activations for each layer
-        log_prob_y, layer_inputs = self.log_forward(input)
+        log_prob_y, layer_inputs = self.log_forward(X)
         prob_y = np.exp(log_prob_y)
 
         num_examples, num_clases = prob_y.shape
@@ -97,7 +97,7 @@ def backpropagation(self, input, output):
 
         # Initial error is the cost derivative at the last layer (for cross
         # entropy cost)
-        I = index2onehot(output, num_clases)
+        I = index2onehot(y, num_clases)
         error = - (I - prob_y) / num_examples
         errors.append(error)
 
@@ -105,10 +105,10 @@ def backpropagation(self, input, output):
         for n in reversed(range(num_hidden_layers)):
 
             # Backpropagate through linear layer
-            error = np.dot(error, self.parameters[n+1][0])
+            error = np.dot(error, self.parameters[n + 1][0])
 
             # Backpropagate through sigmoid layer
-            error *= layer_inputs[n+1] * (1-layer_inputs[n+1])
+            error *= layer_inputs[n + 1] * (1 - layer_inputs[n + 1])
 
             # Collect error
             errors.append(error)
diff --git a/lxmls/deep_learning/numpy_models/rnn.py b/lxmls/deep_learning/numpy_models/rnn.py
index f7911e5b..e2e2011e 100644
--- a/lxmls/deep_learning/numpy_models/rnn.py
+++ b/lxmls/deep_learning/numpy_models/rnn.py
@@ -11,18 +11,18 @@ def __init__(self, **config):
         # self.parameters
         RNN.__init__(self, **config)
 
-    def predict(self, input=None):
+    def predict(self, X):
         """
         Predict model outputs given input
         """
-        p_y = np.exp(self.log_forward(input)[0])
-        return np.argmax(p_y, axis=1)
+        log_p_y = self.log_forward(X)[0]
+        return np.argmax(log_p_y, axis=1)
 
-    def update(self, input=None, output=None):
+    def update(self, X, y):
         """
         Update model parameters given batch of data
         """
-        gradients = self.backpropagation(input, output)
+        gradients = self.backpropagation(X, y)
         learning_rate = self.config['learning_rate']
         # Update each parameter with SGD rule
         num_parameters = len(self.parameters)
@@ -30,15 +30,15 @@ def update(self, input=None, output=None):
             # Update weight
             self.parameters[m] -= learning_rate * gradients[m]
 
-    def log_forward(self, input):
+    def log_forward(self, X):
 
         # Get parameters and sizes
         W_e, W_x, W_h, W_y = self.parameters
         hidden_size = W_h.shape[0]
-        nr_steps = input.shape[0]
+        nr_steps = X.shape[0]
 
         # Embedding layer
-        z_e = W_e[input, :]
+        z_e = W_e[X, :]
 
         # Recurrent layer
         h = np.zeros((nr_steps + 1, hidden_size))
@@ -48,7 +48,7 @@ def log_forward(self, input):
             z_t = W_x.dot(z_e[t, :]) + W_h.dot(h[t, :])
 
             # Non-linear
-            h[t+1, :] = 1.0 / (1 + np.exp(-z_t))
+            h[t + 1, :] = 1.0 / (1 + np.exp(-z_t))
 
         # Output layer
         y = h[1:, :].dot(W_y.T)
@@ -56,25 +56,25 @@ def log_forward(self, input):
         # Softmax
         log_p_y = y - logsumexp(y, axis=1, keepdims=True)
 
-        return log_p_y, y, h, z_e, input
+        return log_p_y, y, h, z_e, X  # input indices are also returned so backpropagation can reuse them
-    def backpropagation(self, input, output):
+    def backpropagation(self, X, y):
         '''
         Compute gradientes, with the back-propagation method
         inputs:
-            x: vector with the (embedding) indicies of the words of a
+            X: vector with the (embedding) indices of the words of a
                 sentence
-            outputs: vector with the indicies of the tags for each word of
+            y: vector with the indices of the tags for each word of
                 the sentence
         outputs:
             gradient_parameters: vector with parameters gradientes
         '''
 
         # Get parameters and sizes
         W_e, W_x, W_h, W_y = self.parameters
-        nr_steps = input.shape[0]
+        nr_steps = X.shape[0]
 
-        log_p_y, y, h, z_e, x = self.log_forward(input)
+        log_p_y, _, h, z_e, x = self.log_forward(X)  # discard pre-softmax scores so the y argument is not shadowed
         p_y = np.exp(log_p_y)
 
         # Initialize gradients with zero entrances
@@ -87,7 +87,7 @@ def backpropagation(self, input, output):
         # Solution to Exercise 6.1
 
         # Gradient of the cost with respect to the last linear model
-        I = index2onehot(output, W_y.shape[0])
+        I = index2onehot(y, W_y.shape[0])
         error = - (I - p_y) / nr_steps
 
         # backward pass, with gradient computation
@@ -119,8 +119,8 @@ def backpropagation(self, input, output):
 
         return gradient_parameters
 
-    def cross_entropy_loss(self, input, output):
+    def cross_entropy_loss(self, X, y):
         """Cross entropy loss"""
-        nr_steps = input.shape[0]
-        log_probability = self.log_forward(input)[0]
-        return -log_probability[range(nr_steps), output].mean()
+        nr_steps = X.shape[0]
+        log_probability = self.log_forward(X)[0]
+        return -log_probability[range(nr_steps), y].mean()
diff --git a/lxmls/deep_learning/pytorch_models/log_linear.py b/lxmls/deep_learning/pytorch_models/log_linear.py
index 721fbd60..79e6d8a5 100644
--- a/lxmls/deep_learning/pytorch_models/log_linear.py
+++ b/lxmls/deep_learning/pytorch_models/log_linear.py
@@ -20,14 +20,14 @@ def __init__(self, **config):
         self.log_softmax = torch.nn.LogSoftmax(dim=1)
         self.loss_function = torch.nn.NLLLoss()
 
-    def _log_forward(self, input=None):
+    def _log_forward(self, X):
         """Forward pass of the computation graph in logarithm domain (pytorch)"""
 
         # IMPORTANT: Cast to pytorch format
-        input = torch.from_numpy(input).float()
+        X = torch.from_numpy(X).float()
 
         # Linear transformation
-        z = torch.matmul(input, torch.t(self.weight)) + self.bias
+        z = torch.matmul(X, torch.t(self.weight)) + self.bias
 
         # Softmax implemented in log domain
         log_tilde_z = self.log_softmax(z)
@@ -35,19 +35,19 @@ def _log_forward(self, input=None):
         # NOTE that this is a pytorch class!
         return log_tilde_z
 
-    def predict(self, input=None):
+    def predict(self, X):
         """Most probable class index"""
-        log_forward = self._log_forward(input).data.numpy()
+        log_forward = self._log_forward(X).data.numpy()
         return np.argmax(log_forward, axis=1)
 
-    def update(self, input=None, output=None):
+    def update(self, X, y):
         """Stochastic Gradient Descent update"""
 
         # IMPORTANT: Class indices need to be casted to LONG
-        true_class = torch.from_numpy(output).long()
+        true_class = torch.from_numpy(y).long()
 
         # Compute negative log-likelihood loss
-        loss = self.loss_function(self._log_forward(input), true_class)
+        loss = self.loss_function(self._log_forward(X), true_class)
 
         # Use autograd to compute the backward pass.
         loss.backward()
diff --git a/lxmls/deep_learning/pytorch_models/mlp.py b/lxmls/deep_learning/pytorch_models/mlp.py
index 5dce865c..0203fa44 100755
--- a/lxmls/deep_learning/pytorch_models/mlp.py
+++ b/lxmls/deep_learning/pytorch_models/mlp.py
@@ -33,16 +33,15 @@ def __init__(self, **config):
         self.loss_function = torch.nn.NLLLoss()
 
     # TODO: Move these outside fo the class as in the numpy case
-    def _log_forward(self, input):
+    def _log_forward(self, X):
         """
         Forward pass
         """
 
         # Ensure the type matches torch type
-        input = cast_float(input)
+        X = cast_float(X)
 
-        # Input
-        tilde_z = input
+        tilde_z = X
 
         # ----------
         # Solution to Exercise 6.4
@@ -71,15 +70,15 @@ def _log_forward(self, input):
         return log_tilde_z
 
-    def gradients(self, input, output):
+    def gradients(self, X, y):
         """
         Computes the gradients of the network with respect to cross entropy
         error cost
         """
-        true_class = torch.from_numpy(output).long()
+        true_class = torch.from_numpy(y).long()
 
         # Compute negative log-likelihood loss
-        _log_forward = self._log_forward(input)
+        _log_forward = self._log_forward(X)
         loss = self.loss_function(_log_forward, true_class)
 
         # Use autograd to compute the backward pass.
         loss.backward()
@@ -90,18 +89,18 @@ def gradients(self, input, output):
             nabla_parameters.append([weight.grad.data, bias.grad.data])
 
         return nabla_parameters
 
-    def predict(self, input=None):
+    def predict(self, X):
         """
         Predict model outputs given input
         """
-        log_forward = self._log_forward(input).data.numpy()
+        log_forward = self._log_forward(X).data.numpy()
         return np.argmax(log_forward, axis=1)
 
-    def update(self, input=None, output=None):
+    def update(self, X, y):
         """
         Update model parameters given batch of data
         """
-        gradients = self.gradients(input, output)
+        gradients = self.gradients(X, y)
         learning_rate = self.config['learning_rate']
         # Update each parameter with SGD rule
         for m in range(self.num_layers):
diff --git a/lxmls/deep_learning/pytorch_models/rnn.py b/lxmls/deep_learning/pytorch_models/rnn.py
index edca0f6f..c0720016 100644
--- a/lxmls/deep_learning/pytorch_models/rnn.py
+++ b/lxmls/deep_learning/pytorch_models/rnn.py
@@ -55,18 +55,18 @@ def __init__(self, **config):
             # Get weigths and bias of the layer (even and odd positions)
             self.parameters[index] = cast_float(self.parameters[index])
 
-    def predict(self, input=None):
+    def predict(self, X):
         """
-        Predict model outputs given input
+        Predict model outputs given X
         """
-        log_p_y = self._log_forward(input).data.numpy()
+        log_p_y = self._log_forward(X).data.numpy()
         return np.argmax(log_p_y, axis=1)
 
-    def update(self, input=None, output=None):
+    def update(self, X, y):
         """
         Update model parameters given batch of data
         """
-        gradients = self.backpropagation(input, output)
+        gradients = self.backpropagation(X, y)
         learning_rate = self.config['learning_rate']
         # Update each parameter with SGD rule
         num_parameters = len(self.parameters)
@@ -74,19 +74,19 @@ def update(self, input=None, output=None):
             # Update weight
             self.parameters[m].data -= learning_rate * gradients[m]
 
-    def _log_forward(self, input):
+    def _log_forward(self, X):
         """
         Forward pass
         """
 
         # Ensure the type matches torch type
-        input = cast_int(input, grad=False)
+        X = cast_int(X, grad=False)
 
         # Get parameters and sizes
         W_e, W_x, W_h, W_y = self.parameters
         embedding_size, vocabulary_size = W_e.shape
         hidden_size = W_h.shape[0]
-        nr_steps = input.shape[0]
+        nr_steps = X.shape[0]
 
         # FORWARD PASS COMPUTATION GRAPH
@@ -94,7 +94,7 @@ def _log_forward(self, input):
         # Solution to Exercise 6.2
 
         # Word Embeddings
-        z_e = self.embedding_layer(input)
+        z_e = self.embedding_layer(X)
 
         # Recurrent layer
         h = torch.zeros(1, hidden_size)
@@ -122,14 +122,14 @@ def _log_forward(self, input):
         return log_p_y
 
-    def backpropagation(self, input, output):
+    def backpropagation(self, X, y):
         """
         Computes the gradients of the network with respect to cross entropy
         error cost
         """
 
         # Ensure the type matches torch type
-        output = cast_int(output, grad=False)
+        y = cast_int(y, grad=False)
 
         # Zero gradients
         for parameter in self.parameters:
             parameter.grad.data.zero_()
 
         # Compute negative log-likelihood loss
-        log_p_y = self._log_forward(input)
-        cost = self.loss(log_p_y, output)
+        log_p_y = self._log_forward(X)
+        cost = self.loss(log_p_y, y)
 
         # Use autograd to compute the backward pass.
         cost.backward()
@@ -196,19 +196,19 @@ def __init__(self, **config):
             [cast_float(self.parameters[-1])]
         )
 
-    def predict(self, input=None):
+    def predict(self, X):
         """
-        Predict model outputs given input
+        Predict model outputs given X
         """
-        log_p_y = self._log_forward(input).data.numpy()
+        log_p_y = self._log_forward(X).data.numpy()
         return np.argmax(log_p_y, axis=1)
 
-    def update(self, input=None, output=None):
+    def update(self, X, y):
         """
         Update model parameters given batch of data
         """
-        gradients = self.backpropagation(input, output)
+        gradients = self.backpropagation(X, y)
         learning_rate = self.config['learning_rate']
         # Update each parameter with SGD rule
         num_parameters = len(self.parameters)
@@ -216,13 +216,13 @@ def update(self, input=None, output=None):
             # Update weight
             self.parameters[m].data -= learning_rate * gradients[m]
 
-    def _log_forward(self, input):
+    def _log_forward(self, X):
         """
         Forward pass
         """
 
         # Ensure the type matches torch type
-        input = cast_int(input)
+        X = cast_int(X)
 
         # Get parameters and sizes
         W_e, W_x, W_h, W_y = self.parameters
@@ -231,7 +231,7 @@ def _log_forward(self, input):
         # FORWARD PASS COMPUTATION GRAPH
 
         # Word Embeddings
-        z_e = self.embedding_layer(input)
+        z_e = self.embedding_layer(X)
 
         # RNN
         h, _ = self.rnn(z_e[:, None, :])
@@ -244,12 +244,12 @@ def _log_forward(self, input):
         return log_p_y
 
-    def backpropagation(self, input, output):
+    def backpropagation(self, X, y):
         """
         Computes the gradients of the network with respect to cross entropy
         error cost
         """
-        output = cast_int(output, grad=False)
+        y = cast_int(y, grad=False)
 
         # Zero gradients
         for parameter in self.parameters:
             parameter.grad.data.zero_()
 
         # Compute negative log-likelihood loss
-        log_p_y = self._log_forward(input)
-        cost = self.loss(log_p_y, output)
+        log_p_y = self._log_forward(X)
+        cost = self.loss(log_p_y, y)
 
         # Use autograd to compute the backward pass.
         cost.backward()
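
A note on the predict() changes above: dropping np.exp before np.argmax in the numpy and pytorch models is safe because exp is strictly increasing, so the most probable class is the same whether it is read off the probabilities or the log-probabilities. A minimal self-contained check (plain NumPy, no repository code involved):

    import numpy as np

    # Log-probabilities for two examples over three classes
    log_p = np.log(np.array([[0.1, 0.7, 0.2],
                             [0.5, 0.3, 0.2]]))

    # exp is monotonically increasing, so the argmax is unchanged in the log domain
    assert np.array_equal(np.argmax(np.exp(log_p), axis=1),
                          np.argmax(log_p, axis=1))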
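
Since the rename touches every public signature, a quick end-to-end check of the new (X, y) interface is worth running. The sketch below does a central finite-difference test against the renamed backpropagation(X, y) and cross_entropy_loss(X, y) methods of the numpy MLP; the class name NumpyMLP and the config keys passed to it are assumptions for illustration and may need to be adjusted to whatever the constructor actually expects:

    import numpy as np
    from lxmls.deep_learning.numpy_models.mlp import NumpyMLP  # assumed class name

    # Assumed config keys; adjust to the real MLP configuration
    model = NumpyMLP(input_size=20, hidden_sizes=[10], num_classes=3,
                     learning_rate=0.1)

    rng = np.random.RandomState(0)
    X = rng.rand(5, 20)               # batch of 5 feature vectors
    y = rng.randint(0, 3, size=5)     # one class index per example

    # Analytic gradient of the first layer's weight at entry (0, 0)
    analytic = model.backpropagation(X, y)[0][0][0, 0]

    # Central finite difference of the loss with respect to the same entry
    epsilon = 1e-6
    weight = model.parameters[0][0]
    weight[0, 0] += epsilon
    loss_plus = model.cross_entropy_loss(X, y)
    weight[0, 0] -= 2 * epsilon
    loss_minus = model.cross_entropy_loss(X, y)
    weight[0, 0] += epsilon           # restore the original value
    numeric = (loss_plus - loss_minus) / (2 * epsilon)

    assert abs(numeric - analytic) < 1e-5

If a few entries pass this check, the renamed interface is wired up consistently end to end; the numpy RNN exposes the same pair of methods and can be tested the same way.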