# -*- coding: utf-8 -*-
import numpy as np

STUDENT = {'name': 'Ofer Sabo',
           'ID': '201511110'}


def softmax(x):
    """
    Compute the softmax vector.
    x: an n-dim vector (numpy array)
    returns: an n-dim vector (numpy array) of softmax values
    """
    # YOUR CODE HERE
    # Your code should be fast, so use a vectorized implementation using numpy,
    # don't use any loops.
    # With a vectorized implementation, the code should be no more than 2 lines.
    #
    # For numeric stability, use the identity you proved in Ex 2 Q1.
    x = x - np.max(x)
    x = np.exp(x)
    x /= np.sum(x)
    return x
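# Note on the stability trick above: softmax is invariant to subtracting a constant
# from every coordinate, so shifting by np.max(x) keeps np.exp from overflowing
# while leaving the result unchanged, e.g. softmax(np.array([1001, 1002])) should
# equal softmax(np.array([1, 2])) (the sanity checks in __main__ exercise this).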


def classifier_output(x, params):
    """
    Return the output layer (class probabilities)
    of a log-linear classifier with given params on input x.
    """
    W, b = params
    multi = np.dot(x, W)
    score = multi.flatten() + b
    probs = softmax(score)
    return probs
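# For example (shapes only, values illustrative): with x of shape (in_dim,), W of
# shape (in_dim, out_dim) and b of shape (out_dim,), this returns
# softmax(np.dot(x, W) + b), an (out_dim,)-vector of probabilities summing to 1.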


def predict(x, params):
    """
    Returns the prediction (highest scoring class id) of a
    log-linear classifier with given parameters on input x.
    params: a list of the form [W, b]
    W: matrix
    b: vector
    """
    return np.argmax(classifier_output(x, params))
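# e.g. predict(x, [W, b]) returns an integer class id in [0, out_dim), the argmax
# of the probability vector produced by classifier_output.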


def loss_and_gradients(x, y, params):
    """
    Compute the loss and the gradients at point x with given parameters.
    y is a scalar indicating the correct label.
    returns:
        loss,[gW,gb]
    loss: scalar
    gW: matrix, gradients of W
    gb: vector, gradients of b
    """
    W, b = params
    # YOUR CODE HERE
    x = np.array(x)
    x = x[np.newaxis, :]                  # shape (1, in_dim)
    probs = classifier_output(x, [W, b])  # shape (out_dim,)
    # print(probs)
    loss = -np.log(probs[y])              # cross-entropy loss for the gold label y
    gW = np.zeros_like(W)
    gb = np.zeros_like(b)
    gW += probs
    gb += probs
    gW /= np.sum(probs)                   # probs already sums to 1; kept for safety
    gb /= np.sum(probs)
    gW[:, y] -= 1                         # subtract the one-hot gold label
    gb[y] -= 1
    gW = (x.T * gW)                       # outer product: x_i * (p_j - 1{j == y})
    # Optional L2 regularization (disabled):
    # loss += np.sum(W * W)
    # gW += 2 * W
    return loss, [gW, gb]
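# The code above implements the standard softmax/cross-entropy gradient: with
# p = classifier_output(x, params) and a one-hot gold vector e_y,
#     gb = p - e_y      and      gW = outer(x, p - e_y),
# which is what the gradient check in __main__ verifies numerically.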


def create_classifier(in_dim, out_dim):
    """
    returns the parameters (W,b) for a log-linear classifier
    with input dimension in_dim and output dimension out_dim.
    """
    W = np.random.randn(in_dim, out_dim) * (2.0 / float(in_dim + out_dim))
    b = np.random.randn(out_dim) * (1.0 / float(out_dim))
    return [W, b]
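# e.g. create_classifier(300, 6) returns a (300, 6) weight matrix and a length-6
# bias vector, with small random values scaled down by the layer dimensions.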


if __name__ == '__main__':
    # Sanity checks for softmax. If these fail, your softmax is definitely wrong.
    # If these pass, it may or may not be correct.
    test1 = softmax(np.array([1, 2]))
    print(test1)
    assert np.amax(np.fabs(test1 - np.array([0.26894142, 0.73105858]))) <= 1e-6
    test2 = softmax(np.array([1001, 1002]))
    print(test2)
    assert np.amax(np.fabs(test2 - np.array([0.26894142, 0.73105858]))) <= 1e-6
    test3 = softmax(np.array([-1001, -1002]))
    print(test3)
    assert np.amax(np.fabs(test3 - np.array([0.73105858, 0.26894142]))) <= 1e-6
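
    # A minimal end-to-end sketch (not part of the original checks; the dimensions
    # and input values here are arbitrary): build a 3 -> 4 classifier and verify
    # that classifier_output yields a valid probability distribution.
    W_demo, b_demo = create_classifier(3, 4)
    demo_probs = classifier_output(np.array([1.0, 2.0, 3.0]), [W_demo, b_demo])
    assert demo_probs.shape == (4,)
    assert abs(np.sum(demo_probs) - 1.0) <= 1e-6
    print(predict(np.array([1.0, 2.0, 3.0]), [W_demo, b_demo]))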

    # Sanity checks. If these fail, your gradient calculation is definitely wrong.
    # If they pass, it is likely, but not certainly, correct.
    from grad_check import gradient_check

    W, b = create_classifier(3, 4)

    def _loss_and_W_grad(W):
        global b
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b])
        return loss, grads[0]

    def _loss_and_b_grad(b):
        global W
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b])
        return loss, grads[1]

    for _ in range(1000):
        W = np.random.randn(W.shape[0], W.shape[1])
        b = np.random.randn(b.shape[0])
        # result = gradient_check(_loss_and_b_grad, b)
        # if not result:
        #     print("ERROR")
        #     exit()
        if not gradient_check(_loss_and_W_grad, W):
            print("ERROR")
            exit()