diff --git a/hw1.py b/hw1.py
index c238f40..e5c856e 100644
--- a/hw1.py
+++ b/hw1.py
@@ -30,6 +30,8 @@
 from tools import build_training_set
 from tools import sign
 
+from numpy import array
+
 def build_misclassified_set(t_set,w):
     '''returns a tuple of index of t_set items
@@ -51,66 +53,57 @@ def h(w,x):
         res = res + w[i]*x[i]
     return sign(res)
 
-def PLA(N_points = 10):
+def PLA(N_points,w,f,t_set):
     '''
-    Returns: (t_set,w,iteration,f)
-    -t_set: item of t_set is: [[vector_x], y]
-    -w: vector of same dimention as vector_x of weights
-    -iteration: Number of iterations needed for convergence
-    -f: target lambda function f
+    - t_set: item of t_set is: [[vector_x], y]
+    - w: weight vector of the same dimension as vector_x
+    - iteration: number of iterations needed for convergence
+    - f: target lambda function f
+
+    Perceptron Algorithm:
+    - pick a random point from the misclassified set
+    - if there are no misclassified points, the weights are correct: stop iterating
+    - otherwise, update the weights using the picked point
     '''
     N = N_points
     iteration = 0
 
-    # create random contelation of points () in interval [-1,1]
-    # create random target function
-    # build training set
-
-    d = data(N)
-    l = randomline()
-    f = target_function(l)
-    t_set = build_training_set(d,f)
-
-    w = [0,0,0] # weight vector w0 , w1, w2
-
-    #iterate Perceptron Algorithm
     iterate = True
-    count = 0
+
     while iterate:
-        iteration = iteration + 1
-        #pick a misclasified point from misclassified set
+        iteration = iteration + 1
 
         misclassified_set = build_misclassified_set(t_set,w)
-        # if there are no misclassified points break iteration weight are ok.
-        if len(misclassified_set)==0:break
+        if len(misclassified_set) == 0: break
 
         index = randint(0,len(misclassified_set)-1)
-        p = misclassified_set[index]
-        point = t_set[p][0]
+        j = misclassified_set[index]
+        point = t_set[j][0]
         s = h(w,point)
-        yn = t_set[p][1]
+        yn = t_set[j][1]
 
-        # update weights if misclassified
         if s != yn:
             xn = point
             w[0] = w[0] + yn*xn[0]
             w[1] = w[1] + yn*xn[1]
             w[2] = w[2] + yn*xn[2]
 
-    return t_set,w,iteration,f
+    return w,iteration
 
 def evaluate_diff_f_g(f,w):
     'Returns the average of difference between f and g (g is equivalent as vector w )'
     count = 0
     limit = 100
     diff = 0
+    # generate random points as out-of-sample data and count
+    # the disagreements between target function f and
+    # hypothesis function g (represented by the weight vector w)
     while count < limit:
         count = count + 1
-        # generate random point as out of sample data
        x = uniform(-1,1)
        y = uniform(-1,1)
        vector = [1,x,y]
        sign_f = sign(f(x),y)
        sign_g = h(w,vector)
-        # check result and count if difference between target function f and hypothesis function g
+
        if sign_f != sign_g:
            diff = diff + 1
    return diff/(count*1.0)
@@ -124,7 +117,14 @@ def run_PLA(N_samples,N_points):
     for i in range(N_samples):
         # run PLA in sample
-        t_set,w,iteration,f = PLA(N_points)
+        d = data(N_points)
+        l = randomline()
+        f = target_function(l)
+        t_set = build_training_set(d,f)
+        w = [0,0,0]
+
+        w,iteration = PLA(N_points,w,f,t_set)
+
         iterations.append(iteration)
         # check if points are classified or not
         for i in range(len(t_set)):
@@ -135,6 +135,7 @@
             samples.append(0)
             b_misclassified = True
             break
+        # check difference between f and g
         diff.append(evaluate_diff_f_g(f,w))
         if not b_misclassified:
             samples.append(1)
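
The PLA refactor above moves data generation out of the algorithm, so callers now
build the training set and pass the initial weights in. A minimal sketch of the new
call sequence (assuming data, randomline, target_function and build_training_set
live in tools.py, as the hw1.py imports suggest; illustrative, not part of the patch):

    from tools import data, randomline, target_function, build_training_set
    from hw1 import PLA, evaluate_diff_f_g

    d = data(10)                          # 10 random points in [-1,1] x [-1,1]
    f = target_function(randomline())     # random linear target
    t_set = build_training_set(d, f)      # items pair [1, x1, x2] with a +-1 label
    w, iterations = PLA(10, [0, 0, 0], f, t_set)  # zero initial weights
    print 'converged in %s iterations, P(f != g) ~ %s' % (iterations, evaluate_diff_f_g(f, w))

This is exactly what the new run_PLA loop does per sample, and passing w explicitly
is what lets hw2.py seed PLA with the linear-regression weights.
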
diff --git a/hw2.py b/hw2.py
index f165759..076e685 100644
--- a/hw2.py
+++ b/hw2.py
@@ -6,12 +6,15 @@
 N_COINS = 1000
 N_TOSS = 10
 N_EXPERIMENT = 100000
+HEAD = 0
+TAILS = 1
 
 from random import randint
 
 #--------------------------------------------------
 #Hoeffding inequality
-def main():
+
+def hoeffding_inequality():
     table_v1 = []
     table_vrand = []
     table_vmin = []
@@ -27,7 +30,7 @@
         # Cmin is a coin which had the minimum frequency of heads
         cmin = compute_min_frec_heads(coins)
         #print_coins(c1,crand,cmin)
-        # append values of fractions for average after experiment
+
         table_v1.append(fractionOfHeads(c1))
         table_vrand.append(fractionOfHeads(crand))
         table_vmin.append(fractionOfHeads(cmin))
@@ -45,10 +48,11 @@
 
 def fractionOfHeads(c):
-    return c.count(0)/(N_TOSS*1.0)
+    'fraction of heads in list c'
+    return c.count(HEAD)/(N_TOSS*1.0)
 
 def compute_min_frec_heads(coins):
-    'min frec of head (head = 0 )'
+    'minimum frequency of heads in list coins'
 
     f_heads = [fractionOfHeads(c) for c in coins]
@@ -72,6 +76,7 @@
     print 'vmin = %s' % (str(vmin))
 
 def flip_coin(n = N_TOSS):
+    'list of n random flips, each 0 (heads) or 1 (tails)'
     return [randint(0,1) for i in range(n)]
 
 #--------------------------------------------------------------------------
@@ -83,34 +88,26 @@
 from tools import build_training_set
 from tools import sign
 
+from hw1 import PLA
+
 from numpy import array
 from numpy import transpose
 from numpy.linalg import pinv as pinv # pseudo inverse aka dagger
 from numpy.linalg import norm
 from numpy import dot
+from numpy import sign # vectorized; note this shadows tools.sign above
 
 verbose_lr = False
 
-def linear_regression(N_points = 100):
-    d = data(N_points)
-    l = randomline()
-    f = target_function(l)
-
-    t_set = build_training_set(d,f)
-
+def linear_regression(N_points,d,t_set):
+    '''
+    d = constellation of points
+    t_set = training set; each item is [vector_x, y]
+    '''
+
     y_vector = target_vector(t_set)
     X_matrix = input_data_matrix(t_set)
     X_pseudo_inverse = pseudo_inverse(X_matrix)
 
-    if verbose_lr:
-        print 'y: %s'% y_vector
-        print '----------'
-        print 'X: %s' %X_matrix
-        print '----------'
-        print 'X pseudo inverse: %s '%X_pseudo_inverse
-        print '----------'
-        print 'wlin: %s' %(dot(X_pseudo_inverse,y_vector) )
-        print '----------'
-
     return dot(X_pseudo_inverse,y_vector),X_matrix,y_vector
 
 def run_linear_regression(N_samples,N_points):
@@ -118,29 +115,77 @@
     print 'Each sample has %s data points' %str(N_points)
 
     Ein_avg = []
+    Eout_avg = []
+
     for i in range(N_samples):
-        wlin,X,y = linear_regression(N_points)
+
+        d = data(N_points)
+        l = randomline()
+        f = target_function(l)
+        t_set = build_training_set(d,f)
+
+        wlin,X,y = linear_regression(N_points,d,t_set)
+
         Ein = compute_Ein(wlin,X,y)
         Ein_avg.append(Ein)
-        if verbose_lr:
-            print 'Ein: %s '% Ein
+
+        Eout = compute_Eout(wlin,f,N_points)
+        Eout_avg.append(Eout)
+
     print 'Average Ein: %s' %(sum(Ein_avg)/(N_samples*1.0))
+    print 'Average Eout: %s' %(sum(Eout_avg)/(N_samples*1.0))
 
-def compute_Ein(wlin, X, y):
-    'fraction of in sample points which got classified incorrectly'
-    N = len(y)
-    g_vector = dot(X,wlin) #X * wlin
+def run_lr_and_pla(N_samples, N_points):
+    print 'running Linear Regression and PLA on %s samples' %N_samples
+    print 'Each sample has %s data points' %N_points
+
+    iteration_avg = []
+    for i in range(N_samples):
+
+        d = data(N_points)
+        l = randomline()
+        f = target_function(l)
+        t_set = build_training_set(d,f)
+
+        wlin,X,y = linear_regression(N_points,d,t_set)
+
+        w_pla,iteration = PLA(N_points,wlin,f,t_set)
+        iteration_avg.append(iteration)
+
+    print 'Average Number of iterations is : %s' %(sum(iteration_avg)/(N_samples*1.0))
+
+def compute_Eout(wlin,f,N_points):
+    'number of out-of-sample points misclassified / total number of out-of-sample points'
+
+    d = data(N_points)
+    t_set = build_training_set(d,f)
+
+    X_matrix = input_data_matrix(t_set)
+    y_vector = target_vector(t_set)
+
+    g_vector = dot(X_matrix,wlin)
     for i in range(len(g_vector)):
         g_vector[i] = sign(g_vector[i])
+
+    vEout = g_vector - y_vector
+    nEout = 0
+    for i in range(len(vEout)):
+        if vEout[i] != 0:
+            nEout = nEout + 1
+    Eout = nEout/(len(vEout)*1.0)
+    return Eout
 
+def compute_Ein(wlin, X, y):
+    'fraction of in-sample points classified incorrectly'
+    N = len(y)
+    g_vector = sign(dot(X,wlin))
+
     vEin = g_vector - y
     nEin = 0
     for i in range(len(vEin)):
-        if vEin[i]!= 0:
-            nEin = nEin + 1
+        if vEin[i] != 0: nEin = nEin + 1
 
-    Ein = nEin / (len(vEin) *1.0)
-    return Ein
+    return nEin / (len(vEin) *1.0)
 
 def target_vector(t_set):
     y = array([t[1] for t in t_set])
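
The one-step learning in linear_regression is the pseudo-inverse solution of the
normal equations, w = X_dagger * y. A self-contained numpy sketch with made-up
sample points (pseudo_inverse and input_data_matrix in the patch wrap the same
calls; illustrative, not part of the patch):

    from numpy import array, dot, sign
    from numpy.linalg import pinv

    # rows of X are input vectors [1, x1, x2]; y holds the +-1 labels
    X = array([[1.,  0.2,  0.7],
               [1., -0.5,  0.1],
               [1.,  0.9, -0.3]])
    y = array([1., -1., 1.])

    wlin = dot(pinv(X), y)      # w = X_dagger * y
    g = sign(dot(X, wlin))      # in-sample predictions
    Ein = (g != y).mean()       # fraction misclassified, as in compute_Ein

Seeding PLA with wlin, as run_lr_and_pla does, typically makes the perceptron
converge in far fewer iterations than starting from the zero vector.
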
diff --git a/tools.py b/tools.py
index 6a3cfa4..e521095 100644
--- a/tools.py
+++ b/tools.py
@@ -29,9 +29,9 @@ def target_function(l):
 def sign(x,compare_to = 0):
     'returns +1 or -1 by comparing x to compare_to param (by default = 0)'
     if x > compare_to:
-        return +1
+        return +1.
     else:
-        return -1
+        return -1.
 
 def map_point(point,f):
     'maps a point (x1,x2) to a sign -+1 following function f '
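
One boundary caveat with the float-returning sign above: tools.sign maps 0 to -1
(it falls through to the else branch), while numpy.sign, which hw2.py now imports
over it, maps 0 to 0. A quick hypothetical check, not part of the patch:

    from numpy import sign as np_sign
    from tools import sign

    assert sign(0.5) == +1.0
    assert sign(0) == -1.0           # boundary falls through to else
    assert sign(0.3, 0.5) == -1.0    # compare_to shifts the threshold
    assert np_sign(0) == 0           # numpy disagrees exactly at the boundary

With points drawn uniformly from [-1,1], hitting the boundary exactly has
probability zero, so the two conventions agree in practice.
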