diff --git a/hw5.py b/hw5.py index 25072a4..71227c4 100644 --- a/hw5.py +++ b/hw5.py @@ -157,7 +157,6 @@ def run_log_regression(): eps = 0.01 for i in range(nb_runs): - # generate random function l = randomline() f = target_random_function(l) diff --git a/hw6.py b/hw6.py index b78d96d..0167cb0 100644 --- a/hw6.py +++ b/hw6.py @@ -11,6 +11,8 @@ from tools import target_vector from tools import input_data_matrix from tools import pseudo_inverse +from tools import data_from_file +from tools import linear_regression KA = -3 LAMBDA = 10**KA @@ -75,58 +77,55 @@ def run_nonlineartransformation(indata,outdata): t_set_trans = transform_t_set(indata) wtrans,Xtrans,ytrans = linear_regression(N_points,t_set_trans) print '-2-' + print 'Linear regression on training set after non linear transformation:' Eintrans = compute_Ein(wtrans,Xtrans,ytrans) Eouttrans = compute_Eout_nonlineartrans(wtrans,outdata) - print Eintrans - print Eouttrans + print 'in sample classification error: %s'%(Eintrans) + print 'out of sample classification error: %s'%(Eouttrans) print '-3-' + print 'Adding weight decay to linear regression with lambda = 10k and k = -3' w_decay = compute_weight_decay(wtrans,t_set_trans,Xtrans,ytrans,-3) Eintrans_decay = compute_Ein(w_decay,Xtrans,ytrans) Eouttrans_decay=compute_Eout_nonlineartrans(w_decay,outdata) - print Eintrans_decay - print Eouttrans_decay + print 'in sample classification error:%s'%(Eintrans_decay) + print 'out of sample classification error: %s'%(Eouttrans_decay) print '-4-' + print 'Using now k = 3' w_decay = compute_weight_decay(wtrans,t_set_trans,Xtrans,ytrans,3) Eintrans_decay = compute_Ein(w_decay,Xtrans,ytrans) Eouttrans_decay=compute_Eout_nonlineartrans(w_decay,outdata) - print Eintrans_decay - print Eouttrans_decay + print 'in sample classification error: %s'%(Eintrans_decay) + print 'out of sample classification error: %s'%(Eouttrans_decay) print '-5-' - for k in [2,1,0,-1,-2]: + Ks = [2,1,0,-1,-2] + print 'searching the lowest out of sample classification error for the following k values.' + print 'k in (%s)'%(str(Ks)) + for k in Ks: w_decay = compute_weight_decay(wtrans,t_set_trans,Xtrans,ytrans,k) Eintrans_decay = compute_Ein(w_decay,Xtrans,ytrans) Eouttrans_decay=compute_Eout_nonlineartrans(w_decay,outdata) - print Eintrans_decay - print Eouttrans_decay + print 'K : %s'%(k) + print 'in sample classification error: %s'%(Eintrans_decay) + print 'out of sample classification error: %s'%(Eouttrans_decay) print '-6-' + print 'searching the minimum out of sample classification error by varying k in the integer values.' mink = 999 minEout = 999 - for k in range(-100,100): + for k in range(-200,200): w_decay = compute_weight_decay(wtrans,t_set_trans,Xtrans,ytrans,k) Eintrans_decay = compute_Ein(w_decay,Xtrans,ytrans) Eout_decay=compute_Eout_nonlineartrans(w_decay,outdata) if Eout_decay < minEout: minEout = Eout_decay mink = k - print k - print minEout - -def getData(filename): - datafile = open(filename, 'r') - data = [] - for line in datafile: - split = line.split() - x1 = float(split[0]) - x2 = float(split[1]) - y = float(split[2]) - data.append([ [x1,x2],y ]) - return data + print 'K: %s'%(k) + print 'out of sample classification error: %s'%(minEout) def tests(): print '-1-' - print '-2-' - indata = getData('in.dta') - outdata = getData('out.dta') + indata = data_from_file('in.dta') + outdata = data_from_file('out.dta') run_nonlineartransformation(indata,outdata) print '-8-' - demo() + print '-9-' + print '-10-' diff --git a/hw7.py b/hw7.py index 6af0eed..335afc8 100644 --- a/hw7.py +++ b/hw7.py @@ -1,3 +1,7 @@ +from math import sqrt + +from numpy import dot +from numpy import sign from tools import linear_regression from tools import target_vector @@ -6,12 +10,9 @@ from hw1 import evaluate_diff_f_g -from math import sqrt - -from numpy import dot -from numpy import sign - def getData(filename): + '''returns a data set with format [[x1,x2],y] + Note: To be moved to Tools''' datafile = open(filename, 'r') data = [] for line in datafile: diff --git a/tools.py b/tools.py index 5cb8548..f7e7a1c 100644 --- a/tools.py +++ b/tools.py @@ -7,10 +7,13 @@ def data_interval(low_b,high_b,N=100): + '''returns a vector of (N) values. + Values are uniformly distributed between low boundary (low_b) and high boundary (high_b)''' d = [] for i in range(N): d.append(uniform(low_b,high_b)) return d + def data(N = 10): 'return N random points (x1,x2)' d = [] @@ -20,8 +23,20 @@ def data(N = 10): d.append([x,y]) return d +def data_from_file(filepath): + 'from a filepath returns a dataset with the form [[x1,x2],y]' + datafile = open(filepath, 'r') + data = [] + for line in datafile: + split = line.split() + x1 = float(split[0]) + x2 = float(split[1]) + y = float(split[2]) + data.append([ [x1,x2],y ]) + return data + def randomline(): - 'computes a random line and returns a and b params: y = ax + b' + 'computes a random line and returns [a,b] : y = ax + b' x1 = uniform(-1,1) y1 = uniform(-1,1) x2 = uniform(-1,1) @@ -31,13 +46,19 @@ def randomline(): b = y1 - a*x1 return [a,b] # a*x + b -def target_function(l): - # print 'Target function: %s x + %s' %(l[0], l[1] - f = lambda x: l[0]*x + l[1] +def target_function(coords): + 'from a coordinate input [a,b] returns the function a*x + b' + f = lambda x: coords[0]*x + coords[1] return f -def target_random_function(l): - func = target_function(l) +def target_random_function(coords): + ''' + description: from a coordinate (coords) with the format [a,b] generated a random function. + - coord: a list of the form [a,b] + - returns: the generated random function that takes as argument a list with the form [x,y] + and returns 1 or -1 whether y is below the linear function defined by a*x + b or above. + ''' + func = target_function(coords) def f(X): x = X[0] y = X[1] @@ -48,7 +69,7 @@ def f(X): return f def sign(x,compare_to = 0): - 'returns +1 or -1 by comparing x to compare_to param (by default = 0)' + 'returns +1 or -1 by comparing (x) to (compare_to) param (by default = 0)' if x > compare_to: return +1. else: @@ -84,7 +105,6 @@ def build_training_set_fmultipleparams(data,func): t_set.append([ [ 1.0, point[0],point[1] ] , y ]) return t_set - def print_avg(name,vector): print 'Average %s: %s'%(name,sum(vector)/(len(vector)*1.0))