"""Helpers for generating random 2-D data sets, labelling them against a
line, and fitting linear models (plain and regularised least squares)."""

from random import uniform
from random import randint

from numpy import array
from numpy.linalg import pinv as pinv  # pseudo inverse aka dagger
from numpy import dot
from numpy import eye
from numpy import size


def data_interval(low_b, high_b, N=100):
    """Return a list of N values drawn uniformly between low_b and high_b."""
    return [uniform(low_b, high_b) for _ in range(N)]


def data(N=10):
    """Return a list of N random points [x1, x2], each coordinate in [-1, 1]."""
    return [[uniform(-1, 1), uniform(-1, 1)] for _ in range(N)]


def data_from_file(filepath):
    """Read a whitespace-separated data file into [[x1, x2], y] entries.

    Each non-blank line must hold at least three numeric columns; the first
    two become the input point and the third the target value.  Blank lines
    are skipped.  The file is closed before returning.
    """
    dataset = []
    with open(filepath, 'r') as datafile:
        for line in datafile:
            fields = line.split()
            if not fields:
                # tolerate empty lines (e.g. a trailing newline at end of file)
                continue
            x1 = float(fields[0])
            x2 = float(fields[1])
            y = float(fields[2])
            dataset.append([[x1, x2], y])
    return dataset


def randomline():
    """Return [a, b] for the line y = a*x + b through two random points.

    Both points are drawn uniformly from [-1, 1] x [-1, 1].  The slope is the
    signed rise over run, so the line passes through both points; a vertical
    pair (x1 == x2) has no finite slope and is simply redrawn.
    """
    while True:
        x1 = uniform(-1, 1)
        y1 = uniform(-1, 1)
        x2 = uniform(-1, 1)
        y2 = uniform(-1, 1)
        if x1 != x2:
            break
    a = (y2 - y1) / (x2 - x1)
    b = y1 - a * x1
    return [a, b]  # a*x + b


def target_function(coords):
    """Return the function x -> a*x + b for coords = [a, b]."""
    def f(x):
        return coords[0] * x + coords[1]
    return f


def target_random_function(coords):
    """Build a +1/-1 classifier from the line described by coords = [a, b].

    The returned function takes a point X = [x, y] and returns 1.0 when the
    line value a*x + b is strictly less than y (the point lies above the
    line), and -1.0 otherwise.
    """
    func = target_function(coords)

    def f(X):
        x = X[0]
        y = X[1]
        if func(x) < y:
            return 1.0
        return -1.0
    return f


def sign(x, compare_to=0):
    """Return +1.0 if x is strictly greater than compare_to, else -1.0."""
    if x > compare_to:
        return +1.
    return -1.


def signex(x, compare_to=0):
    """Same as sign(); kept under this name for existing callers."""
    return sign(x, compare_to)


def map_point(point, f):
    """Label a point (x1, x2) against the one-argument function f.

    Returns +1.0 when f(x1) is strictly greater than x2 and -1.0 otherwise.
    """
    return sign(f(point[0]), point[1])


def map_point_fmultipleparams(point, f):
    """Label a point against a function f that takes the whole point.

    Returns +1.0 when f(point) is strictly greater than point[1] and -1.0
    otherwise.
    """
    return sign(f(point), point[1])


def build_training_set(data, func):
    """Return [[1.0, x1, x2], label] entries, labelled with map_point().

    The leading 1.0 is the constant (bias) coordinate of each input vector.
    """
    return [[[1.0, point[0], point[1]], map_point(point, func)]
            for point in data]


def build_training_set_fmultipleparams(data, func):
    """Return [[1.0, x1, x2], label] entries, labelled with
    map_point_fmultipleparams().

    The leading 1.0 is the constant (bias) coordinate of each input vector.
    """
    return [[[1.0, point[0], point[1]], map_point_fmultipleparams(point, func)]
            for point in data]


def print_avg(name, vector):
    """Print the arithmetic mean of vector, labelled with name.

    Raises ZeroDivisionError when vector is empty.
    """
    # A single parenthesised argument prints identically on Python 2 and 3.
    print('Average %s: %s' % (name, sum(vector) / (len(vector) * 1.0)))


def target_vector(t_set):
    """Return a numpy array of the targets (second item of every entry)."""
    return array([t[1] for t in t_set])


def input_data_matrix(t_set):
    """Return a numpy array of the inputs (first item of every entry)."""
    return array([t[0] for t in t_set])


def pseudo_inverse(X):
    """Return the Moore-Penrose pseudo-inverse ("dagger") of X."""
    return pinv(X)


def linear_regression(N_points, t_set):
    """Fit least-squares weights for a training set.

    Computes w = pinv(X) . y, where X stacks the input vectors of t_set and y
    its targets.  N_points is not used; it is kept so existing callers keep
    working.

    Returns the tuple (w, X, y).
    """
    y_vector = target_vector(t_set)
    X_matrix = input_data_matrix(t_set)
    # pinv(X) already equals (X^T X)^-1 X^T, so it is applied to y directly;
    # multiplying by X^T again would be a shape error for N != d.
    w = dot(pseudo_inverse(X_matrix), y_vector)
    return w, X_matrix, y_vector


def linear_regression_lda(N_points, t_set, lda):
    """Fit regularised least-squares weights for a training set.

    Computes w = pinv(X^T X + lda * I) . X^T . y, where X stacks the input
    vectors of t_set, y its targets and lda is the regularisation strength
    (no default; the caller must supply it).  N_points is not used; it is
    kept so existing callers keep working.

    Returns the tuple (w, X, y).
    """
    y_vector = target_vector(t_set)
    X_matrix = input_data_matrix(t_set)
    regularised = dot(X_matrix.T, X_matrix) + lda * eye(size(X_matrix, 1))
    w = dot(dot(pseudo_inverse(regularised), X_matrix.T), y_vector)
    return w, X_matrix, y_vector
+ +signex(x,compare_to = 0) +delete use sign with an alias + +sign(x,compare_to = 0) +map_point(point,f) +rename get_sign(point,f) + +map_point_fmultipleparams(point,f) +get_sign_multiparam + +pseudo_inverse(X) +check if needed + +linear_regression(N_points,t_set,lda = 1.0) + +linear_regression_lda(N_points,t_set,lda) + +utils_print: +----------------------------------------------- +print_avg(name,vector) \ No newline at end of file diff --git a/hw1.py b/hw1.py index 0de61f1..31230b1 100644 --- a/hw1.py +++ b/hw1.py @@ -106,7 +106,6 @@ def evaluate_diff_f_g(f,w): if sign_f != sign_g: diff = diff + 1 return diff/(count*1.0) - def run_PLA(N_samples,N_points): samples = []# vector of 1 clasified, 0 misclassified diff --git a/tools.py b/tools.py index c992c1b..a81239b 100644 --- a/tools.py +++ b/tools.py @@ -130,7 +130,18 @@ def pseudo_inverse(X): 'dagger of pseudo matrix used for linear regression' return pinv(X) -def linear_regression(N_points,t_set,lda = 1.0): +def linear_regression(N_points,t_set): + '''Linear regresion algorithm + from Y and X compute the dagger or pseudo matrix + return the Xdagger.Y as the w vector + default lambda is 1.0 + ''' + y_vector = target_vector(t_set) + X_matrix = input_data_matrix(t_set) + X_pseudo_inverse = pseudo_inverse(X_matrix) + return dot(dot(X_pseudo_inverse,X_matrix.T),y_vector),X_matrix,y_vector + +def linear_regression_lda(N_points,t_set,lda): '''Linear regresion algorithm from Y and X compute the dagger or pseudo matrix return the Xdagger.Y as the w vector @@ -138,7 +149,6 @@ def linear_regression(N_points,t_set,lda = 1.0): ''' y_vector = target_vector(t_set) X_matrix = input_data_matrix(t_set) - #X_pseudo_inverse = pseudo_inverse(X_matrix)#+lda*eye(size(X_matrix,0))) X_pseudo_inverse = pseudo_inverse(dot(X_matrix.T,X_matrix)+lda*eye(size(X_matrix,1))) return dot(dot(X_pseudo_inverse,X_matrix.T),y_vector),X_matrix,y_vector