Skip to content

Commit

Permalink
part 2: linear regression, and linear regression with perceptron learning algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
santiaago committed Jan 20, 2013
1 parent 541d4a9 commit bd44236
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 65 deletions.
63 changes: 32 additions & 31 deletions hw1.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
from tools import build_training_set
from tools import sign

from numpy import array


def build_misclassified_set(t_set,w):
'''returns a tuple of index of t_set items
Expand All @@ -51,66 +53,57 @@ def h(w,x):
res = res + w[i]*x[i]
return sign(res)

def PLA(N_points = 10):
def PLA(N_points,w,f,t_set):
'''
Returns: (t_set,w,iteration,f)
-t_set: item of t_set is: [[vector_x], y]
-w: vector of same dimention as vector_x of weights
-iteration: Number of iterations needed for convergence
-f: target lambda function f
- t_set: item of t_set is: [[vector_x], y]
- w: vector of same dimention as vector_x of weights
- iteration: Number of iterations needed for convergence
- f: target lambda function f
Perceptron Algorithm:
- pick a misclasified point from misclassified set
- if there are no misclassified points break iteration weight are ok. break iteration.
- if there is a misclassified point update weights
'''
N = N_points
iteration = 0
# create random contelation of points () in interval [-1,1]
# create random target function
# build training set

d = data(N)
l = randomline()
f = target_function(l)
t_set = build_training_set(d,f)

w = [0,0,0] # weight vector w0 , w1, w2

#iterate Perceptron Algorithm
iterate = True
count = 0

while iterate:
iteration = iteration + 1
#pick a misclasified point from misclassified set
iteration = iteration + 1
misclassified_set = build_misclassified_set(t_set,w)
# if there are no misclassified points break iteration weight are ok.
if len(misclassified_set)==0:break
if len(misclassified_set)==0 : break
index = randint(0,len(misclassified_set)-1)
p = misclassified_set[index]
point = t_set[p][0]
j = misclassified_set[index]
point = t_set[j][0]

s = h(w,point)
yn = t_set[p][1]
yn = t_set[j][1]

# update weights if misclassified
if s != yn:
xn = point
w[0] = w[0] + yn*xn[0]
w[1] = w[1] + yn*xn[1]
w[2] = w[2] + yn*xn[2]
return t_set,w,iteration,f
return w,iteration

def evaluate_diff_f_g(f,w):
    '''Estimate the disagreement between target f and hypothesis w.

    Draws 100 points uniformly from [-1,1] x [-1,1], classifies each with
    the target function f and with the linear hypothesis w, and returns the
    fraction of points on which the two disagree (a Monte Carlo estimate of
    the out-of-sample error).
    '''
    n_samples = 100
    mismatches = 0
    for trial in range(n_samples):
        # fresh out-of-sample point
        x = uniform(-1,1)
        y = uniform(-1,1)
        # sign(f(x), y) compares the target line's height at x against y;
        # h classifies the augmented input vector [1, x, y] with weights w
        if sign(f(x), y) != h(w, [1, x, y]):
            mismatches = mismatches + 1
    return mismatches / (n_samples * 1.0)
Expand All @@ -124,7 +117,14 @@ def run_PLA(N_samples,N_points):

for i in range(N_samples):
# run PLA in sample
t_set,w,iteration,f = PLA(N_points)
d = data(N_points)
l = randomline()
f = target_function(l)
t_set = build_training_set(d,f)
w = [0,0,0]

w,iteration = PLA(N_points,w,f,t_set)

iterations.append(iteration)
# check if points are classified or not
for i in range(len(t_set)):
Expand All @@ -135,6 +135,7 @@ def run_PLA(N_samples,N_points):
samples.append(0)
b_misclassified = True
break

# check difference between f and g
diff.append(evaluate_diff_f_g(f,w))
if not b_misclassified: samples.append(1)
Expand Down
109 changes: 77 additions & 32 deletions hw2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
N_COINS = 1000
N_TOSS = 10
N_EXPERIMENT = 100000
HEAD = 0
TAILS = 1

from random import randint

#--------------------------------------------------
#Hoeffding inequality
def main():

def hoeffding_inequality():
table_v1 = []
table_vrand = []
table_vmin = []
Expand All @@ -27,7 +30,7 @@ def main():
# Cmin is a coin which had the minimum frequency of heads
cmin = compute_min_frec_heads(coins)
#print_coins(c1,crand,cmin)
# append values of fractions for average after experiment

table_v1.append(fractionOfHeads(c1))
table_vrand.append(fractionOfHeads(crand))
table_vmin.append(fractionOfHeads(cmin))
Expand All @@ -45,10 +48,11 @@ def main():


def fractionOfHeads(c):
return c.count(0)/(N_TOSS*1.0)
'fractions of Heads in list c'
return c.count(HEAD)/(N_TOSS*1.0)

def compute_min_frec_heads(coins):
'min frec of head (head = 0 )'
'minimum frecuency of heads in list coins '

f_heads = [fractionOfHeads(c) for c in coins]

Expand All @@ -72,6 +76,7 @@ def print_vs(v1,vrand,vmin):
print 'vmin = %s' % (str(vmin))

def flip_coin(n = N_TOSS):
    'Simulate n independent coin tosses; each entry is 0 (head) or 1 (tails).'
    tosses = []
    for _ in range(n):
        tosses.append(randint(0, 1))
    return tosses

#--------------------------------------------------------------------------
Expand All @@ -83,64 +88,104 @@ def flip_coin(n = N_TOSS):
from tools import build_training_set
from tools import sign

from hw1 import PLA

from numpy import array
from numpy import transpose
from numpy.linalg import pinv as pinv # pseudo inverse aka dagger
from numpy.linalg import norm
from numpy import dot
from numpy import sign

verbose_lr = False
def linear_regression(N_points = 100):
d = data(N_points)
l = randomline()
f = target_function(l)

t_set = build_training_set(d,f)

def linear_regression(N_points,d,t_set):
    '''Solve for the least-squares weight vector of the training set.

    N_points -- number of data points (unused in this body; kept for interface parity)
    d        -- constellation of points (unused in this body; labels are already in t_set)
    t_set    -- training set: each item is [vector_x, y]

    Returns a tuple (wlin, X, y):
    wlin -- dot(pinv(X), y), the one-step least-squares solution
    X    -- input data matrix built from t_set
    y    -- target vector built from t_set
    '''

    # y: vector of labels, X: matrix whose rows are the input vectors
    y_vector = target_vector(t_set)
    X_matrix = input_data_matrix(t_set)
    # pseudo-inverse ("dagger") of X -- presumably wraps numpy.linalg.pinv
    # imported above; TODO confirm pseudo_inverse's definition elsewhere
    X_pseudo_inverse = pseudo_inverse(X_matrix)

    if verbose_lr:
        print 'y: %s'% y_vector
        print '----------'
        print 'X: %s' %X_matrix
        print '----------'
        print 'X pseudo inverse: %s '%X_pseudo_inverse
        print '----------'
        print 'wlin: %s' %(dot(X_pseudo_inverse,y_vector) )
        print '----------'

    # wlin = X_dagger * y
    return dot(X_pseudo_inverse,y_vector),X_matrix,y_vector

def run_linear_regression(N_samples,N_points):
print 'running Linear Regression on %s samples' %str(N_samples)
print 'Each sample has %s data points' %str(N_points)

Ein_avg = []
Eout_avg = []

for i in range(N_samples):
wlin,X,y = linear_regression(N_points)

d = data(N_points)
l = randomline()
f = target_function(l)
t_set = build_training_set(d,f)

wlin,X,y = linear_regression(N_points,d,t_set)

Ein = compute_Ein(wlin,X,y)
Ein_avg.append(Ein)
if verbose_lr:
print 'Ein: %s '% Ein

Eout = compute_Eout(wlin,f,N_points)
Eout_avg.append(Eout)

print 'Average Ein: %s' %(sum(Ein_avg)/(N_samples*1.0))
print 'Average Eout: %s' %(sum(Eout_avg)/(N_samples*1.0))

def compute_Ein(wlin, X, y):
'fraction of in sample points which got classified incorrectly'
N = len(y)
g_vector = dot(X,wlin) #X * wlin
def run_lr_and_pla(N_samples, N_points):
print 'running Linear Regression on %s samples' %N_samples
print 'Each samples has %s data points' %N_points

iteration_avg = []
for i in range(N_samples):

d = data(N_points)
l = randomline()
f = target_function(l)
t_set = build_training_set(d,f)

wlin,X,y = linear_regression(N_points,d,t_set)

w_pla,iteration = PLA_v2(N_points,wlin,f,t_set)
iteration_avg.append(iteration)

print 'Average Number of iterations is : %s' %(sum(iteration_avg)/(N_samples*1.0))

def compute_Eout(wlin,f,N_points):
    '''Fraction of out-of-sample points misclassified by hypothesis wlin.

    Draws a fresh random sample of N_points, labels it with the target
    function f, classifies it with the linear weights wlin, and returns
    misclassified / total.
    '''
    # fresh out-of-sample data, labelled by the target function
    out_points = data(N_points)
    out_set = build_training_set(out_points, f)

    X = input_data_matrix(out_set)
    y = target_vector(out_set)

    # classify each out-of-sample point with the linear hypothesis
    predictions = dot(X, wlin)
    for k in range(len(predictions)):
        predictions[k] = sign(predictions[k])

    # a nonzero difference means prediction and label disagree
    deltas = predictions - y
    mismatches = 0
    for delta in deltas:
        if delta != 0:
            mismatches = mismatches + 1
    return mismatches / (len(deltas) * 1.0)

def compute_Ein(wlin, X, y):
'fraction of in sample points which got classified incorrectly'
N = len(y)
g_vector = sign(dot(X,wlin))

vEin = g_vector - y
nEin = 0
for i in range(len(vEin)):
if vEin[i]!= 0:
nEin = nEin + 1
if vEin[i]!= 0: nEin = nEin + 1

Ein = nEin / (len(vEin) *1.0)
return Ein
return nEin / (len(vEin) *1.0)

def target_vector(t_set):
y = array([t[1] for t in t_set])
Expand Down
4 changes: 2 additions & 2 deletions tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def target_function(l):
def sign(x,compare_to = 0):
    '''Return +1.0 if x is strictly greater than compare_to, else -1.0.

    compare_to defaults to 0, giving the usual two-class sign function,
    except that sign(0) is -1.0 (ties count as the negative class).
    Returns floats (not ints) so results mix cleanly with numpy arrays;
    docstring previously claimed integer +1/-1.
    '''
    if x > compare_to:
        return +1.
    else:
        return -1.

def map_point(point,f):
'maps a point (x1,x2) to a sign -+1 following function f '
Expand Down

0 comments on commit bd44236

Please sign in to comment.