Commit adafbef (0 parents): showing 10 changed files with 245,454 additions and 0 deletions.
Binary file not shown.
Large diffs are not rendered by default.
@@ -0,0 +1,3 @@
0.148 0.810 0.151 0.521 -1.0
0.163 0.084 0.848 0.202 -1.0
0.428 0.742 0.982 0.686 1.0
@@ -0,0 +1,3 @@
0.148 0.810 0.151 0.521 -1.0
0.163 0.084 0.848 0.202 -1.0
0.428 0.742 0.982 0.686 1.0
@@ -0,0 +1,18 @@
# Parameters
param n >= 1, integer;
param m >= 1, integer;
param nu >= 0;

param K {1..m, 1..m};
param y {i in 1..m};

# Variables
var gamma;
var lambda {1..m} >= 0, <= nu;

# Optimization
maximize SVM_dual: sum{i in 1..m}(lambda[i])
    - 1/2*sum{j in 1..m, k in 1..m}(
        lambda[j]*y[j]*lambda[k]*y[k]*K[j,k]
    );
subject to Dual_restric: sum{i in 1..m}(lambda[i]*y[i]) = 0;
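
For reference, a LaTeX rendering of the quadratic program this model states (reading K as the kernel Gram matrix supplied by the Python code, nu as the upper bound on the multipliers, and lambda as the dual variables):

\max_{\lambda}\ \sum_{i=1}^{m}\lambda_i
    - \frac{1}{2}\sum_{j=1}^{m}\sum_{k=1}^{m}\lambda_j\lambda_k\,y_j y_k\,K_{jk}
\quad\text{s.t.}\quad \sum_{i=1}^{m}\lambda_i y_i = 0,\qquad 0 \le \lambda_i \le \nu,\ i = 1,\dots,m.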
@@ -0,0 +1,216 @@
from sklearn import datasets as ds
from os import getcwd
import numpy as np
import random
import time
import os

# Epsilon is the error tolerance used when comparing numbers
eps = 10e-6


'''
Receives the number of points, the variables and the parameters,
and writes them to a file in the format AMPL can read.
'''
def write_data(nu, K, y, laux):
    dimensions = len(K[0])

    o = open('./ampl_data.dat', 'w')
    if (laux == 'A'): o.write('param n := ' + str(dimensions) + ';\n')
    o.write('param m := ' + str(len(y)) + ';\n' + \
            'param nu:= ' + str(nu) + ';\n\n' + \
            'param ' + laux + ':\n ')
    for i in range(1, dimensions + 1):
        o.write(str(i) + ' ')
    o.write(':=\n')
    for i in range(0, len(y)):
        o.write(str(i + 1) + ' ' + \
                str(K[i, :]).replace('[', '').replace(']', '') + '\n')
    o.write(';\n\nparam y :=\n')
    for i in range(0, len(y)):
        o.write(str(i + 1) + ' ' + str(y[i]) + '\n')
    o.write(';')
    o.close()


'''
Generates num_points points, together with their class, at random
using the gensvmdat program.
'''
def gensvmdat(num_points, seed):
    os.system('./gensvmdat data.dat ' + str(num_points) + ' ' + str(seed))
    with open('./data.dat', 'r') as raw, \
         open('./aux_data.dat', 'w') as clean:
        data = raw.read()
        data = data.replace('*', '').replace('  ', ' ')
        clean.write(data)

    A = np.loadtxt('./aux_data.dat', delimiter = ' ')
    y = A[:, A[0].size - 1]
    A = np.delete(A, A[0].size - 1, 1)
    return A, y


'''
Generates num_points points, together with their class, at random
using the swiss roll method.
'''
def generate_swiss(num_points, seed):
    A, y = ds.make_swiss_roll(num_points, noise = 2, random_state = seed)
    my = np.mean(y)
    y_binary = [0 for i in range(len(y))]
    for i in range(len(y)):
        if y[i] > my: y_binary[i] = 1
        else: y_binary[i] = -1
    return A, y_binary


'''
Due to numerical errors, AMPL reports that the matrix K is not positive
semidefinite, although the supposedly negative eigenvalues are actually 0.
We therefore add an identity matrix multiplied by epsilon to correct this
and make the matrix positive semidefinite.
'''
def write_ampl(A, y, nu, option):
    if option == 1: write_data(nu, A, y, 'A')
    elif option == 2: write_data(nu, A.dot(A.T) + np.eye(len(y))*eps, y, 'K')
    else:
        m = len(y)
        K = np.zeros((m, m))
        s2 = np.mean(np.var(A, 0))
        for i in range(m):
            for j in range(i, m):
                K[i,j] = K[j,i] = np.exp(- np.linalg.norm(A[i,:] - A[j,:])**2/(2*s2))
        write_data(nu, K + np.eye(len(y))*eps, y, 'K')


'''
With generate_skin we read the data from the text file and shuffle it
randomly. For the training set we take the first num_points observations,
while for the test set we randomly pick a batch of num_points observations
different from the first ones.
'''
def generate_skin(num_points, seed, test):
    A = np.loadtxt('./Skin_NonSkin.txt', delimiter = ' ')
    np.random.seed(seed)
    np.random.shuffle(A)
    if not test:
        y = A[:num_points, 3]
        A = A[:num_points, 0:3]
    else:
        random.seed(time.time())
        start = random.randint(num_points, len(A) - num_points - 1)
        y = A[start:(start + num_points), 3]
        A = A[start:(start + num_points), 0:3]

    for i in range(len(y)): y[i] = 2*y[i] - 3   # Map the response variable
    return A, y                                 # from 1 / 2 to -1 / 1.


'''
This function determines which kind of data the user wants to generate and
returns the matrix of explanatory data and a vector with the response
variable.
'''
def generate_data(num_points, seed, dt, test):
    if dt == 1:
        A, y = gensvmdat(num_points, seed)
    elif dt == 2:
        A, y = generate_swiss(num_points, seed)
    else:
        A, y = generate_skin(num_points, seed, test)
    return A, y


'''
From the solution for the lambdas (la), the y's, the points and nu,
returns the solution for w when the dual formulation is used.
'''
def dual_w(la, y, A, nu):
    w = np.zeros((1, len(A[0,:])))
    for i in range(len(y)):
        if la[i] > eps and la[i] < nu - eps:
            w = w + la[i]*y[i]*A[i,:]
        elif la[i] > nu - eps:
            w = w + nu*y[i]*A[i,:].T
    return w.T


'''
Finds the index of the first support vector it encounters and returns it.
'''
def support_vector(la, nu):
    for i in range(len(la)):
        if la[i] > eps and la[i] < nu - eps:
            return i
    return 0


'''
From the results of the optimization with the dual formulation, the
intercept parameter (gamma) and the classification of each point
(1 or -1) are computed.
'''
def dual_classification(la, y, Atr, nu, option, Ate):
    if option == 3: s2 = np.mean(np.var(Atr, 0))
    m = len(y)
    c = [1 for i in range(m)]
    index_sv = support_vector(la, nu)
    # Compute gamma
    gamma = 1/y[index_sv]
    for i in range(m):
        if option == 3: K = np.exp(- np.linalg.norm(Atr[index_sv, :] - Atr[i, :])**2/(2*s2))
        else: K = Atr[index_sv, :].dot(Atr[i, :])
        gamma = gamma - la[i]*y[i]*K
    # Check on which side of the hyperplane each point lies.
    for i in range(m):
        wtphi = 0
        for j in range(m):
            if option == 3: K = np.exp(- np.linalg.norm(Ate[i,:] - Atr[j,:])**2/(2*s2))
            else: K = Ate[i, :].dot(Atr[j, :])
            wtphi = wtphi + la[j]*y[j]*K

        if wtphi + gamma < eps: c[i] = -1

    return c, gamma


'''
From the results of the optimization with the primal formulation, we compute
on which side of the hyperplane each point lies and assign it 1 or -1.
'''
def primal_classification(A, w, y, gamma):
    c1 = [1 for i in range(len(y))]
    for i in range(len(y)):
        if A[i,:] * w + gamma < eps: c1[i] = -1

    return c1


'''
From the real class and the predicted class of each point, we compute the
accuracy of our optimization.
'''
def precision(y, y_pred):
    acc = 0
    t = len(y)
    for i in range(t):
        if y[i] == y_pred[i]: acc = acc + 1
    return acc/t


'''
Writes the results to a text file.
'''
def print_to_txt(w = [0], gamma = 0, acc1 = 0, acc2 = 0, s = 0, option = 1):
    res = open('resultados.txt', 'w')
    if option == 1: res.write('\n + ------------------- PRIMAL PROBLEM RESULTS ------------------- +' + '\n\n')
    elif option == 2: res.write('\n + -------------------- DUAL PROBLEM RESULTS -------------------- +' + '\n\n')
    else: res.write('\n + -------------------- RBF PROBLEM RESULTS -------------------- +' + '\n\n')
    res.write('Value of gamma: ' + str(gamma) + '\n')
    if option != 3:
        res.write('\nValues of the weights w:' + '\n')
        for i in range(len(w)):
            res.write('    ' + str(w[i]) + '\n')
    res.write('\nTest accuracy: ' + str(acc2*100) + '%.')
    res.write('\nTraining accuracy: ' + str(acc1*100) + '%.' + '\n\n')
    res.close()
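
For reference, the Gaussian kernel entries assembled in write_ampl (option 3) and re-evaluated in dual_classification correspond, in LaTeX, to:

K_{ij} = \exp\!\left(-\frac{\lVert A_{i,:} - A_{j,:}\rVert^{2}}{2\sigma^{2}}\right),
\qquad \sigma^{2} = \frac{1}{n}\sum_{k=1}^{n}\operatorname{Var}(A_{\cdot k}),

where sigma squared is the mean of the per-feature variances of the training matrix, as computed by np.mean(np.var(A, 0)).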
Binary file not shown.
@@ -0,0 +1,125 @@
from amplpy import AMPL, Environment
import functions as fun
import numpy as np
import random
import time
import sys
import os


# Path to the AMPL installation on our machine
ampl = AMPL(Environment('/home/alex/AMPL/ampl_linux-intel64'))
#ampl = AMPL(Environment('/home/elias/Escritorio/uni/2º/OM/ampl_linux-intel64'))
# Select the cplex solver
ampl.setOption('solver', 'cplex')

'''
PARAMETERS:
    - option: indicates which kind of problem to solve
        1: primal
        2: dual with a linear kernel
        3: dual with a Gaussian kernel
    - num_points: number of points
    - seed: seed for the random generation of the dataset
    - nu: parameter of the SVM problem formulation
    - data_type: indicates whether the points are generated with the algorithm
                 provided by the professor (gensvmdat), with the swiss roll
                 algorithm from sklearn, or taken from the "skin" database
        1: generate them with gensvmdat (linearly separable data)
        2: generate them with sklearn swiss_roll (non linearly separable data)
        3: use the "skin" database
'''
try:
    option = int(sys.argv[1])
    num_points = int(sys.argv[2])
    seed = int(sys.argv[3])
    nu = float(sys.argv[4])
    data_type = int(sys.argv[5])

    '''
    Here we generate the two datasets:
        The training set, used to build the model, i.e. w and gamma.
        The test set, used to classify new points with that model and see
        how well it classifies points it was not fitted on.
    '''

    print("\nComputing results...\n")

    # Generate the training data
    Atr, ytr = fun.generate_data(num_points, seed, data_type, False)
    fun.write_ampl(Atr, ytr, nu, option)

    # Generate the test data; if the "skin" database is not used,
    # draw a fresh random seed for it
    if data_type != 3:
        random.seed(time.time())
        seed2 = random.randint(0, 10**6)
    else: seed2 = seed
    Ate, yte = fun.generate_data(num_points, seed2, data_type, True)

    # Read the model and the data
    if option == 1: ampl.read('./primal.mod')
    else: ampl.read('./dual.mod')

    ampl.readData('./ampl_data.dat')
    ampl.solve()

    print("\nWriting results...\n")

    '''
    Here we fetch the parameters and variables returned by AMPL and
    convert them to numpy matrices
    '''
    if option == 1:
        w = ampl.getVariable('w').getValues()
        w = np.matrix(w.toPandas())
        w = w.reshape(w.size, 1)

        gamma = ampl.getVariable('gamma').getValues()
        gamma = float(gamma.toList()[0])

        s = ampl.getVariable('s').getValues()
        s = np.matrix(s.toPandas())
        s = s.reshape(num_points, 1)

        ctrain = fun.primal_classification(Atr, w, ytr, gamma)
        ctest = fun.primal_classification(Ate, w, yte, gamma)
    else:
        la = ampl.getVariable('lambda').getValues()
        la = np.matrix(la.toPandas())
        la = la.reshape(num_points, 1)

        ctrain, gamma = fun.dual_classification(la, ytr, Atr, nu, option, Atr)
        ctest, gamma = fun.dual_classification(la, ytr, Atr, nu, option, Ate)
        if option == 2:
            w = fun.dual_w(la, ytr, Atr, nu)

        if option != 3:
            ctest = fun.primal_classification(Ate, w, yte, gamma)

    '''
    Write the results to the file resultados.txt
    '''
    if option == 3:
        fun.print_to_txt(gamma = gamma, acc1 = fun.precision(ytr, ctrain), acc2 = fun.precision(yte, ctest), option = option)
    else:
        fun.print_to_txt(w = w, gamma = gamma, acc1 = fun.precision(ytr, ctrain), acc2 = fun.precision(yte, ctest), option = option)

    os.remove('./ampl_data.dat')
    # Remove the .dat file passed to AMPL and keep only the generated data
    # file. To keep the .dat file, comment out this line.

# Input format error
except:
    print('\nINPUT ERROR!: Please make sure to pass, besides the executable ' +
          'file (IN THIS ORDER), the option, the number of points to ' +
          'generate, the seed for the point generation, the value of nu ' +
          'and a last parameter indicating how to generate the data:\n\n' +
          '    1: generate them with gensvmdat (linearly separable data)\n' +
          '    2: generate them with sklearn swiss_roll (non linearly separable data)\n' +
          '    3: use the "skin" database\n')
    print('Example: python3 launch.py 1 250 1234 0.5 1')
    print('\nAvailable options:\n' +
          '    1. Primal problem\n' +
          '    2. Dual problem\n' +
          '    3. Dual problem (RBF)\n')
@@ -0,0 +1,18 @@
# Parameters
param n >= 1, integer;
param m >= 1, integer;
param nu >= 0;

param A {1..m, 1..n};
param y {i in 1..m};


# Variables
var gamma;
var w {1..n};
var s {1..m} >= 0;

# Optimization
minimize SVM_primal: 1/2*sum{i in 1..n}(w[i]*w[i]) + nu*sum{j in 1..m}(s[j]);
subject to restric {j in 1..m}:
    y[j] * (sum{i in 1..n} (w[i]*A[j,i]) + gamma) + s[j] >= 1;
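
For reference, a LaTeX rendering of the soft-margin primal problem this model states (w are the weights, gamma the intercept, s the slack variables):

\min_{w,\gamma,s}\ \frac{1}{2}\sum_{i=1}^{n} w_i^{2} + \nu\sum_{j=1}^{m} s_j
\quad\text{s.t.}\quad y_j\left(\sum_{i=1}^{n} w_i A_{ji} + \gamma\right) + s_j \ge 1,\qquad s_j \ge 0,\ j = 1,\dots,m.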