forked from CleanColonClub/predict-patient-cancellations
-
Notifications
You must be signed in to change notification settings - Fork 0
/
svmClassifier.py
87 lines (73 loc) · 2.73 KB
/
svmClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split, KFold
def loadData(path='data/data_train.csv'):
    """Load the training CSV and split it into features and target.

    Args:
        path: Location of the CSV file to read. Defaults to
            'data/data_train.csv', the path that used to be hard-coded,
            so existing zero-argument calls behave exactly as before.

    Returns:
        A tuple ``(X, y)``: ``X`` is the loaded DataFrame without the
        'Patient Id' and 'No Show/LateCancel Flag' columns, and ``y`` is
        the 'No Show/LateCancel Flag' column.

    Raises:
        KeyError: If either of the two expected columns is missing.
    """
    target = 'No Show/LateCancel Flag'
    dataset = pd.read_csv(path)
    # Every column except the identifier and the target is kept as a feature.
    X = dataset.drop(columns=['Patient Id', target])
    y = dataset[target]
    return (X, y)
def trainSVM(X_train, X_test, y_train, y_test, krnl):
    """Fit an SVC with the given kernel and score it by ROC AUC.

    Args:
        X_train: Training features passed to ``SVC.fit``.
        X_test: Held-out features used for scoring.
        y_train: Training labels passed to ``SVC.fit``.
        y_test: Held-out labels used as the ground truth for the AUC.
        krnl: Kernel name forwarded to ``SVC(kernel=...)``.

    Returns:
        A tuple ``(auc, svm)`` with the ROC AUC on the test split and the
        fitted classifier.
    """
    svm = SVC(kernel=krnl)
    svm.fit(X_train, y_train)
    # ROC AUC is a ranking metric: it needs a continuous score per sample.
    # Feeding it hard predict() labels reduces the curve to a single
    # operating point, so score with the signed distance to the separating
    # hyperplane instead.
    # NOTE(review): assumes a binary target (1-D decision_function) - confirm.
    y_score = svm.decision_function(X_test)
    auc = roc_auc_score(y_test, y_score)
    return (auc, svm)
def linearSVM(X_train, X_test, y_train, y_test):
    """Return an SVC with a linear kernel fitted on the training split.

    ``X_test`` and ``y_test`` are accepted but not used here; no
    evaluation is performed.
    """
    # SVC.fit returns the fitted estimator itself, so the calls can chain.
    return SVC(kernel='linear').fit(X_train, y_train)
def rbfSVM(X_train, X_test, y_train, y_test):
    """Return an SVC with an RBF kernel fitted on the training split.

    ``X_test`` and ``y_test`` are accepted but not used here; no
    evaluation is performed.
    """
    # SVC.fit returns the fitted estimator itself, so the calls can chain.
    return SVC(kernel='rbf').fit(X_train, y_train)
def sigmoidSVM(X_train, X_test, y_train, y_test):
    """Return an SVC with a sigmoid kernel fitted on the training split.

    ``X_test`` and ``y_test`` are accepted but not used here; no
    evaluation is performed.
    """
    # SVC.fit returns the fitted estimator itself, so the calls can chain.
    return SVC(kernel='sigmoid').fit(X_train, y_train)
def polynomialSVM(X_train, X_test, y_train, y_test):
    """Return an SVC with a polynomial kernel fitted on the training split.

    ``X_test`` and ``y_test`` are accepted but not used here; no
    evaluation is performed.
    """
    # SVC.fit returns the fitted estimator itself, so the calls can chain.
    return SVC(kernel='poly').fit(X_train, y_train)
def SVM_KFoldValidation(X, y, func, k, krnl):
    """Cross-validate ``func`` over k folds and keep the best fold's model.

    Args:
        X: Feature table; rows are selected positionally with ``.iloc``.
        y: Target values; rows are selected positionally with ``.iloc``.
        func: Callable invoked as
            ``func(X_train, X_test, y_train, y_test, krnl)`` that returns
            an ``(auc, model)`` pair for one fold.
        k: Number of folds, passed to ``KFold(n_splits=k)``.
        krnl: Kernel name forwarded unchanged to ``func``.

    Returns:
        A tuple ``(mean_auc, model)``: the mean of the per-fold AUCs and
        the model from the fold with the highest AUC (the earliest such
        fold wins ties).
    """
    best_model = None
    # Start below every possible score: with a 0.0 floor, a run where each
    # fold scored exactly 0.0 would return no model at all.
    best_auc = float('-inf')
    fold_aucs = []
    kfold = KFold(n_splits=k)
    for fold, (train_index, test_index) in enumerate(kfold.split(X), start=1):
        # Report the fold number rather than dumping the whole index array.
        print("running fold", fold, "of", k)
        auc, mod = func(
            X.iloc[train_index],
            X.iloc[test_index],
            y.iloc[train_index],
            y.iloc[test_index],
            krnl,
        )
        fold_aucs.append(auc)
        if auc > best_auc:
            best_model = mod
            best_auc = auc
    return (np.mean(fold_aucs), best_model)
#X,y = loadData()
#auc_svml, svml = SVM_KFoldValidation(X, y, trainSVM, 10, 'linear')
#print("Linear SVM AUC: ", auc_svml)
#auc_svmr, svmr = SVM_KFoldValidation(X, y, trainSVM, 10, 'rbf')
#print("RBF SVM AUC: ", auc_svmr)
#auc_svms, svms = SVM_KFoldValidation(X, y, trainSVM, 10, 'sigmoid')
#print("Sigmoid SVM AUC: ", auc_svms)
#auc_svmp, svmp = SVM_KFoldValidation(X, y, trainSVM, 10, 'poly')
#print("Polynomial SVM AUC: ", auc_svmp)