sample_trainer.py
"""
This is a sample trainer file, used to test both linear regression and logistic regression supervised learning technique using sample files.
Sample1.csv is trained using linear regression, while sample2.csv is trained using logistic regression.
Assuming you are in the main 'Othello' directory, launch the file immediately to train sample1.csv and sample2.csv.
Some analytics from both linear and logistic regression will be printed out on the terminal window.
Please note that this is only using some arbitrary data, and not the one used to train the othello heuristic function.
"""
import sklearn
import sklearn.model_selection
import sklearn.linear_model
import sklearn.metrics
import csv
import os
import pickle
# Load the training data, split it into training and test sets, train and analyze each model, then predict results for new data
def main():
    # (1) Linear Regression:
    x_train1, x_test1, y_train1, y_test1 = load_data(os.path.join("learning_data", "sample1.csv"), 1)
    linreg_model = train_model_linreg(x_train1, y_train1)
    predictions1 = linreg_model.predict(x_test1)
    analyze_model_linreg(linreg_model, y_test1, predictions1)

    # (2) Logistic Regression:
    x_train2, x_test2, y_train2, y_test2 = load_data(os.path.join("learning_data", "sample2.csv"), 2)
    logreg_model = train_model_logreg(x_train2, y_train2)
    predictions2 = logreg_model.predict(x_test2)
    analyze_model_logreg(logreg_model, y_test2, predictions2)

    # Save both models as pickled .sav files
    print("Saving models...")
    save_path_1 = os.path.join("learning_data", "sample_linreg.sav")
    save_path_2 = os.path.join("learning_data", "sample_logreg.sav")
    with open(save_path_1, 'wb') as f:
        pickle.dump(linreg_model, f)
    with open(save_path_2, 'wb') as f:
        pickle.dump(logreg_model, f)
    print("Models saved.\n")

    # Use the saved models to predict results from some new evidence
    # Expected function: y = 3*a1 + 2*a2 - a3, so the linear predictions should be roughly 10, 7, 6, 4
    # and the logistic predictions should be win, win, lose, lose
    with open(save_path_1, 'rb') as f:
        loaded_model1 = pickle.load(f)
    with open(save_path_2, 'rb') as f:
        loaded_model2 = pickle.load(f)
    data = [[6, 1, 10], [4, 2, 9], [1, 2, 1], [1, 2, 3]]
    make_predictions(data, loaded_model1, loaded_model2)
# Load data from the csv file, split it into training and test features and outcomes, and return those values
def load_data(path, outcome_type):
    with open(path) as file:
        next(file)  # skip the header row
        reader = csv.reader(file)
        parameters, real_values = [], []
        for row in reader:
            parameters.append([int(row[0]), int(row[1]), int(row[2])])
            if outcome_type == 1: real_values.append(int(row[3]))  # numeric outcome for linear regression
            elif outcome_type == 2: real_values.append(row[3])     # win/lose label for logistic regression
    print(parameters)
    x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(parameters, real_values, test_size = 0.2)
    return (x_train, x_test, y_train, y_test)
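
# A sketch of the CSV layout load_data() assumes: a header row, three integer
# feature columns, then the outcome column. The column names and values below
# are illustrative only, not taken from the real sample files:
#   a1,a2,a3,outcome
#   2,3,1,11       <- sample1.csv style: numeric outcome (linear regression)
#   2,3,1,win      <- sample2.csv style: win/lose label (logistic regression)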
# Using linear regression to train a model
def train_model_linreg(x_train, y_train):
    regressor = sklearn.linear_model.LinearRegression()
    regressor.fit(x_train, y_train)
    return regressor
# Using logistic regression to train a model
def train_model_logreg(x_train, y_train):
    logreg = sklearn.linear_model.LogisticRegression()
    logreg.fit(x_train, y_train)
    return logreg
# Print out the coefficients gained and the mean squared error on the test data (linear regression)
def analyze_model_linreg(model, y_test, predictions):
    print("Linear Regression")
    print("Coefficients gained: \n", model.coef_)
    print("Mean squared error: %.2f\n" % sklearn.metrics.mean_squared_error(y_test, predictions))
# Print out the true positive (win) rate and true negative (lose) rate (logistic regression)
def analyze_model_logreg(model, y_test, predictions):
    print("Logistic Regression")
    print("Coefficients gained: \n", model.coef_)
    positive_labels_count, negative_labels_count, positive_labels_accurate, negative_labels_accurate = 0, 0, 0, 0
    for label, prediction in zip(y_test, predictions):
        if label == "win":
            positive_labels_count += 1
            if label == prediction: positive_labels_accurate += 1
        elif label == "lose":
            negative_labels_count += 1
            if label == prediction: negative_labels_accurate += 1
    print(f"Correct: {(y_test == predictions).sum()}")
    print(f"Incorrect: {(y_test != predictions).sum()}")
    if positive_labels_count != 0:
        sensitivity = positive_labels_accurate / positive_labels_count  # True positive rate (how often 'win' is predicted correctly)
        print(f"True Positive Rate: {100 * sensitivity:.2f}%")
    else: print("True Positive Rate: Undetermined (not enough test data)")
    if negative_labels_count != 0:
        specificity = negative_labels_accurate / negative_labels_count  # True negative rate (how often 'lose' is predicted correctly)
        print(f"True Negative Rate: {100 * specificity:.2f}%")
    else: print("True Negative Rate: Undetermined (not enough test data)")
    print("")
# Based on model 1 (linear regression) and model 2 (logistic regression), print the predicted outcome based on the test cases
def make_predictions(test_cases, model1, model2):
    print("Predictions based on test cases:")
    print(model1.predict(test_cases))
    print(model2.predict(test_cases))
    print(model2.predict_proba(test_cases))
    print("")
if __name__ == "__main__":
    main()