# miniProject3.py - Boston housing price regression with a small Keras network.
from math import sqrt
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
###
# Import the boston dataset into separate train/test AND input/result arrays
# (13+1 columns for every row - 404 rows in train data - 102 rows in test data)
#
# x_train:
# 0: CRIM: Per capita crime rate by town
# 1: ZN: Proportion of residential land zoned for lots over 25,000 sq. ft
# 2: INDUS: Proportion of non-retail business acres per town
# 3: CHAS: Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
# 4: NOX: Nitric oxide concentration (parts per 10 million)
# 5: RM: Average number of rooms per dwelling
# 6: AGE: Proportion of owner-occupied units built prior to 1940
# 7: DIS: Weighted distances to five Boston employment centers
# 8: RAD: Index of accessibility to radial highways
# 9: TAX: Full-value property tax rate per $10,000
# 10: PTRATIO: Pupil-teacher ratio by town
# 11: B: 1000(Bk — 0.63)², where Bk is the proportion of [people of African American descent] by town
# 12: LSTAT: Percentage of lower status of the population
#
# y_train:
# MEDV: Median value of owner-occupied homes in $1000s
###
from tensorflow.python.keras._impl.keras.wrappers.scikit_learn import KerasRegressor
# Keras returns the dataset already split into (inputs, targets) pairs for
# the training and the test portion.
train_split, test_split = keras.datasets.boston_housing.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

###
# Describe dataset
# (report the shape of the training inputs and the number of training targets)
###
print("The train input is a tuple of shape: {}".format(x_train.shape))
print("The train output is an array of length: {}".format(len(y_train)))
# Histogram of the training targets (MEDV, median home value in k$).
# The labels name the quantity and the split that are actually plotted:
# the values are medians, and they come from the training data (y_train).
median_value_array = y_train
plt.title("Overview of median value in dataset")
plt.xlabel("Median value of Home (k$)")
plt.ylabel("Nr of occurrences in train data")
plt.hist(median_value_array)
plt.show()
# Scatter plot: training target against feature column 0 (CRIM).
crim_rate = x_train[:, 0].copy()
plt.scatter(x=crim_rate, y=median_value_array, linestyle='None')
plt.title("Value/Crime Rate")
plt.xlabel("Crime Rate (per town capita)")
plt.ylabel("Median value of Home (k$)")
plt.show()

# Scatter plot: training target against feature column 5 (RM).
average_number_of_rooms = x_train[:, 5].copy()
plt.scatter(x=average_number_of_rooms, y=median_value_array, linestyle='None')
plt.title("Value/Number of rooms")
plt.xlabel("Average number of rooms")
plt.ylabel("Median value of Home (k$)")
plt.show()
###
# Initiate Model
# (13 input features -> Dense(9, relu) -> Dense(11, relu) -> Dense(1) output,
#  trained with Adam on mean squared error)
###
# NOTE: a Dropout(0.2) layer after the first hidden layer was tried and left
# disabled (original remark: "Not Overfitting data").
model = keras.Sequential([
    keras.layers.Dense(
        9,
        input_shape=(13,),
        kernel_initializer='normal',
        activation=tf.nn.relu,
    ),
    keras.layers.Dense(
        11,
        kernel_initializer='normal',
        activation=tf.nn.relu,
    ),
    # Single linear output unit for the regression target.
    keras.layers.Dense(1, kernel_initializer='normal'),
])
model.compile(
    loss='mean_squared_error',
    optimizer=tf.train.AdamOptimizer(),
)
###
# Train Model
# (Fit model to training data set; verbose=0 silences the per-epoch log)
###
model.fit(x_train, y_train, epochs=2000, verbose=0)

###
# Test Model
# (evaluate() returns the loss on the test data; the model is compiled with
#  loss='mean_squared_error' and no additional metrics)
###
mean_squared_error = model.evaluate(x_test, y_test)

# The MSE is expressed in squared target units, i.e. (k$)^2. Only its square
# root (the RMSE) is back in k$, so each figure is reported with its own unit.
root_mean_squared_error = sqrt(mean_squared_error)
print("Mean squared error on test data: "+str(round(mean_squared_error,2))+" (k$)^2")
print("Root mean squared error on test data: "+str(round(root_mean_squared_error,2))+"k$")