# example_Random_Forest.py
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from hyperoptimize import GraphicalOptimizer

# Loading data
data = pd.read_csv('kc_house_data.csv')
features = data.iloc[:, 3:].columns.tolist()  # Names of all candidate feature columns
target = data.iloc[:, 2].name                 # Name of the target column ('price')

# Selecting the features used for training and the target values
new_data = data[
    ['sqft_living', 'grade', 'sqft_above', 'sqft_living15', 'bathrooms', 'view',
     'sqft_basement', 'lat', 'waterfront', 'yr_built', 'bedrooms']]
X = new_data.values
y = data.price.values
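
# Optional baseline (a sketch added for reference, not part of the original
# workflow): fit an untuned RandomForestRegressor on a simple 80/20 split so
# the tuned results below have a point of comparison. RUN_BASELINE is a
# hypothetical flag; flip it to True to run the check.
RUN_BASELINE = False
if RUN_BASELINE:
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=42)
    baseline = RandomForestRegressor(random_state=42).fit(X_train, y_train)
    print("Untuned baseline R^2:", r2_score(y_test, baseline.predict(X_test)))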


def model_function(params, X_train, y_train):
    """Builds and fits a random forest using the supplied hyperparameters."""
    model = RandomForestRegressor(n_estimators=params['n_estimators'],
                                  bootstrap=params['bootstrap'],
                                  max_depth=params['max_depth'],
                                  max_features=params['max_features'],
                                  min_samples_leaf=params['min_samples_leaf'],
                                  min_samples_split=params['min_samples_split'])
    model.fit(X_train, y_train)
    return model


def prediction_function(model, X):
    return model.predict(X)


def performance_function(y_test, y_pred):
    """Computes regression metrics; the optimizer tracks one of these keys."""
    model_mae = mean_absolute_error(y_test, y_pred)
    model_mse = mean_squared_error(y_test, y_pred)
    model_rmse = np.sqrt(model_mse)
    model_r2 = r2_score(y_test, y_pred)  # Plain R^2 (r2_score is not adjusted)
    model_results = {"Mean Absolute Error (MAE)": model_mae,
                     "Mean Squared Error (MSE)": model_mse,
                     "Root Mean Squared Error (RMSE)": model_rmse,
                     "R^2 Score": model_r2}
    return model_results
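
# Minimal smoke test (a sketch): call the three functions above the same way
# the optimizer is expected to, with one hand-picked parameter set. The
# RUN_SMOKE_TEST flag and 'sample_params' values are illustrative choices from
# inside the search space defined below, not tuned settings.
RUN_SMOKE_TEST = False
if RUN_SMOKE_TEST:
    from sklearn.model_selection import train_test_split

    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2,
                                              random_state=0)
    sample_params = {'n_estimators': 200, 'bootstrap': True, 'max_depth': 10,
                     'max_features': 'sqrt', 'min_samples_leaf': 1,
                     'min_samples_split': 2}
    fitted = model_function(sample_params, X_tr, y_tr)
    print(performance_function(y_te, prediction_function(fitted, X_te)))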


# Creating hyperparameter dictionaries
hyperparameters_bayesian = {'n_estimators': [200, 2000],       # Lower and upper bounds
                            'bootstrap': [True, False],        # Categorical options
                            'max_depth': [10, 100],            # Lower and upper bounds
                            'max_features': ['sqrt', 'log2'],  # Categorical options
                            'min_samples_leaf': [1, 4],        # Lower and upper bounds
                            'min_samples_split': [2, 10]}      # Lower and upper bounds

# Grid and random search instead enumerate explicit candidate values
hyperparameters_grid_and_random = {'n_estimators': range(200, 2000, 100),
                                   'bootstrap': [True, False],
                                   'max_depth': range(10, 100, 10),
                                   'max_features': ['sqrt', 'log2'],
                                   'min_samples_leaf': range(1, 4),
                                   'min_samples_split': [2, 5, 10]}
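
# For comparison only (a sketch): the grid/random search space above also works
# directly with scikit-learn's RandomizedSearchCV. This uses standard sklearn
# APIs, not the hyperoptimize library; RUN_SKLEARN_SEARCH is a hypothetical
# flag guarding it so the script's default behavior is unchanged.
RUN_SKLEARN_SEARCH = False
if RUN_SKLEARN_SEARCH:
    from sklearn.model_selection import RandomizedSearchCV

    search = RandomizedSearchCV(RandomForestRegressor(),
                                param_distributions=hyperparameters_grid_and_random,
                                n_iter=5, cv=2, scoring='r2', random_state=0,
                                n_jobs=-1)
    search.fit(X, y)
    print(search.best_params_, search.best_score_)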


# Performing optimization
opt = GraphicalOptimizer(model_function=model_function,
                         prediction_function=prediction_function,
                         performance_function=performance_function,
                         performance_parameter="R^2 Score",  # Key from performance_function's results
                         hyperparameters=hyperparameters_bayesian,
                         optimizer="bayesian",
                         max_num_combinations=5,
                         cross_validation=2,
                         max_num_of_parallel_processes=-1,
                         parallel_combinations=2)
opt.fit(X, y)