-
Notifications
You must be signed in to change notification settings - Fork 0
/
h2o_final.R
105 lines (96 loc) · 2.92 KB
/
h2o_final.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
## Start from an empty global environment. This only removes objects;
## attached packages and options() are left as they are.
rm(list = ls())

## Candidate project directories, one per collaborator. Add your own path
## to `project_dirs`; the first directory that exists on this machine
## becomes the working directory.
path_amalesh <- paste0("D:/ALL/Study/KUL/Stats/Amalesh/",
                       "3rd sem/Advanced analyics for business")
path_marc <- "d:/MarcandreC/Desktop/ass2_h2o"
project_dirs <- c(path_marc, path_amalesh)
existing_dirs <- project_dirs[dir.exists(project_dirs)]
if (length(existing_dirs) == 0) {
  stop("None of the project directories exist: ",
       paste(project_dirs, collapse = ", "), call. = FALSE)
}
setwd(existing_dirs[1])

## The sourced scripts load the dependencies automatically.
## NOTE(review): they presumably also create orig.dat, test.dat and Y.orig,
## which are used further down but not defined in this file -- confirm.
source("./assigment2_functions_macode.r")
source("./assigment2_macode.r")
## Set to TRUE to (re)install the h2o package and the packages it needs.
debug_install <- FALSE
if (debug_install) {
  ## Detach h2o first if it is currently attached, so it can be removed.
  if ("package:h2o" %in% search()) {
    detach("package:h2o", unload = TRUE)
  }
  ## installed.packages() is slow, so query it once and reuse the result.
  installed_pkgs <- rownames(installed.packages())
  ## Remove any previously installed H2O package for R.
  if ("h2o" %in% installed_pkgs) {
    remove.packages("h2o")
  }
  ## Install the packages that H2O depends on, but only the missing ones.
  pkgs <- c("RCurl", "jsonlite")
  missing_pkgs <- setdiff(pkgs, installed_pkgs)
  if (length(missing_pkgs) > 0) {
    install.packages(missing_pkgs)
  }
  ## Download and install the H2O package for R from the H2O release
  ## repository. Loading and starting H2O happens later in the script.
  install.packages("h2o", type = "source",
                   repos = paste0("http://h2o-release.s3.amazonaws.com/",
                                  "h2o/rel-yates/3/R"))
}
## Main part of the analysis.
## orig.dat must be used for training; otherwise the validation set is
## contaminated with rows from the training set.
library(h2o)
h2o.init()
train.dat <- orig.dat

## Append the response to each feature table as a column named "churn".
## Training labels come from Y.orig, test labels from Y_test.rds.
churn <- Y.orig
train.dat1 <- cbind(train.dat, churn)
## Base readRDS() reads the .rds file without requiring readr to be attached.
churn <- readRDS("Y_test.rds")
test.dat1 <- cbind(test.dat, churn)

## Upload both tables to the H2O cluster.
training <- as.h2o(train.dat1)
testing <- as.h2o(test.dat1)

## The models below are tuned on AUC, a classification metric, so the
## response has to be categorical. Converting a column that is already
## categorical leaves it unchanged.
## NOTE(review): assumes churn is a two-class label -- confirm.
training[, "churn"] <- as.factor(training[, "churn"])
testing[, "churn"] <- as.factor(testing[, "churn"])
## Random forest with up to 1000 trees predicting "churn" from all other
## columns of `training`. Training may stop early based on AUC
## (stopping_rounds = 10, stopping_tolerance = 0.005).
## NOTE(review): `testing` is passed as the validation frame, so the AUC
## reported below comes from data that was also available for early
## stopping -- confirm this is intended.
rf_h2o <- h2o.randomForest(
  y = "churn",
  training_frame = training,
  validation_frame = testing,
  ntrees = 1000,
  stopping_metric = "AUC",
  stopping_rounds = 10,
  stopping_tolerance = 0.005,
  seed = 123
)

## Model performance: AUC on the validation frame. print() is explicit so
## the value is also shown when the script is run through source(), which
## does not auto-print top-level values by default.
print(h2o.auc(rf_h2o, valid = TRUE))
## AutoML: train at most 50 models within 1200 seconds, using AUC as the
## stopping metric.
h2o_ai <- h2o.automl(
  y = "churn",
  training_frame = training,
  validation_frame = testing,
  max_models = 50,
  stopping_metric = "AUC",
  max_runtime_secs = 1200,
  seed = 120
)

## Print every model on the leaderboard.
lb <- h2o_ai@leaderboard
print(lb, n = nrow(lb))
model_ids <- as.data.frame(lb$model_id)[, 1]

## Select the first "all models" stacked ensemble on the leaderboard.
## Fail with a clear message if AutoML did not build one, instead of
## passing NA on to h2o.getModel().
se_ids <- grep("StackedEnsemble_AllModels", model_ids, value = TRUE)
if (length(se_ids) == 0) {
  stop("No StackedEnsemble_AllModels model found on the AutoML leaderboard.",
       call. = FALSE)
}
se <- h2o.getModel(se_ids[1])

## Summary of the selected ensemble. print() is explicit so the output is
## also shown when the script is run through source().
print(se)

## Inspect the ensemble's metalearner: variable importance table and plot.
metalearner <- h2o.getModel(se@model$metalearner$name)
print(h2o.varimp(metalearner))
h2o.varimp_plot(metalearner)
## GBM: take the first model on the leaderboard whose id contains "GBM".
## Fail with a clear message if there is none, instead of passing NA on to
## h2o.getModel().
gbm_ids <- grep("GBM", model_ids, value = TRUE)
if (length(gbm_ids) == 0) {
  stop("No GBM model found on the AutoML leaderboard.", call. = FALSE)
}
gbm <- h2o.getModel(gbm_ids[1])

## Summary of the selected GBM. print() is explicit so the output is also
## shown when the script is run through source().
print(gbm)

## Parameters the model was built with.
print(gbm@allparameters)

## Variable importance table and plot.
print(h2o.varimp(gbm))
h2o.varimp_plot(gbm)
# #explanation
# explainer_rf <- lime::lime(
# as.data.frame(training[,-36]),
# model = rf_h2o,
# bin_continuous = F
# )
# explanation_rf <- lime::explain(
# as.data.frame(testing[, -36]),
# explainer = explainer_rf,
# n_labels = 1,
# n_features = 4)
# klm<-plot_features(explanation_rf,ncol = 5,cases = 63)