-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbuild_model_revisit_top20_3day.R
59 lines (46 loc) · 1.6 KB
/
build_model_revisit_top20_3day.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Pipeline for XGBoost model tuning (ED revisit - 72 hours, Top 20 variables)
#
# Setup stage: loads the cleaned revisit data frame, restricts it with
# top20_df_3day(), re-encodes it with makematrix_revisit(), saves the encoded
# dataset, and slices it into train and dev parts using the stored indices.

# All relative paths below ('./Results', './Scripts') resolve against this
# directory, so it has to be set before anything is loaded or sourced.
setwd('~/ED_Return/')
library(readr)
library(dplyr)
library(reshape2)
library(parallel)
library(caret)
library(xgboost)
library(doMC)
library(pROC)
registerDoMC(5) # for parallelization

# load() restores objects under their saved names and returns those names.
# Check that `df` really came from the file: otherwise the name would silently
# resolve to stats::df (a function) and be passed on to top20_df_3day().
loaded_names <- load('./Results/cleandf_revisit.RData')
stopifnot("df" %in% loaded_names)

# Use only top 20 variables
source('./Scripts/top20_df_3day.R')
df <- top20_df_3day(df)

# Re-encode dataframe into design matrix x and response y
source("./Scripts/makematrix_revisit.R")
# Spell out FALSE: the shorthand `F` is an ordinary variable that can be
# reassigned (or overwritten by a load() call).
dataset <- makematrix_revisit(df, nineday = FALSE)
save(dataset, file = './Results/sparseMatrix_revisit_top20_3day.RData') # save for future use

# The train/dev row indices are expected under the saved name `indeces`.
loaded_names <- load('./Results/indeces_revisit.RData')
stopifnot("indeces" %in% loaded_names)
rm(loaded_names)
# load('./Results/sparseMatrix_revisit.RData')
x <- dataset$x
y <- dataset$y
rm(dataset)

# prepare matrices for XGBoost
x_train <- x[indeces$i_train, ]
y_train <- y[indeces$i_train]
x_dev <- x[indeces$i_dev, ]
y_dev <- y[indeces$i_dev]
# The full matrix and response are no longer needed once the splits exist.
rm(x, y)
# Tune tree depth: fit one XGBoost classifier per candidate max_depth on the
# training split and print the train and dev AUC for each.
for (depth in c(5, 10, 15)) {
  # NOTE(review): data/label is the classic xgboost() calling convention;
  # confirm the installed xgboost version still accepts it.
  bst <- xgboost(data = x_train, label = y_train,
                 max_depth = depth, eta = 0.3,
                 nthread = 5, nrounds = 20,
                 eval_metric = 'auc',
                 objective = "binary:logistic",
                 colsample_bylevel = 0.3)
  print(bst)
  # save(bst, file = './Results/bst_model.RData')

  # Training AUC recorded for the last boosting round in the evaluation log.
  auc_train <- as.numeric(tail(bst$evaluation_log$train_auc, 1))

  # 7) Predict on dev
  y_hat_dev <- predict(bst, x_dev)
  # Pin the ROC direction (controls score lower than cases). pROC's default
  # "auto" picks whichever direction yields the higher AUC, which biases the
  # dev estimate upward and would hide a model that is worse than chance.
  auc_dev <- as.numeric(pROC::auc(y_dev, y_hat_dev, direction = "<"))

  print('3day return AUC')
  print(c(auc_train, auc_dev))
}