-
Notifications
You must be signed in to change notification settings - Fork 1
/
testing.R
120 lines (88 loc) · 7.07 KB
/
testing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Sanity check: a Tweedie GLM offset can be reproduced without using the
# `offset` argument. HDtweedie does not support offsets, so the offset has to
# be folded into the response and the weights by hand; this check confirms the
# manual form is equivalent in a plain GLM first.
#   glm_model_1 - offset supplied through glm()'s `offset` argument.
#   glm_model_2 - no `offset`; the response is rescaled by the complement and
#                 the weights are multiplied by complement^(2 - 1.6).
# GLM model 1 should be equivalent to GLM model 2.
#
# Alternative predictors, currently disabled in both formulas:
#   driver_age_18_38_hinge_ln + driver_age_38_76_hinge_ln +
#   driver_age_76_99_hinge_ln +
#   vehicle_age_0_10_hinge_mod1_ln + vehicle_age_10_99_hinge_mod1_ln
glm_model_1 <- glm(
  formula = incurred_loss ~
    driver_age_18_38_hinge + driver_age_38_76_hinge + driver_age_76_99_hinge +
    vehicle_age_0_10_hinge + vehicle_age_10_99_hinge +
    C(MultiPolicy) + C(xTreme_TurnSignal) + C(car_weight) + C(industry_code),
  family = tweedie(1.6, link.power = 0),
  weights = exposure,
  # Here the complement is applied through the offset logic and not through
  # the target/exposure terms.
  offset = log(cw_model_complement_of_credibility),
  data = large_state_training_data
)

glm_model_2 <- glm(
  # Response: incurred_loss * complement^(1 - p) / complement^(2 - p), p = 1.6,
  # which algebraically reduces to incurred_loss / complement.
  formula = (incurred_loss * (cw_model_complement_of_credibility^(1 - 1.6)) /
    (cw_model_complement_of_credibility^(2 - 1.6))) ~
    driver_age_18_38_hinge + driver_age_38_76_hinge + driver_age_76_99_hinge +
    vehicle_age_0_10_hinge + vehicle_age_10_99_hinge +
    C(MultiPolicy) + C(xTreme_TurnSignal) + C(car_weight) + C(industry_code),
  family = tweedie(1.6, link.power = 0),
  # Weights carry the remaining complement^(2 - p) factor.
  weights = exposure * (cw_model_complement_of_credibility^(2 - 1.6)),
  data = large_state_training_data
)

# Author's reported result: the two fits are identical within a material
# tolerance. All coefficients agree at the thousandths place and are therefore
# equivalent for insurance implementation purposes.
coef(glm_model_1)
coef(glm_model_2)
# Large State Model, no offset
# Fit the cross-validated lasso (HDtweedie) model WITHOUT an offset: the
# response is the raw training loss vector and the weights are the raw
# exposures, so no complement of credibility enters the fit.
large_state_no_offset_lasso_model <- cv.HDtweedie(
  x = large_state_training_data_matrix,
  y = large_state_training_loss_vector,
  group = NULL,
  p = 1.6, # same Tweedie power as the GLM fits in this script
  weights = large_state_training_data$exposure,
  lambda = NULL,
  pred.loss = "deviance",
  nfolds = 4,
  # Fold assignment is supplied explicitly rather than drawn at random.
  foldid = large_state_training_cv_vector,
  standardize = TRUE
)
# Fit the unpenalized Tweedie GLM without an offset, using the hinge terms for
# driver and vehicle age plus the four categorical predictors.
large_state_glm_model <- glm(
  formula = incurred_loss ~
    driver_age_18_38_hinge + driver_age_38_76_hinge + driver_age_76_99_hinge +
    vehicle_age_0_10_hinge + vehicle_age_10_99_hinge +
    C(MultiPolicy) + C(xTreme_TurnSignal) + C(car_weight) + C(industry_code),
  family = tweedie(1.6, link.power = 0),
  weights = exposure,
  # Note: no complement here, so the offset stays disabled:
  # offset = cw_model_complement_of_credibility_vector,
  data = large_state_training_data
)
# Add the predictions made without a complement to the training and validation
# data as new columns `glm_prediction` and `lasso_prediction`.
# The lasso predictions are taken at the cross-validated "lambda.min".
# NOTE(review): `weights = exposure` is forwarded to the lasso predict method
# through `...`; confirm that method actually uses it, otherwise it is inert.
large_state_training_data[
  ,
  glm_prediction := predict(
    large_state_glm_model, large_state_training_data,
    type = "response"
  )
]
large_state_training_data[
  ,
  lasso_prediction := predict(
    large_state_no_offset_lasso_model, large_state_training_data_matrix,
    s = "lambda.min",
    weights = exposure
  )
]

large_state_validate_data[
  ,
  glm_prediction := predict(
    large_state_glm_model, large_state_validate_data,
    type = "response"
  )
]
large_state_validate_data[
  ,
  lasso_prediction := predict(
    large_state_no_offset_lasso_model, large_state_validate_data_matrix,
    s = "lambda.min",
    weights = exposure
  )
]

# Inspect the validation data, including the two new prediction columns.
summary(large_state_validate_data)
# Build one coefficient table from the unpenalized GLM and the penalized
# (lasso) model, then plot it per variable.
large_state_coeff_table <- create_coeff_table(
  unpenalized_model = large_state_glm_model,
  penalized_model = large_state_no_offset_lasso_model
)

# Relativity plots for each categorical variable.
# The calls are kept at top level, one per variable, in the original order.
relativity_plot(
  get_cat_var_table(
    large_state_training_data, large_state_coeff_table, "industry_code"
  )
)
relativity_plot(
  get_cat_var_table(
    large_state_training_data, large_state_coeff_table, "MultiPolicy"
  )
)
relativity_plot(
  get_cat_var_table(
    large_state_training_data, large_state_coeff_table, "xTreme_TurnSignal"
  )
)
relativity_plot(
  get_cat_var_table(
    large_state_training_data, large_state_coeff_table, "car_weight"
  )
)

# Prediction plots for the same categorical variables.
prediction_plot(
  get_cat_var_table(
    large_state_training_data, large_state_coeff_table, "industry_code"
  )
)
prediction_plot(
  get_cat_var_table(
    large_state_training_data, large_state_coeff_table, "MultiPolicy"
  )
)
prediction_plot(
  get_cat_var_table(
    large_state_training_data, large_state_coeff_table, "xTreme_TurnSignal"
  )
)
prediction_plot(
  get_cat_var_table(
    large_state_training_data, large_state_coeff_table, "car_weight"
  )
)

# Hinge variables: build the driver-age and vehicle-age tables once each and
# reuse them for both plot types.
large_state_driver_age_table <- get_driver_var_table(
  large_state_training_data, large_state_coeff_table,
  variable_name = "driver_age"
)
large_state_vehicle_age_table <- get_vehicle_var_table(
  large_state_training_data, large_state_coeff_table,
  variable_name = "vehicle_age"
)

prediction_plot(large_state_driver_age_table)
relativity_plot(large_state_driver_age_table)
# cw_coeff_table
prediction_plot(large_state_vehicle_age_table)
relativity_plot(large_state_vehicle_age_table)
# Fit the same GLM again, this time WITH the complement of credibility as a
# log-scale offset, to see how the offset changes the model's behavior.
# NOTE(review): the offset here is `cw_model_complement_of_credibility_vector`,
# whereas glm_model_1 uses the `cw_model_complement_of_credibility` column of
# the training data; confirm both hold the same values.
large_state_offset_glm_model <- glm(
  formula = incurred_loss ~
    driver_age_18_38_hinge + driver_age_38_76_hinge + driver_age_76_99_hinge +
    vehicle_age_0_10_hinge + vehicle_age_10_99_hinge +
    C(MultiPolicy) + C(xTreme_TurnSignal) + C(car_weight) + C(industry_code),
  family = tweedie(1.6, link.power = 0),
  weights = exposure,
  offset = log(cw_model_complement_of_credibility_vector),
  data = large_state_training_data
)

# First, compare the GLM predictions with and without the complement of
# credibility, side by side on the training data.
original_predictions <- predict(
  large_state_glm_model, large_state_training_data,
  type = "response"
)
with_offset_predictions <- predict(
  large_state_offset_glm_model, large_state_training_data,
  type = "response"
) # uses the offset
cbind(original_predictions, with_offset_predictions)

# Author's reported result: the two sets of predictions are exactly the same.
# This is because the GLM treats the data with full credibility: as long as
# the complement can be cancelled out by the selected predictors, the model
# predictions will not change.
# Going forward, we will no longer both model and offset the same variables in
# a GLM.