-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEstimation.R
150 lines (106 loc) · 5.79 KB
/
Estimation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
## Copy from Github files
library(caret)
library(dplyr)
library(GGally)
library(ggplot2)
library(pROC)
set.seed(123)
df <- read.csv("/Users/yonglaizhu/Desktop/246 Project/df_train.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
df_test <- read.csv("/Users/yonglaizhu/Desktop/246 Project/df_test.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
levels(df$LoanStatus) <- c(1,0)
levels(df_test$LoanStatus) <- c(1,0)
# base model for reference and variable selection
logisticModel <- train(LoanStatus ~ 1 + ThirdPartyDollars + TermInMonths + GrossApproval + Term.Multiple + Same.State + In.CA + Is.ThirdParty + Missing.Interest
+ Refinance + Delta + Private.Sector + Premier + CORPORATION + INDIVIDUAL + MISSING + PARTNERSHIP
+ Unemployment.YR + Avg.Home.Price + GDP.Delta.YR + Log.S.P.Open + BorrState.Unemployment + ProjectState.Unemployment
+ BorrState.Income + Missing.Borr.Income + Missing.Proj.Income + BorrState.GDP + ProjState.GDP + Missing.Borr.GDP
+ Missing.Proj.GDP + BorrState.Vacancy + ProjectState.Vacancy
, data = df, method = "glm",
family = "binomial")
summary(logisticModel)
logistic_predictions <- predict(logisticModel, newdata = df_test, type = "prob")
############################################################################
n_samples <- 100 # Number of times to repeat the sampling, e.g., 100
total_losses <- numeric(n_samples)
for(j in 1:n_samples) {
## Estimate total loss distribution by randomly sampling portfolios consisting of 1,000 loans from the test set
sampled_test_data <- df_test[sample(nrow(df_test), 1000), ]
# Whether it is default
default_probs <- predict(logisticModel, newdata = sampled_test_data, type = "prob")
default_preds <- ifelse(default_probs$CHGOFF > 0.4, 1, 0) # Adjust threshold as necessary
# Initialize a vector to store estimated losses
sampled_test_data$EstimatedLoss <- numeric(nrow(sampled_test_data))
sampled_test_data$ChargeOffBinary <- numeric(nrow(sampled_test_data))
# For loans predicted to default, predict GrossChargeOffAmount category
for(i in which(default_preds == 1)) {
chargeoff_prob <- predict(classify_model, sampled_test_data[i, ], type = "response")
chargeoff_pred <- ifelse(chargeoff_prob > optimal_threshold, 1, 0)
sampled_test_data$ChargeOffBinary[i] <- chargeoff_pred
if(chargeoff_pred == 1) { # If GrossChargeOffAmount is predicted to be > 0
estimated_loss <- predict(lgd_model_positive_chargeooff, sampled_test_data[i, ])
sampled_test_data$EstimatedLoss[i] <- estimated_loss
}
}
total_losses[j] <- sum(sampled_test_data$EstimatedLoss) # Hypothetical
print(paste("Total Estimated Loss:", total_losses[j]))
}
############################################################################
hist(total_losses, breaks = 50, main = "Distribution of Estimated Loss")
summary(total_losses)
## VaR and average VaR (CVaR)
# Calculate VaR at the 95% and 99% levels
VaR_95 <- quantile(total_losses, 0.95)
VaR_99 <- quantile(total_losses, 0.99)
# Calculate AVaR (Expected Shortfall) at the 95% and 99% levels
AVaR_95 <- mean(total_losses[total_losses > VaR_95])
AVaR_99 <- mean(total_losses[total_losses > VaR_99])
print(paste("VaR 95%:", VaR_95))
print(paste("VaR 99%:", VaR_99))
print(paste("AVaR 95%:", AVaR_95))
print(paste("AVaR 99%:", AVaR_99))
# Estimate the confidence intervals
# Bootstrapping for 95% VaR
bootstrap_samples <- 1000 # Number of bootstrap samples
VaR_95_samples <- numeric(bootstrap_samples)
for (i in 1:bootstrap_samples) {
# Sample with replacement from total_losses
sample_losses <- sample(total_losses, replace = TRUE)
VaR_95_samples[i] <- quantile(sample_losses, 0.95)
}
# Calculate confidence intervals for VaR 95%
VaR_95_CI <- quantile(VaR_95_samples, probs = c(0.025, 0.975))
print(paste("95% Confidence Interval for VaR 95%:", VaR_95_CI))
####################################################
n_bootstraps <- 1000
VaR_95_samples <- numeric(n_bootstraps)
CVaR_95_samples <- numeric(n_bootstraps)
for (i in 1:n_bootstraps) {
bootstrap_sample <- sample(total_losses, size = length(total_losses), replace = TRUE)
# Recalculate VaR and CVaR for the bootstrap sample
bootstrap_VaR_95 <- quantile(bootstrap_sample, 0.95)
bootstrap_CVaR_95 <- mean(bootstrap_sample[bootstrap_sample >= bootstrap_VaR_95])
VaR_95_samples[i] <- bootstrap_VaR_95
CVaR_95_samples[i] <- bootstrap_CVaR_95
}
# Calculate the 95% confidence intervals for VaR 95% and CVaR 95%
VaR_95_CI <- quantile(VaR_95_samples, probs = c(0.025, 0.975))
CVaR_95_CI <- quantile(CVaR_95_samples, probs = c(0.025, 0.975))
print(paste("95% Confidence Interval for VaR 95%: [", VaR_95_CI[1], ", ", VaR_95_CI[2], "]"))
print(paste("95% Confidence Interval for CVaR 95%: [", CVaR_95_CI[1], ", ", CVaR_95_CI[2], "]"))
####################################################
VaR_99_samples <- numeric(n_bootstraps)
CVaR_99_samples <- numeric(n_bootstraps)
for (i in 1:n_bootstraps) {
bootstrap_sample <- sample(total_losses, size = length(total_losses), replace = TRUE)
# Recalculate VaR and CVaR for the bootstrap sample
bootstrap_VaR_99 <- quantile(bootstrap_sample, 0.99)
bootstrap_CVaR_99 <- mean(bootstrap_sample[bootstrap_sample >= bootstrap_VaR_99])
VaR_99_samples[i] <- bootstrap_VaR_99
CVaR_99_samples[i] <- bootstrap_CVaR_99
}
# Calculate the 95% confidence intervals for VaR 99% and CVaR 99%
VaR_99_CI <- quantile(VaR_99_samples, probs = c(0.025, 0.975))
CVaR_99_CI <- quantile(CVaR_99_samples, probs = c(0.025, 0.975))
print(paste("95% Confidence Interval for VaR 99%: [", VaR_99_CI[1], ", ", VaR_99_CI[2], "]"))
print(paste("95% Confidence Interval for CVaR 99%: [", CVaR_99_CI[1], ", ", CVaR_99_CI[2], "]"))
####################################################