-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathchapter_11.R
162 lines (111 loc) · 4.17 KB
/
chapter_11.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
## Simulation (11.4)
param_range <- c(0,1)
prior <- function(x) 1
lhood <- function(x, p) dbinom(x, 1, p)
posterior <- function(x, data) {
ans <- vector("double", length(x))
for (i in seq_along(x)) {
ans[i] <- prior(x[i]) * prod(lhood(data, x[i]))
}
ans
}
data <- c(1, 0, 0, 1, 0, 0, 1)
est_mean <- integrate(function (x) x*posterior(x, data), 0, 1)$value / integrate(function (x) posterior(x, data), 0, 1)$value
exact_mean <- (sum(data) + 1) / (length(data) + 2)
phi <- function(x) log(x / (1-x))
est_phi_mean <- integrate(function(x) phi(x) * posterior(x, data), 0, 1)$value / integrate(function (x) posterior(x, data), 0, 1)$value
## This disagrees with Wasserman's answer in Example 11.3 because he dropped an exp(phi) from the numerator of the derivative.
exact_phi_dist <- function(x) dbeta(exp(x)/(1+exp(x)), sum(data) + 2, length(data) - sum(data) + 2)
exact_phi_mean <- integrate(function(x) x * exact_phi_dist(x), -100, 100)$value / integrate(exact_phi_dist, -100, 100)$value
## Ex 11.2
ex11_2 <- rnorm(100, 5, 1)
## Take f(mu) = 1 and find the posterior density.
ex11_2_posterior <- function(mu) {
n <- length(ex11_2)
xbar <- mean(ex11_2)
sqrt(n/(2*pi)) * exp(-n/2*(mu - xbar)^2)
}
t <- seq(0, 10, by=0.05)
plot(t, ex11_2_posterior(t), 'l')
# Similar
ex11_2_simulation <- rnorm(1000, mean(ex11_2), 1/sqrt(length(ex11_2)))
hist(ex11_2_simulation)
# d) Find posterior density for exp(mu)
ex11_2_exp_density <- function(t) {
ans <- vector("double", length(t))
for (i in seq_along(t)) {
ans[i] <- 1/ t[i] * prod(exp(-(ex11_2 - log(t[i]))^2 / 2))
}
ans
}
## Simulation
hist(exp(ex11_2_simulation))
# e-f) Test against simulation
ex2_mu <- 5
ex2_n <- 100
ex2_sample_size <- 10000
ex2_samples <- matrix(rnorm(ex2_sample_size * ex2_n, ex2_mu, 1), ncol = ex2_n)
# posterior interval
ex2_in_pi <- function(x, mu=ex2_mu, alpha=0.05) {
centre <- mean(x)
size <- qnorm(1 - alpha/2) / sqrt(length(x))
# c(centre - size, centre+size)
abs(centre - mu) < size
}
## 95.16%: This seems excellent
sum(apply(ex2_samples, 1, ex2_in_pi)) / ex2_sample_size
ex2_in_exp_ci <- function(x, mu=ex2_mu, alpha=0.05) {
centre <- exp(mean(x))
size <- qnorm(1 - alpha/2) * exp(mean(x)) / sqrt(length(x))
# c(centre - size, centre+size)
abs(centre - exp(mu)) < size
}
# 94.85%, acceptable as is approximate confidence interval
sum(apply(ex2_samples, 1, ex2_in_exp_ci)) / ex2_sample_size
# Ex 4b)
n1 <- 50
s1 <- 30
n2 <- 50
s2 <- 40
p1 <- s1/n1
p2 <- s2/n2
boot_size <- 1000
bootstrap1 <- rbinom(boot_size, n1, p1) / n1
bootstrap2 <- rbinom(boot_size, n2, p2) / n2
bootstrap_se <- sd(bootstrap2 - bootstrap1)
bootstrap_90ci <- c((p2 - p1) - qnorm(1 - 0.1/2) * bootstrap_se, (p2 - p1) + qnorm(1 - 0.1/2) * bootstrap_se)
# c) Use prior f(p1, p2) = 1. Use simulation to find posterior mean and 90% Posterior interval.
sim_size <- 1000
# Prior for each of p1 and p2 is Beta(1, 1) and they are indep
# Posterior is separately Beta(s + 1, n - s + 1)
sim_post1 <- rbeta(sim_size, s1 + 1, n1 - s1 + 1)
sim_post2 <- rbeta(sim_size, s2 + 1, n2 - s2 + 1)
tau_post <- sim_post2 - sim_post1
hist(tau_post)
# A little lower, but similar to the bootstrap
post_90ci <- quantile(tau_post, c(0.05, 0.95))
# d) Check: se = sqrt(1/(n1*p1*(1-p1)) + 1/(n2*p2*(1-p2)))
sqrt(1/(n1*p1*(1-p1)) + 1/(n2*p2*(1-p2)))
sd(log((bootstrap1)/(1-bootstrap1) / ((bootstrap2) / (1-bootstrap2))))
# e)
phi_post <- log((sim_post1) / (1-sim_post1) / ((sim_post2) / (1-sim_post2)))
# Bayesian mean
mean_phi <- mean(phi_post)
# Very close to Frequentist approach
post_90ci_phi <- quantile(phi_post, c(0.05, 0.95))
# Ex 5
n <- 10
s <- 2
t <- seq(0, 1, 0.01)
alpha <- c(0.5, 1, 10, 100)
beta <- c(0.5, 1, 10, 100)
ex5_df <- data.frame(
t = rep(t, length(alpha)),
alpha = rep(alpha, each = length(t)),
beta = rep(beta, each = length(t))
)
ex5_df$posterior <- with(ex5_df, dbeta(t, alpha + s, beta + n - s))
ex5_df$alpha_beta <- with(ex5_df, paste(as.character(alpha), as.character(beta), sep=","))
# The stronger the prior the more evidence that is needed to overcome it
library(ggplot2)
ggplot(ex5_df, mapping=aes(x=t, y=posterior, group = alpha_beta, col=alpha_beta)) + geom_line()