Skip to content

Commit

Permalink
Updates for friday
Browse files Browse the repository at this point in the history
  • Loading branch information
Matthew Talluto committed Dec 1, 2023
1 parent 5e6f3d2 commit de4de85
Show file tree
Hide file tree
Showing 41 changed files with 2,618 additions and 22,045 deletions.
21,931 changes: 0 additions & 21,931 deletions docs/exercises/data/treedata.csv

This file was deleted.

22 changes: 11 additions & 11 deletions ex/soln5_kung.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ data {
vector [n] height;
vector [n] weight;
vector [n] age;
vector [n] sex;
vector [n] is_male;
// priors
real <lower = 0> lam_sig;
real <lower = 0> sig_a;
Expand All @@ -226,12 +226,12 @@ parameters {
real b_weight;
real b_age;
real b_interaction;
real b_sex;
real b_male;
real <lower = 0> sigma;
}
transformed parameters {
vector [n] eta;
eta = a + b_weight * weight + b_age * age + b_interaction * weight .* age + b_sex * sex;
eta = a + b_weight * weight + b_age * age + b_interaction * weight .* age + b_male * is_male;
}
model {
height ~ normal(eta, sigma);
Expand All @@ -240,14 +240,14 @@ model {
b_weight ~ normal(0, sig_b);
b_age ~ normal(0, sig_b);
b_interaction ~ normal(0, sig_b);
b_sex ~ normal(0, sig_b);
b_male ~ normal(0, sig_b);
}
generated quantities {
// prediction, for part 3c
// 41 is the hypothetical weight for this example
// 24 is the age
// the subject is female, so sex=0
real prediction_3c = normal_rng(a + b_weight * 41 + b_age * 24 + b_interaction*24*41 + b_sex * 0, sigma);
// the subject is female, so is_male=0
real prediction_3c = normal_rng(a + b_weight * 41 + b_age * 24 + b_interaction*24*41 + b_male * 0, sigma);
}
```
Expand All @@ -264,7 +264,7 @@ kung_mlr = stan_model("stan/kung_mlr.stan")
```{r cache = TRUE}
# add some variables we need for the multiple regression
kung_lm_data$age = Howell1$age
kung_lm_data$sex = Howell1$male
kung_lm_data$is_male = Howell1$male
fit_lm = sampling(kung_lm, data = kung_lm_data, refresh = 0)
fit_lm_log = sampling(kung_lm_log, data = kung_lm_data, refresh = 0)
Expand All @@ -273,7 +273,7 @@ fit_mlr = sampling(kung_mlr, data = kung_lm_data, refresh = 0)
# extract the posterior samples
samps_lm = as.matrix(fit_lm, pars = c("a", "b", "sigma"))
samps_lm_log = as.matrix(fit_lm_log, pars = c("a", "b", "sigma"))
samps_mlr = as.matrix(fit_mlr, pars = c("a", "b_weight", "b_age", "b_interaction", "b_sex", "sigma"))
samps_mlr = as.matrix(fit_mlr, pars = c("a", "b_weight", "b_age", "b_interaction", "b_male", "sigma"))
# plotting is a little challenging with curves, we will accomplish it by creating a counterfactual dataset
# the multiple regression is also difficult to visualise, so we will stick to the two univariate cases
Expand Down Expand Up @@ -318,15 +318,15 @@ In R, we extract the posterior distribution of each parameter, then draw samples
``` {r}
weight = 41
age = 24
sex = 0 # 0 codes female, 1 male
is_male = 0 # 0 codes female, 1 male
## linear model
# the '41' below is the weight, which is the only thing that is relevant for this model
pr_lm = rnorm(nrow(samps_lm), samps_lm[,'a'] + samps_lm[,'b'] * weight, samps_lm[,'sigma'])
## similar logic for the other two models
pr_lm_log = rnorm(nrow(samps_lm_log), samps_lm_log[,'a'] + samps_lm_log[,'b'] * log(weight), samps_lm_log[,'sigma'])
pr_mlr = rnorm(nrow(samps_mlr), samps_mlr[,'a'] + samps_mlr[,'b_weight'] * weight +
samps_mlr[,'b_age'] * age + samps_mlr[,'b_interaction'] * weight * age + samps_mlr[,'b_sex'] * sex,
samps_mlr[,'b_age'] * age + samps_mlr[,'b_interaction'] * weight * age + samps_mlr[,'b_male'] * is_male,
samps_mlr[,'sigma'])
## next we examine how many cases fall within the interval we want
Expand All @@ -336,7 +336,7 @@ sum(pr_mlr >= 130 & pr_mlr <= 145) / length(pr_mlr)
## for a male, only one model will change
pr_mlr_male = rnorm(nrow(samps_mlr), samps_mlr[,'a'] + samps_mlr[,'b_weight'] * weight +
samps_mlr[,'b_age'] * age + samps_mlr[,'b_interaction'] * weight * age + samps_mlr[,'b_sex'] * 1,
samps_mlr[,'b_age'] * age + samps_mlr[,'b_interaction'] * weight * age + samps_mlr[,'b_male'] * 1,
samps_mlr[,'sigma'])
sum(pr_mlr_male >= 130 & pr_mlr_male <= 145) / length(pr_mlr_male)
Expand Down
36 changes: 18 additions & 18 deletions ex/soln5_kung.html
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ <h4 class="date">29.11.2023</h4>
vector [n] height;
vector [n] weight;
vector [n] age;
vector [n] sex;
vector [n] is_male;
// priors
real &lt;lower = 0&gt; lam_sig;
real &lt;lower = 0&gt; sig_a;
Expand All @@ -371,12 +371,12 @@ <h4 class="date">29.11.2023</h4>
real b_weight;
real b_age;
real b_interaction;
real b_sex;
real b_male;
real &lt;lower = 0&gt; sigma;
}
transformed parameters {
vector [n] eta;
eta = a + b_weight * weight + b_age * age + b_interaction * weight .* age + b_sex * sex;
eta = a + b_weight * weight + b_age * age + b_interaction * weight .* age + b_male * is_male;
}
model {
height ~ normal(eta, sigma);
Expand All @@ -385,14 +385,14 @@ <h4 class="date">29.11.2023</h4>
b_weight ~ normal(0, sig_b);
b_age ~ normal(0, sig_b);
b_interaction ~ normal(0, sig_b);
b_sex ~ normal(0, sig_b);
b_male ~ normal(0, sig_b);
}
generated quantities {
// prediction, for part 3c
// 41 is the hypothetical weight for this example
// 24 is the age
// the subject is female, so sex=0
real prediction_3c = normal_rng(a + b_weight * 41 + b_age * 24 + b_interaction*24*41 + b_sex * 0, sigma);
// the subject is female, so is_male=0
real prediction_3c = normal_rng(a + b_weight * 41 + b_age * 24 + b_interaction*24*41 + b_male * 0, sigma);
}
</code></pre>
<pre class="r"><code>kung_lm_log = stan_model(&quot;stan/kung_lm_log.stan&quot;)
Expand All @@ -402,7 +402,7 @@ <h4 class="date">29.11.2023</h4>
<code>sampling</code></div>
<pre class="r"><code># add some variables we need for the multiple regression
kung_lm_data$age = Howell1$age
kung_lm_data$sex = Howell1$male
kung_lm_data$is_male = Howell1$male

fit_lm = sampling(kung_lm, data = kung_lm_data, refresh = 0)
fit_lm_log = sampling(kung_lm_log, data = kung_lm_data, refresh = 0)
Expand All @@ -411,7 +411,7 @@ <h4 class="date">29.11.2023</h4>
# extract the posterior samples
samps_lm = as.matrix(fit_lm, pars = c(&quot;a&quot;, &quot;b&quot;, &quot;sigma&quot;))
samps_lm_log = as.matrix(fit_lm_log, pars = c(&quot;a&quot;, &quot;b&quot;, &quot;sigma&quot;))
samps_mlr = as.matrix(fit_mlr, pars = c(&quot;a&quot;, &quot;b_weight&quot;, &quot;b_age&quot;, &quot;b_interaction&quot;, &quot;b_sex&quot;, &quot;sigma&quot;))
samps_mlr = as.matrix(fit_mlr, pars = c(&quot;a&quot;, &quot;b_weight&quot;, &quot;b_age&quot;, &quot;b_interaction&quot;, &quot;b_male&quot;, &quot;sigma&quot;))

# plotting is a little challenging with curves, we will accomplish it by creating a counterfactual dataset
# the multiple regression is also difficult to visualise, so we will stick to the two univariate cases
Expand All @@ -434,8 +434,8 @@ <h4 class="date">29.11.2023</h4>
round(apply(samps_lm, 2, quantile, c(0.05, 0.95)),2)</code></pre>
<pre><code>## parameters
## a b sigma
## 5% 73.72 1.72 8.91
## 95% 77.14 1.81 9.87</code></pre>
## 5% 73.69 1.72 8.93
## 95% 77.23 1.81 9.87</code></pre>
<pre class="r"><code>## log-transformed model
round(apply(samps_lm_log, 2, quantile, c(0.05, 0.95)),2)</code></pre>
<pre><code>## parameters
Expand All @@ -445,9 +445,9 @@ <h4 class="date">29.11.2023</h4>
<pre class="r"><code>## multiple regression
round(apply(samps_mlr, 2, quantile, c(0.05, 0.95)),2)</code></pre>
<pre><code>## parameters
## a b_weight b_age b_interaction b_sex sigma
## 5% 57.67 2.13 1.47 -0.04 3.67 6.18
## 95% 60.99 2.25 1.67 -0.04 5.58 6.83</code></pre>
## a b_weight b_age b_interaction b_male sigma
## 5% 57.67 2.13 1.47 -0.04 3.67 6.18
## 95% 60.99 2.25 1.67 -0.04 5.58 6.83</code></pre>
</div>
<div class="line-block">   c. What is the probability that the height of
a 24-year-old female with a weight of 41 kg is between 130 and 145 cm?
Expand All @@ -463,27 +463,27 @@ <h4 class="date">29.11.2023</h4>
standard deviation from the model.</p>
<pre class="r"><code>weight = 41
age = 24
sex = 0 # 0 codes female, 1 male
is_male = 0 # 0 codes female, 1 male
## linear model
# the &#39;41&#39; below is the weight, which is the only thing that is relevant for this model
pr_lm = rnorm(nrow(samps_lm), samps_lm[,&#39;a&#39;] + samps_lm[,&#39;b&#39;] * weight, samps_lm[,&#39;sigma&#39;])

## similar logic for the other two models
pr_lm_log = rnorm(nrow(samps_lm_log), samps_lm_log[,&#39;a&#39;] + samps_lm_log[,&#39;b&#39;] * log(weight), samps_lm_log[,&#39;sigma&#39;])
pr_mlr = rnorm(nrow(samps_mlr), samps_mlr[,&#39;a&#39;] + samps_mlr[,&#39;b_weight&#39;] * weight +
samps_mlr[,&#39;b_age&#39;] * age + samps_mlr[,&#39;b_interaction&#39;] * weight * age + samps_mlr[,&#39;b_sex&#39;] * sex,
samps_mlr[,&#39;b_age&#39;] * age + samps_mlr[,&#39;b_interaction&#39;] * weight * age + samps_mlr[,&#39;b_male&#39;] * is_male,
samps_mlr[,&#39;sigma&#39;])

## next we examine how many cases fall within the interval we want
sum(pr_lm &gt;= 130 &amp; pr_lm &lt;= 145) / length(pr_lm)</code></pre>
<pre><code>## [1] 0.36325</code></pre>
<pre><code>## [1] 0.3645</code></pre>
<pre class="r"><code>sum(pr_lm_log &gt;= 130 &amp; pr_lm_log &lt;= 145) / length(pr_lm_log)</code></pre>
<pre><code>## [1] 0.12175</code></pre>
<pre class="r"><code>sum(pr_mlr &gt;= 130 &amp; pr_mlr &lt;= 145) / length(pr_mlr)</code></pre>
<pre><code>## [1] 0.2215</code></pre>
<pre class="r"><code>## for a male, only one model will change
pr_mlr_male = rnorm(nrow(samps_mlr), samps_mlr[,&#39;a&#39;] + samps_mlr[,&#39;b_weight&#39;] * weight +
samps_mlr[,&#39;b_age&#39;] * age + samps_mlr[,&#39;b_interaction&#39;] * weight * age + samps_mlr[,&#39;b_sex&#39;] * 1,
samps_mlr[,&#39;b_age&#39;] * age + samps_mlr[,&#39;b_interaction&#39;] * weight * age + samps_mlr[,&#39;b_male&#39;] * 1,
samps_mlr[,&#39;sigma&#39;])
sum(pr_mlr_male &gt;= 130 &amp; pr_mlr_male &lt;= 145) / length(pr_mlr_male)</code></pre>
<pre><code>## [1] 0.0785</code></pre>
Expand All @@ -496,7 +496,7 @@ <h4 class="date">29.11.2023</h4>
pr_mlr = as.matrix(fit_mlr, pars = &quot;prediction_3c&quot;)

sum(pr_lm &gt;= 130 &amp; pr_lm &lt;= 145) / length(pr_lm)</code></pre>
<pre><code>## [1] 0.34625</code></pre>
<pre><code>## [1] 0.36525</code></pre>
<pre class="r"><code>sum(pr_lm_log &gt;= 130 &amp; pr_lm_log &lt;= 145) / length(pr_lm_log)</code></pre>
<pre><code>## [1] 0.12275</code></pre>
<pre class="r"><code>sum(pr_mlr &gt;= 130 &amp; pr_mlr &lt;= 145) / length(pr_mlr)</code></pre>
Expand Down
Binary file modified ex/soln5_kung_files/figure-html/unnamed-chunk-11-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified ex/soln5_kung_files/figure-html/unnamed-chunk-15-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
45 changes: 25 additions & 20 deletions index.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,15 @@ This course will cover the basics of Bayesian statistical methods with applicati
</td>
<td>
<p>Generalised linear models</p>
<p><b>Inference II:</b></p>
<p class = "soln">Priors & Diagnostics</p>
<p class = "soln">Bayesian workflow</p>
</td>
<td>
<p><a href="lec/4_regression">Regression &amp; GLM</a></p>
<p><a href="lec/5_inference_ii">Inference II</a></p>
</td>
<td>
<p class = "ex"><a href = "ex/ex4_birddisp.html">Bird dispersal</a></p>
<p class = "soln"><a href = "ex/soln4_birddisp.html">Solutions</a></p>
<p class = "ex"><a href = "ex/ex5_kung.html">!Kung height</a></p>
<!--<p class = "soln"><a href = "ex/soln">Solutions</a></p>-->
<p class = "soln"><a href = "ex/soln">Solutions</a></p>
<p class = "ex"><a href = "ex/ex6_birddiv_glm.html">Bird diversity</a></p>
</td>
</tr>
Expand All @@ -105,28 +101,40 @@ This course will cover the basics of Bayesian statistical methods with applicati
<p>8:15–12:00</p>
</td>
<td>
<p><b>Inference III:</b></p>
<p class = "soln">Model selection</p>
<p class = "soln">Multimodel inference</p>
<p><b>Inference II:</b></p>
<p class = "soln">Priors & Diagnostics</p>
<p class = "soln">Bayesian workflow</p>
<p>Hierarchical &amp; Multilevel Models</p>
</td>
<td>
<p>Model Selection</p>
<p>Hierarchical Models</p>
<p><a href="lec/5_inference_ii">Inference II</a></p>
<p><a href="lec/6_hm">Hierarchical Models</a></p>
</td>
<td>
<p class = "ex"><a href = "ex/"></a></p>
<!--<p class = "soln"><a href = "ex/soln">Solutions</a></p>-->
</td>
</tr>
<tr>
<td><b>Monday</b> 04.12<br/>13:15–17:00</td>
<td>Special Topics<p class = "soln">(Time permitting)</p>Exercise catch-up<br />Project catch-up</td>
<td><a href=""></a></td>
<td><p class = "ex"><a href = "ex/"></a></p>
<!--<p class = "soln"><a href = "ex/soln">Solutions</a></p>--></td>
<td>
<p><b>Monday</b> 04.12</p>
<p>13:15–17:00</p>
</td>
<td>
<p><b>Inference III:</b></p>
<p class = "soln">Model selection</p>
<p class = "soln">Multimodel inference</p>
<p>Special Topics</p>
<p class = "soln">(Time permitting)</p>
<p>Exercise catch-up</p>
<p>Project catch-up</p>
</td>
<td>
<a href="">Model selection</a>
</td>
<td></td>
</tr>
<tr>
<tr>
<td>
<p><b>Monday</b> 11.12</p>
<p>13:15–17:00</p>
Expand All @@ -139,10 +147,7 @@ This course will cover the basics of Bayesian statistical methods with applicati
<td>
<a href=""></a>
</td>
<td>
<p><a href = "ex/"></a></p>
<!--<p class = "soln"><a href = "ex/soln">Solutions</a></p>-->
</td>
<td></td>
</tr>
</table>

Expand Down
41 changes: 24 additions & 17 deletions lec/5_inference_ii.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ standat1 = list(
sigma_b = 5
)
fit1 = sampling(bird_glm, data = standat1, iter=2000,
fit1 = sampling(bird_glm, data = standat1, iter=5000,
chains = 4, refresh = 0)
Expand Down Expand Up @@ -357,7 +357,7 @@ standat2 = list(
sigma_b = 5
)
fit2 = sampling(bird_glm, data = standat2, iter=2000,
fit2 = sampling(bird_glm, data = standat2, iter=5000,
chains = 4, refresh = 0)
```
::::
Expand Down Expand Up @@ -506,17 +506,26 @@ grid.arrange(pl_left, pl_right, ncol=2, nrow=1)
::::
:::: {.column}

For the Poisson, we want to ensure that the $Var(y|x) = \mathbb{E}(y|x)$. We can approximate this by computing the dispersion parameter, which we expect to be equal to one:

$$
\phi = \frac{-2 \times \log pr(x|y)}{n-k}
$$
Where $k$ is the number of parameters in the model.

```{r message = FALSE, fig.width=7}
# compute posterior distribution of the residual sum of squares
# extract the samples
samp1_lam = as.matrix(fit1, pars='lambda')
sq_resid1 = apply(samp1_lam, 1, function(x) (standat1$richness - x)^2)
# compute the posterior distribution of the residual deviance
dev1 = apply(samp1_lam, 1, function(x) -2 * sum(dpois(standat1$richness, x, log = TRUE)))
# compute posterior distribution of dispersion parameter, which is just
# sum(squared residuals)/(n - k)
# deviance/(n - k)
# here k is 2, we have an intercept and one slope
# if phi > 1, we have overdispersion and need a better model
phi = apply(sq_resid1, 2, function(x) sum(x)/(length(x) - 2))
quantile(phi, c(0.5, 0.05, 0.95))
phi = dev1 / (length(standat1$richness) - 2)
quantile(phi, c(0.05, 0.95))
```

Expand Down Expand Up @@ -574,21 +583,19 @@ ggplot(pldat2) + xlab("Observed Richness") +
:::: {.column}

```{r message = FALSE, fig.width=7}
# compute posterior distribution of the residual sum of squares
samp2_lam = as.matrix(fit2, pars='lambda')
sq_resid2 = apply(samp2_lam, 1, function(x) (standat2$richness - x)^2)
# extract the samples
samp2_lam = as.matrix(fit1, pars='lambda')
# compute posterior distribution of dispersion parameter, which is just
# sum(squared residuals)/(n - k)
# here k is 2, we have an intercept and one slope
# if phi > 1, we have overdispersion and need a better model
phi = apply(sq_resid2, 2, function(x) sum(x)/(length(x) - 2))
quantile(phi, c(0.5, 0.05, 0.95))
# compute the posterior distribution of the residual deviance
dev2 = apply(samp2_lam, 1, function(x) -2 * sum(dpois(standat2$richness, x, log = TRUE)))
phi = dev2 / (length(standat2$richness) - 9)
quantile(phi, c(0.05, 0.95))
```

* This model is still quite overdispersed
- Consider more variables
- Consider more (and better!) variables
- Consider other likelihoods (e.g., Negative Binomial)

::::
Expand Down
Loading

0 comments on commit de4de85

Please sign in to comment.