diff --git a/19-linear_mixed_effects_models3.Rmd b/19-linear_mixed_effects_models3.Rmd
index 6d8084d..32c8b1c 100644
--- a/19-linear_mixed_effects_models3.Rmd
+++ b/19-linear_mixed_effects_models3.Rmd
@@ -4,7 +4,7 @@
 
 - Pitfalls in fitting `lmers()`s (and what to do about it).
 - Understanding `lmer()` syntax even better.
-- ANOVA vs. Lmer
+- ANOVA vs. lmer
 
 ## Load packages and set plotting theme
 
@@ -42,53 +42,12 @@ options(dplyr.summarise.inform = F)
 
 ## Load data sets
 
-### Sleep data
-
-```{r}
-# load sleepstudy data set
-df.sleep = sleepstudy %>%
-  as_tibble() %>%
-  clean_names() %>%
-  mutate(subject = as.character(subject)) %>%
-  select(subject, days, reaction)
-
-# add two fake participants (with missing data)
-df.sleep = df.sleep %>%
-  bind_rows(tibble(subject = "374",
-                   days = 0:1,
-                   reaction = c(286, 288)),
-            tibble(subject = "373",
-                   days = 0,
-                   reaction = 245))
-```
-
 ### Reasoning data
 
 ```{r}
 df.reasoning = sk2011.1
 ```
 
-### Weight loss data
-
-```{r}
-data("weightloss", package = "datarium")
-
-# Modify it to have three-way mixed design
-df.weightloss = weightloss %>%
-  mutate(id = rep(1:24, 2)) %>%
-  pivot_longer(cols = t1:t3,
-               names_to = "timepoint",
-               values_to = "score") %>%
-  arrange(id)
-```
-
-### Politness data
-
-```{r}
-df.politeness = read_csv("data/politeness_data.csv") %>%
-  mutate(scenario = as.factor(scenario))
-```
-
 ## Understanding the lmer() syntax
 
 Here is an overview of how to specify different kinds of linear mixed effects models.
 
@@ -99,7 +58,7 @@ tibble(formula = c("`dv ~ x1 + (1 | g)`",
                    "`dv ~ x1 + (x1 | g)`",
                    "`dv ~ x1 + (x1 || g)`",
                    "`dv ~ x1 + (1 | school) + (1 | teacher)`",
-                   "`dv ~ x1 + (1 | school/teacher)`"),
+                   "`dv ~ x1 + (1 | school) + (1 | school:teacher)`"),
       description = c("Random intercept for each level of `g`",
                       "Random slope for each level of `g`",
                       "Correlated random slope and intercept for each level of `g`",
@@ -111,7 +70,7 @@ tibble(formula = c("`dv ~ x1 + (1 | g)`",
 
 Note that this `(1 | school/teacher)` is equivalent to `(1 | school) + (1 | teacher:school)` (see [here](https://stats.stackexchange.com/questions/228800/crossed-vs-nested-random-effects-how-do-they-differ-and-how-are-they-specified)).
 
-## ANOVA vs. Lmer
+## ANOVA vs. lmer
 
 ### Between subjects ANOVA
 
@@ -128,7 +87,7 @@ aov_ez(id = "id",
 
 Looks like there was no main effect of `instruction` on participants' responses.
 
-An alternative route for getting at the same test, would be via combining `lm()` with `Anova()` (as we've done before in class).
+An alternative route for getting at the same test would be via combining `lm()` with `joint_tests()` (as we've done before in class).
 
 ```{r}
 lm(formula = response ~ instruction,
@@ -153,10 +112,11 @@ aov_ez(id = "id",
          filter(instruction == "probabilistic"))
 ```
 
-For the linear model route, given that we have repeated observations from the same participants, we need to use `lmer()`. The repeated measures anova has the random effect structure as shown below:
+For the linear model route, given that we have repeated observations from the same participants, we need to use `lmer()`. The repeated measures ANOVA has the random effect structure as shown below:
 
 ```{r}
-lmer(formula = response ~ validity * plausibility + (1 | id) + (1 | validity:id) + (1 | plausibility:id),
+lmer(formula = response ~ 1 + validity * plausibility + (1 | id) +
+       (1 | id:validity) + (1 | id:plausibility),
      data = df.reasoning %>%
        filter(instruction == "probabilistic") %>%
        group_by(id, validity, plausibility) %>%
       summarize(response = mean(response))) %>%
   joint_tests()
 ```
@@ -183,15 +143,21 @@ aov_ez(id = "id",
 with the `lmer()` route:
 
 ```{r}
-lmer(formula = response ~ instruction * validity * plausibility + (1 | id) + (1 | validity:id) + (1 | plausibility:id),
+lmer(formula = response ~ instruction * validity * plausibility + (1 | id) +
+       (1 | id:validity) + (1 | id:plausibility),
      data = df.reasoning %>%
       group_by(id, validity, plausibility, instruction) %>%
       summarize(response = mean(response))) %>%
   joint_tests()
 ```
-
 Here, both routes yield the same results.
 
+## Additional resources
+
+### Readings
+
+- [Nested and crossed random effects in lme4](https://www.muscardinus.be/statistics/nested.html)
+
 ## Session info
 
 Information about this R session including which version of R was used, and what packages were loaded.
diff --git a/20-linear_mixed_effects_models4.Rmd b/20-linear_mixed_effects_models4.Rmd
index 5f64937..89a17ae 100644
--- a/20-linear_mixed_effects_models4.Rmd
+++ b/20-linear_mixed_effects_models4.Rmd
@@ -228,11 +228,14 @@ ggplot(data = df.politeness,
 We fit the model and compute the contrasts.
 
 ```{r, message=FALSE}
-fit = lmer(formula = frequency ~ 1 + attitude * gender + (1 | subject) + (1 | scenario),
+fit = lmer(formula = frequency ~ 1 + attitude * gender + (1 + attitude | subject) + (1 | scenario),
            data = df.politeness)
 
 fit %>%
-  emmeans(specs = pairwise ~ attitude + gender,
+  joint_tests()
+
+fit %>%
+  emmeans(specs = pairwise ~ attitude + gender,
           adjust = "none")
 ```
 
@@ -492,7 +495,8 @@ df.boot = df.sleep %>%
   bootstrap(n = 100,
             id = "id") %>%
   mutate(fit = map(.x = strap,
-                   .f = ~ lm(formula = reaction ~ 1 + days, data = .)),
+                   .f = ~ lm(formula = reaction ~ 1 + days,
+                             data = .x)),
          tidy = map(.x = fit,
                     .f = tidy)) %>%
   unnest(tidy) %>%
library("knitr") # for knitting RMarkdown
-library("kableExtra") # for making nice tables
-library("janitor") # for cleaning column names
-library("broom.mixed") # for tidying up linear mixed effects models
-library("patchwork") # for making figure panels
-library("lme4") # for linear mixed effects models
-library("afex") # for ANOVAs
-library("car") # for ANOVAs
-library("datarium") # for ANOVA dataset
-library("modelr") # for bootstrapping
-library("boot") # also for bootstrapping
-library("ggeffects") # for plotting marginal effects
-library("emmeans") # for marginal effects
-library("tidyverse") # for wrangling, plotting, etc.
theme_set(theme_classic() + #set the theme
- theme(text = element_text(size = 20))) #set the default text size
-
-# knitr display options
-opts_chunk$set(comment = "",
- fig.show = "hold")
-
-# # set contrasts to using sum contrasts
-# options(contrasts = c("contr.sum", "contr.poly"))
-
-# suppress grouping warning messages
-options(dplyr.summarise.inform = F)
library("knitr") # for knitting RMarkdown
+library("kableExtra") # for making nice tables
+library("janitor") # for cleaning column names
+library("broom.mixed") # for tidying up linear mixed effects models
+library("patchwork") # for making figure panels
+library("lme4") # for linear mixed effects models
+library("afex") # for ANOVAs
+library("car") # for ANOVAs
+library("datarium") # for ANOVA dataset
+library("modelr") # for bootstrapping
+library("boot") # also for bootstrapping
+library("ggeffects") # for plotting marginal effects
+library("emmeans") # for marginal effects
+library("tidyverse") # for wrangling, plotting, etc.
theme_set(theme_classic() + #set the theme
+ theme(text = element_text(size = 20))) #set the default text size
+
+# knitr display options
+opts_chunk$set(comment = "",
+ fig.show = "hold")
+
+# # set contrasts to using sum contrasts
+# options(contrasts = c("contr.sum", "contr.poly"))
+
+# suppress grouping warning messages
+options(dplyr.summarise.inform = F)
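The lmer-syntax table and the note above state that the nested shorthand `(1 | school/teacher)` is just an abbreviation for `(1 | school) + (1 | school:teacher)`. Here is a minimal sketch that checks this equivalence on simulated data; everything in it (`df.nested`, `school`, `teacher`, `dv`, `x1`) is made up for illustration, and only `lme4` and the `tidyverse` are assumed.

```r
library("lme4")       # for linear mixed effects models
library("tidyverse")  # for simulating and wrangling the toy data

set.seed(1)

# simulate 10 schools x 5 teachers per school x 4 students per teacher
df.nested = expand_grid(school = str_c("school_", 1:10),
                        teacher = str_c("teacher_", 1:5),
                        student = 1:4) %>% 
  mutate(school = factor(school),
         teacher = factor(teacher),
         x1 = rnorm(n()),
         dv = rnorm(n()) +                                       # residual noise
           rnorm(10)[as.integer(school)] +                       # school effect
           rnorm(50)[as.integer(interaction(school, teacher))])  # teacher-in-school effect

# nested shorthand ...
fit.shorthand = lmer(formula = dv ~ 1 + x1 + (1 | school/teacher),
                     data = df.nested)

# ... and its explicit expansion
fit.expanded = lmer(formula = dv ~ 1 + x1 + (1 | school) + (1 | school:teacher),
                    data = df.nested)

# both calls fit the same model (identical log-likelihoods and variance components)
logLik(fit.shorthand)
logLik(fit.expanded)
```

The two specifications differ only in how the grouping factors are labeled; the fitted random-effects structure is the same.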
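For the between-subjects test, the text switches from `Anova()` to `joint_tests()` as the companion to `lm()`. A small sketch of why either works here, using the `sk2011.1` data from `afex` directly in place of `df.reasoning` (the object names `df.between` and `fit.between` are made up): with a single two-level factor, `emmeans::joint_tests()` and `car::Anova()` should return the same F-test for `instruction`.

```r
library("afex")      # for the sk2011.1 data set
library("car")       # for Anova()
library("emmeans")   # for joint_tests()
library("tidyverse") # for wrangling

# average over the repeated observations of each participant
df.between = sk2011.1 %>% 
  group_by(id, instruction) %>% 
  summarize(response = mean(response)) %>% 
  ungroup()

# fit the between-subjects linear model
fit.between = lm(formula = response ~ instruction,
                 data = df.between)

# the omnibus test of instruction, two ways
joint_tests(fit.between)
Anova(fit.between, type = 3)
```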
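Finally, the bootstrap hunk in `20-linear_mixed_effects_models4.Rmd` replaces `data = .` with `data = .x` inside `map()`, which makes it explicit that each fitted `lm()` receives the current bootstrap sample (one element of `strap`) rather than anything associated with the magrittr placeholder `.` in the surrounding pipe. A minimal sketch of the corrected pattern; it uses `lme4`'s `sleepstudy` data as a stand-in for `df.sleep` (which is not built in this diff), so the column names are capitalized (`Reaction`, `Days`), and `df.boot.example` is a made-up name.

```r
library("lme4")      # for the sleepstudy data set
library("modelr")    # for bootstrap()
library("broom")     # for tidy()
library("tidyverse") # for map(), mutate(), unnest()

set.seed(1)

df.boot.example = sleepstudy %>% 
  modelr::bootstrap(n = 10,
                    id = "id") %>% 
  mutate(fit = map(.x = strap,
                   # .x is the current bootstrap sample
                   .f = ~ lm(formula = Reaction ~ 1 + Days,
                             data = .x)),
         tidy = map(.x = fit,
                    .f = tidy)) %>% 
  unnest(tidy)

df.boot.example
```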