diff --git a/15-model_comparison.Rmd b/15-model_comparison.Rmd
index b1bca40..a73d4f2 100644
--- a/15-model_comparison.Rmd
+++ b/15-model_comparison.Rmd
@@ -144,8 +144,11 @@ plot_fit = function(i){
   geom_point(data = df.more_data,
              size = 2,
              color = "red") +
-  geom_smooth(method = "lm", se = F,
-              formula = y ~ poly(x, degree = i, raw = TRUE)) +
+  geom_smooth(method = "lm",
+              se = F,
+              formula = y ~ poly(x,
+                                 degree = i,
+                                 raw = TRUE)) +
   annotate(geom = "text",
            x = Inf,
            y = -Inf,
@@ -189,7 +192,9 @@ sd = 0.5
 # sample
 df.data = tibble(participant = 1:sample_size,
-                 x = runif(sample_size, min = 0, max = 1),
+                 x = runif(sample_size,
+                           min = 0,
+                           max = 1),
                  y = b0 + b1*x + b2*x^2 + rnorm(sample_size, sd = sd))
 ```
@@ -198,7 +203,7 @@ And plot it:
 ```{r}
 ggplot(data = df.data,
        mapping = aes(x = x,
-                     y = y)) + 
+                     y = y)) +
   geom_smooth(method = "lm",
               formula = y ~ x + I(x^2)) +
   geom_point()
@@ -283,8 +288,10 @@ fun.cv_plot = function(data_point){
                      filter(color == 2)) %>%
     clean_names()
-  p = ggplot(df.plot,
-             aes(x, y, color = as.factor(color))) +
+  p = ggplot(data = df.plot,
+             mapping = aes(x = x,
+                           y = y,
+                           color = as.factor(color))) +
     geom_segment(aes(xend = x,
                      yend = fitted),
                  data = df.fit,
@@ -518,10 +525,12 @@ df.normal = tibble(y = seq(-5, 5, 0.1),
 # show the residual plot together with the normal distribution
 ggplot(data = df.plot ,
-       mapping = aes(x = fitted, y = resid)) +
+       mapping = aes(x = fitted,
+                     y = resid)) +
   geom_point() +
   geom_path(data = df.normal,
-            aes(x = x, y = y),
+            aes(x = x,
+                y = y),
             size = 2)
 ```
diff --git a/docs/404.html b/docs/404.html
index fc51834..0eb93b0 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -23,7 +23,7 @@
-
+
@@ -51,8 +51,6 @@
-
-
-
@@ -328,36 +292,36 @@
library("knitr") # for knitting RMarkdown
-library("kableExtra") # for making nice tables
-library("janitor") # for cleaning column names
-library("broom") # for tidying up linear models
-library("patchwork") # for figure panels
-library("modelr") # for cross-validation
-library("tidyverse") # for wrangling, plotting, etc.
theme_set(theme_classic() + #set the theme
- theme(text = element_text(size = 20))) #set the default text size
-
-opts_chunk$set(comment = "",
- fig.show = "hold")
library("knitr") # for knitting RMarkdown
+library("kableExtra") # for making nice tables
+library("janitor") # for cleaning column names
+library("broom") # for tidying up linear models
+library("patchwork") # for figure panels
+library("modelr") # for cross-validation
+library("tidyverse") # for wrangling, plotting, etc.
theme_set(theme_classic() + #set the theme
+ theme(text = element_text(size = 20))) #set the default text size
+
+opts_chunk$set(comment = "",
+ fig.show = "hold")
set.seed(1)
-
-n_plots = 3
-
-# sample size
-n_samples = 20
-
-# number of parameters in the polynomial regression
-n_parameters = c(1:4, seq(7, 19, length.out = 5))
-
-# generate data
-df.data = tibble(x = runif(n_samples, min = 0, max = 10),
- y = 10 + 3 * x + 3 * x^2 + rnorm(n_samples, sd = 20))
-
-# plotting function
-plot_fit = function(i){
- # calculate RMSE
- rmse = lm(formula = y ~ poly(x, degree = i, raw = TRUE),
- data = df.data) %>%
- rmse(data = df.data)
-
- # make a plot
- ggplot(data = df.data,
- mapping = aes(x = x,
- y = y)) +
- geom_point(size = 2) +
- geom_smooth(method = "lm", se = F,
- formula = y ~ poly(x, degree = i, raw = TRUE)) +
- annotate(geom = "text",
- x = Inf,
- y = -Inf,
- label = str_c("RMSE = ", round(rmse, 2)),
- hjust = 1.1,
- vjust = -0.3) +
- theme(axis.ticks = element_blank(),
- axis.title = element_blank(),
- axis.text = element_blank())
-}
-
-# save plots in a list
-l.p = map(.x = n_parameters,
- .f = ~ plot_fit(.))
-
-# make figure panel
-wrap_plots(plotlist = l.p, ncol = 3)
set.seed(1)
+
+n_plots = 3
+
+# sample size
+n_samples = 20
+
+# number of parameters in the polynomial regression
+n_parameters = c(1:4, seq(7, 19, length.out = 5))
+
+# generate data
+df.data = tibble(x = runif(n_samples, min = 0, max = 10),
+ y = 10 + 3 * x + 3 * x^2 + rnorm(n_samples, sd = 20))
+
+# plotting function
+plot_fit = function(i){
+ # calculate RMSE
+ rmse = lm(formula = y ~ poly(x, degree = i, raw = TRUE),
+ data = df.data) %>%
+ rmse(data = df.data)
+
+ # make a plot
+ ggplot(data = df.data,
+ mapping = aes(x = x,
+ y = y)) +
+ geom_point(size = 2) +
+ geom_smooth(method = "lm", se = F,
+ formula = y ~ poly(x, degree = i, raw = TRUE)) +
+ annotate(geom = "text",
+ x = Inf,
+ y = -Inf,
+ label = str_c("RMSE = ", round(rmse, 2)),
+ hjust = 1.1,
+ vjust = -0.3) +
+ theme(axis.ticks = element_blank(),
+ axis.title = element_blank(),
+ axis.text = element_blank())
+}
+
+# save plots in a list
+l.p = map(.x = n_parameters,
+ .f = ~ plot_fit(.))
+
+# make figure panel
+wrap_plots(plotlist = l.p, ncol = 3)
As we can see, the RMSE becomes smaller and smaller the more parameters the model uses to fit the data. But what does the RMSE look like for new data generated from the same underlying ground truth?
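As a reminder of what rmse() computes, here is a minimal by-hand sketch (my own, not from the chapter), using the quadratic fit from the data above as an example; the names fit and rmse_by_hand are just illustrative:

```
# fit the quadratic model to the training data
fit = lm(y ~ poly(x, degree = 2, raw = TRUE),
         data = df.data)

# RMSE is the square root of the mean squared prediction error;
# this should match rmse(fit, df.data) from modelr
rmse_by_hand = sqrt(mean((df.data$y - predict(fit, df.data))^2))
rmse_by_hand
```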
-set.seed(1)
-
-n_plots = 3
-
-# sample size
-n_samples = 20
-
-# number of parameters in the polynomial regression
-n_parameters = c(1:4, seq(7, 19, length.out = 5))
-
-# generate data
-df.data = tibble(
- x = runif(n_samples, min = 0, max = 10),
- y = 10 + 3 * x + 3 * x^2 + rnorm(n_samples, sd = 20)
-)
-
-# generate some more data
-df.more_data = tibble(x = runif(50, min = 0, max = 10),
- y = 10 + 3 * x + 3 * x^2 + rnorm(50, sd = 20))
-
-# list for plots
-l.p = list()
-
-# plotting function
-plot_fit = function(i){
- # calculate RMSE for fitted data
- fit = lm(formula = y ~ poly(x, degree = i, raw = TRUE),
- data = df.data)
-
- # calculate RMSE for training data
- rmse = fit %>%
- rmse(data = df.data)
-
- # calculate RMSE for new data
- rmse_new = fit %>%
- rmse(data = df.more_data)
-
- # make a plot
- ggplot(data = df.data,
- mapping = aes(x = x,
- y = y)) +
- geom_point(size = 2) +
- geom_point(data = df.more_data,
- size = 2,
- color = "red") +
- geom_smooth(method = "lm", se = F,
- formula = y ~ poly(x, degree = i, raw = TRUE)) +
- annotate(geom = "text",
- x = Inf,
- y = -Inf,
- label = str_c("RMSE = ", round(rmse, 2)),
- hjust = 1.1,
- vjust = -0.3) +
- annotate(geom = "text",
- x = Inf,
- y = -Inf,
- label = str_c("RMSE = ", round(rmse_new, 2)),
- hjust = 1.1,
- vjust = -2,
- color = "red") +
- theme(axis.ticks = element_blank(),
- axis.title = element_blank(),
- axis.text = element_blank())
-}
-
-# map over the parameters
-l.p = map(.x = n_parameters,
- .f = ~ plot_fit(.))
-
-# make figure panel
-wrap_plots(plotlist = l.p, ncol = 3)
set.seed(1)
+
+n_plots = 3
+
+# sample size
+n_samples = 20
+
+# number of parameters in the polynomial regression
+n_parameters = c(1:4, seq(7, 19, length.out = 5))
+
+# generate data
+df.data = tibble(
+ x = runif(n_samples, min = 0, max = 10),
+ y = 10 + 3 * x + 3 * x^2 + rnorm(n_samples, sd = 20)
+)
+
+# generate some more data
+df.more_data = tibble(x = runif(50, min = 0, max = 10),
+ y = 10 + 3 * x + 3 * x^2 + rnorm(50, sd = 20))
+
+# list for plots
+l.p = list()
+
+# plotting function
+plot_fit = function(i){
+ # calculate RMSE for fitted data
+ fit = lm(formula = y ~ poly(x, degree = i, raw = TRUE),
+ data = df.data)
+
+ # calculate RMSE for training data
+ rmse = fit %>%
+ rmse(data = df.data)
+
+ # calculate RMSE for new data
+ rmse_new = fit %>%
+ rmse(data = df.more_data)
+
+ # make a plot
+ ggplot(data = df.data,
+ mapping = aes(x = x,
+ y = y)) +
+ geom_point(size = 2) +
+ geom_point(data = df.more_data,
+ size = 2,
+ color = "red") +
+ geom_smooth(method = "lm",
+ se = F,
+ formula = y ~ poly(x,
+ degree = i,
+ raw = TRUE)) +
+ annotate(geom = "text",
+ x = Inf,
+ y = -Inf,
+ label = str_c("RMSE = ", round(rmse, 2)),
+ hjust = 1.1,
+ vjust = -0.3) +
+ annotate(geom = "text",
+ x = Inf,
+ y = -Inf,
+ label = str_c("RMSE = ", round(rmse_new, 2)),
+ hjust = 1.1,
+ vjust = -2,
+ color = "red") +
+ theme(axis.ticks = element_blank(),
+ axis.title = element_blank(),
+ axis.text = element_blank())
+}
+
+# map over the parameters
+l.p = map(.x = n_parameters,
+ .f = ~ plot_fit(.))
+
+# make figure panel
+wrap_plots(plotlist = l.p, ncol = 3)
The RMSE in black shows the root mean squared error for the data that the model was fit on. The RMSE in red shows the RMSE on the new data. As you can see, the complex models do really poorly. They overfit the noise in the original data, which leads them to make poor predictions for new data. The simplest model (with two parameters) doesn’t do particularly well either since it misses out on the quadratic trend in the data. Both the model with the quadratic term (top middle) and the model that includes a cubic term (top right) provide a good balance – their RMSE on the new data is lowest.
Let’s generate another data set:
-# make example reproducible
-set.seed(1)
-
-# parameters
-sample_size = 100
-b0 = 1
-b1 = 2
-b2 = 3
-sd = 0.5
-
-# sample
-df.data = tibble(participant = 1:sample_size,
- x = runif(sample_size, min = 0, max = 1),
- y = b0 + b1*x + b2*x^2 + rnorm(sample_size, sd = sd))
# make example reproducible
+set.seed(1)
+
+# parameters
+sample_size = 100
+b0 = 1
+b1 = 2
+b2 = 3
+sd = 0.5
+
+# sample
+df.data = tibble(participant = 1:sample_size,
+ x = runif(sample_size,
+ min = 0,
+ max = 1),
+ y = b0 + b1*x + b2*x^2 + rnorm(sample_size, sd = sd))
And plot it:
-
-
+
+
# fit models to the data
-fit_simple = lm(y ~ 1 + x, data = df.data)
-fit_correct = lm(y ~ 1 + x + I(x^2), data = df.data)
-fit_complex = lm(y ~ 1 + x + I(x^2) + I(x^3), data = df.data)
-
-# compare the models using an F-test
-anova(fit_simple, fit_correct)
# fit models to the data
+fit_simple = lm(y ~ 1 + x, data = df.data)
+fit_correct = lm(y ~ 1 + x + I(x^2), data = df.data)
+fit_complex = lm(y ~ 1 + x + I(x^2) + I(x^3), data = df.data)
+
+# compare the models using an F-test
+anova(fit_simple, fit_correct)
Analysis of Variance Table
Model 1: y ~ 1 + x
@@ -1074,7 +1051,7 @@ 15.3.2 F-test
anova(fit_correct, fit_complex)
Analysis of Variance Table
Model 1: y ~ 1 + x + I(x^2)
@@ -1103,92 +1080,94 @@ 15.3.3 Cross-validation
15.3.3.1 Leave-one-out cross-validation
I’ve used code similar to this one to illustrate how LOO works in class. Here is a simple data set with 9 data points. We fit 9 models, where each model is trained on all data points except one, and then we look at how well the model captures the held-out data point. We can then characterize the model’s performance by calculating the mean squared error across the 9 runs.
-# make example reproducible
-set.seed(1)
-
-# sample
-df.loo = tibble(x = 1:9,
- y = c(5, 2, 4, 10, 3, 4, 10, 2, 8))
-
-df.loo_cross = df.loo %>%
- crossv_loo() %>%
- mutate(fit = map(.x = train,
- .f = ~ lm(y ~ x, data = .)),
- tidy = map(.x = fit,
- .f = ~ tidy(.))) %>%
- unnest(tidy)
-
-# original plot
-df.plot = df.loo %>%
- mutate(color = 1)
-
-# fit to all data except one
-fun.cv_plot = function(data_point){
-
- # determine which point to leave out
- df.plot = df.plot %>%
- mutate(color = ifelse(row_number() == data_point, 2, color))
-
- # fit
- df.fit = df.plot %>%
- filter(color != 2) %>%
- lm(formula = y ~ x, data = .) %>%
- augment(newdata = df.plot %>%
- filter(color == 2)) %>%
- clean_names()
-
- p = ggplot(df.plot,
- aes(x, y, color = as.factor(color))) +
- geom_segment(aes(xend = x,
- yend = fitted),
- data = df.fit,
- color = "red",
- size = 1) +
- geom_point(size = 2) +
- geom_smooth(method = "lm",
- formula = "y ~ x",
- se = F,
- color = "black",
- fullrange = T,
- data = df.plot %>% filter(color != 2)) +
- scale_color_manual(values = c("black", "red")) +
- theme(legend.position = "none",
- axis.title = element_blank(),
- axis.ticks = element_blank(),
- axis.text = element_blank())
- return(p)
-}
-
-# save plots in list
-l.plots = map(.x = 1:9,
- .f = ~ fun.cv_plot(.))
-
-# make figure panel
-wrap_plots(plotlist = l.plots, ncol = 3)
-
+# make example reproducible
+set.seed(1)
+
+# sample
+df.loo = tibble(x = 1:9,
+ y = c(5, 2, 4, 10, 3, 4, 10, 2, 8))
+
+df.loo_cross = df.loo %>%
+ crossv_loo() %>%
+ mutate(fit = map(.x = train,
+ .f = ~ lm(y ~ x, data = .)),
+ tidy = map(.x = fit,
+ .f = ~ tidy(.))) %>%
+ unnest(tidy)
+
+# original plot
+df.plot = df.loo %>%
+ mutate(color = 1)
+
+# fit to all data except one
+fun.cv_plot = function(data_point){
+
+ # determine which point to leave out
+ df.plot = df.plot %>%
+ mutate(color = ifelse(row_number() == data_point, 2, color))
+
+ # fit
+ df.fit = df.plot %>%
+ filter(color != 2) %>%
+ lm(formula = y ~ x, data = .) %>%
+ augment(newdata = df.plot %>%
+ filter(color == 2)) %>%
+ clean_names()
+
+ p = ggplot(data = df.plot,
+ mapping = aes(x = x,
+ y = y,
+ color = as.factor(color))) +
+ geom_segment(aes(xend = x,
+ yend = fitted),
+ data = df.fit,
+ color = "red",
+ size = 1) +
+ geom_point(size = 2) +
+ geom_smooth(method = "lm",
+ formula = "y ~ x",
+ se = F,
+ color = "black",
+ fullrange = T,
+ data = df.plot %>% filter(color != 2)) +
+ scale_color_manual(values = c("black", "red")) +
+ theme(legend.position = "none",
+ axis.title = element_blank(),
+ axis.ticks = element_blank(),
+ axis.text = element_blank())
+ return(p)
+}
+
+# save plots in list
+l.plots = map(.x = 1:9,
+ .f = ~ fun.cv_plot(.))
+
+# make figure panel
+wrap_plots(plotlist = l.plots, ncol = 3)
+
As you can see, the regression line changes quite a bit depending on which data point is in the test set.
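Before moving on, here is a minimal sketch (my own, not from the chapter) of the summary step described above: computing the held-out mean squared error across the 9 runs. It assumes the df.loo data set and the modelr functions loaded earlier; df.loo_mse is just an illustrative name.

```
# leave-one-out by hand: fit on 8 points, then record the squared
# prediction error for the single held-out point
df.loo_mse = df.loo %>%
  crossv_loo() %>%
  mutate(fit = map(.x = train,
                   .f = ~ lm(y ~ x, data = .)),
         # rmse() on a one-point test set is just the absolute error,
         # so squaring it gives the squared error for that point
         squared_error = map2_dbl(.x = fit,
                                  .y = test,
                                  .f = ~ rmse(.x, .y)^2))

# mean squared error across the 9 runs
mean(df.loo_mse$squared_error)
```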
Now, let’s use LOO to evaluate the models on the data set I’ve created above:
-# fit the models and calculate the RMSE for each model on the test set
-df.cross = df.data %>%
- crossv_loo() %>% # function which generates training and test data sets
- mutate(model_simple = map(.x = train,
- .f = ~ lm(y ~ 1 + x, data = .)),
- model_correct = map(.x = train,
- .f = ~ lm(y ~ 1 + x + I(x^2), data = .)),
- model_complex = map(.x = train,
- .f = ~ lm(y ~ 1 + x + I(x^2) + I(x^3), data = .))) %>%
- pivot_longer(cols = contains("model"),
- names_to = "model",
- values_to = "fit") %>%
- mutate(rmse = map2_dbl(.x = fit,
- .y = test,
- .f = ~ rmse(.x, .y)))
-
-# show the average RMSE for each model
-df.cross %>%
- group_by(model) %>%
- summarize(mean_rmse = mean(rmse) %>%
- round(3))
+# fit the models and calculate the RMSE for each model on the test set
+df.cross = df.data %>%
+ crossv_loo() %>% # function which generates training and test data sets
+ mutate(model_simple = map(.x = train,
+ .f = ~ lm(y ~ 1 + x, data = .)),
+ model_correct = map(.x = train,
+ .f = ~ lm(y ~ 1 + x + I(x^2), data = .)),
+ model_complex = map(.x = train,
+ .f = ~ lm(y ~ 1 + x + I(x^2) + I(x^3), data = .))) %>%
+ pivot_longer(cols = contains("model"),
+ names_to = "model",
+ values_to = "fit") %>%
+ mutate(rmse = map2_dbl(.x = fit,
+ .y = test,
+ .f = ~ rmse(.x, .y)))
+
+# show the average RMSE for each model
+df.cross %>%
+ group_by(model) %>%
+ summarize(mean_rmse = mean(rmse) %>%
+ round(3))
# A tibble: 3 × 2
model mean_rmse
<chr> <dbl>
@@ -1202,25 +1181,25 @@ 15.3.3.1 Leave-one-out cross-validation
15.3.3.2 k-fold cross-validation
For k-fold cross-validation, we split the data set into k folds, and then use k-1 folds as the training set and the remaining fold as the test set.
The code is almost identical to before. Instead of crossv_loo(), we use the crossv_kfold() function and specify how many times we want to “fold” the data.
-# crossvalidation scheme
-df.cross = df.data %>%
- crossv_kfold(k = 10) %>%
- mutate(model_simple = map(.x = train,
- .f = ~ lm(y ~ 1 + x, data = .)),
- model_correct = map(.x = train,
- .f = ~ lm(y ~ 1 + x + I(x^2), data = .)),
- model_complex = map(.x = train,
- .f = ~ lm(y ~ 1 + x + I(x^2) + I(x^3), data = .))) %>%
- pivot_longer(cols = contains("model"),
- names_to = "model",
- values_to = "fit") %>%
- mutate(rsquare = map2_dbl(.x = fit,
- .y = test,
- .f = ~ rsquare(.x, .y)))
-
-df.cross %>%
- group_by(model) %>%
- summarize(median_rsquare = median(rsquare))
+# crossvalidation scheme
+df.cross = df.data %>%
+ crossv_kfold(k = 10) %>%
+ mutate(model_simple = map(.x = train,
+ .f = ~ lm(y ~ 1 + x, data = .)),
+ model_correct = map(.x = train,
+ .f = ~ lm(y ~ 1 + x + I(x^2), data = .)),
+ model_complex = map(.x = train,
+ .f = ~ lm(y ~ 1 + x + I(x^2) + I(x^3), data = .))) %>%
+ pivot_longer(cols = contains("model"),
+ names_to = "model",
+ values_to = "fit") %>%
+ mutate(rsquare = map2_dbl(.x = fit,
+ .y = test,
+ .f = ~ rsquare(.x, .y)))
+
+df.cross %>%
+ group_by(model) %>%
+ summarize(median_rsquare = median(rsquare))
# A tibble: 3 × 2
model median_rsquare
<chr> <dbl>
@@ -1232,25 +1211,25 @@ 15.3.3.2 k-fold cross-validation
15.3.3.3 Monte Carlo cross-validation
Finally, let’s consider another, very flexible version of cross-validation: we determine how many random splits into training and test set we would like to make, and what proportion of the data should go into the test set.
-# crossvalidation scheme
-df.cross = df.data %>%
- crossv_mc(n = 50, test = 0.5) %>% # number of samples, and percentage of test
- mutate(model_simple = map(.x = train,
- .f = ~ lm(y ~ 1 + x, data = .x)),
- model_correct = map(.x = train,
- .f = ~ lm(y ~ 1 + x + I(x^2), data = .x)),
- model_complex = map(.x = train,
- .f = ~ lm(y ~ 1 + x + I(x^2) + I(x^3), data = .))) %>%
- pivot_longer(cols = contains("model"),
- names_to = "model",
- values_to = "fit") %>%
- mutate(rmse = map2_dbl(.x = fit,
- .y = test,
- .f = ~ rmse(.x, .y)))
-
-df.cross %>%
- group_by(model) %>%
- summarize(mean_rmse = mean(rmse))
+# crossvalidation scheme
+df.cross = df.data %>%
+ crossv_mc(n = 50, test = 0.5) %>% # number of samples, and percentage of test
+ mutate(model_simple = map(.x = train,
+ .f = ~ lm(y ~ 1 + x, data = .x)),
+ model_correct = map(.x = train,
+ .f = ~ lm(y ~ 1 + x + I(x^2), data = .x)),
+ model_complex = map(.x = train,
+ .f = ~ lm(y ~ 1 + x + I(x^2) + I(x^3), data = .))) %>%
+ pivot_longer(cols = contains("model"),
+ names_to = "model",
+ values_to = "fit") %>%
+ mutate(rmse = map2_dbl(.x = fit,
+ .y = test,
+ .f = ~ rmse(.x, .y)))
+
+df.cross %>%
+ group_by(model) %>%
+ summarize(mean_rmse = mean(rmse))
# A tibble: 3 × 2
model mean_rmse
<chr> <dbl>
@@ -1264,29 +1243,29 @@ 15.3.3.3 Monte Carlo cross-validation
15.3.4 Bootstrap
We can also use the modelr
package for bootstrapping. The idea is the same as when we did cross-validation. We create a number of data sets from our original data set. Instead of splitting the data set into a training and a test set, for bootstrapping we sample values from the original data set with replacement. Doing so, we can, for example, calculate confidence intervals for different statistics of interest.
Here is an example of how to bootstrap a confidence interval for the mean.
-# make example reproducible
-set.seed(1)
-
-sample_size = 10
-
-# sample
-df.data = tibble(participant = 1:sample_size,
- x = runif(sample_size, min = 0, max = 1))
-
-# mean of the actual sample
-mean(df.data$x)
+# make example reproducible
+set.seed(1)
+
+sample_size = 10
+
+# sample
+df.data = tibble(participant = 1:sample_size,
+ x = runif(sample_size, min = 0, max = 1))
+
+# mean of the actual sample
+mean(df.data$x)
[1] 0.5515139
-# bootstrap to get confidence intervals around the mean
-df.data %>%
- bootstrap(n = 1000) %>% # create 1000 bootstrapped samples
- mutate(estimate = map_dbl(.x = strap,
- .f = ~ .x %>%
- as_tibble() %>%
- pull(x) %>%
- mean())) %>%
- summarize(mean = mean(estimate),
- low = quantile(estimate, 0.025), # calculate the 2.5 / 97.5 percentiles
- high = quantile(estimate, 0.975))
+# bootstrap to get confidence intervals around the mean
+df.data %>%
+ bootstrap(n = 1000) %>% # create 1000 bootstrapped samples
+ mutate(estimate = map_dbl(.x = strap,
+ .f = ~ .x %>%
+ as_tibble() %>%
+ pull(x) %>%
+ mean())) %>%
+ summarize(mean = mean(estimate),
+ low = quantile(estimate, 0.025), # calculate the 2.5 / 97.5 percentiles
+ high = quantile(estimate, 0.975))
# A tibble: 1 × 3
mean low high
<dbl> <dbl> <dbl>
@@ -1303,25 +1282,25 @@ 15.3.5 AIC and BIC
\(k\) is the number of parameters in the model, \(n\) is the number of observations, and \(\hat L\) is the maximized value of the likelihood function of the model. Both AIC and BIC trade off model fit (as measured by the maximum likelihood of the data \(\hat L\)) and the number of parameters in the model.
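For reference, the formulas referred to above are \(\text{AIC} = 2k - 2\ln(\hat L)\) and \(\text{BIC} = \ln(n) \cdot k - 2\ln(\hat L)\); these are the same expressions used in the by-hand calculation further below.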
Calculating AIC and BIC in R is straightforward. We simply need to fit a linear model, and then call the AIC()
or BIC()
functions on the fitted model like so:
-set.seed(0)
-
-# let's generate some data
-df.example = tibble(x = runif(20, min = 0, max = 1),
- y = 1 + 3 * x + rnorm(20, sd = 2))
-
-# fit a linear model
-fit = lm(formula = y ~ 1 + x,
- data = df.example)
-
-# get AIC
-AIC(fit)
+set.seed(0)
+
+# let's generate some data
+df.example = tibble(x = runif(20, min = 0, max = 1),
+ y = 1 + 3 * x + rnorm(20, sd = 2))
+
+# fit a linear model
+fit = lm(formula = y ~ 1 + x,
+ data = df.example)
+
+# get AIC
+AIC(fit)
[1] 75.47296
-
+
[1] 78.46016
We can also just use the broom
package to get that information:
-
+
# A tibble: 1 × 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
@@ -1329,86 +1308,88 @@ 15.3.5 AIC and BIC
# plot the data with a linear model fit
-ggplot(data = df.example,
- mapping = aes(x = x,
- y = y)) +
- geom_point(size = 2) +
- geom_smooth(method = "lm",
- color = "black")
-
+# plot the data with a linear model fit
+ggplot(data = df.example,
+ mapping = aes(x = x,
+ y = y)) +
+ geom_point(size = 2) +
+ geom_smooth(method = "lm",
+ color = "black")
+
Now, let’s take a look at the residuals by plotting the fitted values on the x axis, and the residuals on the y axis.
-# residual plot
-df.plot = df.example %>%
- lm(formula = y ~ x,
- data = .) %>%
- augment() %>%
- clean_names()
-
-ggplot(data = df.plot,
- mapping = aes(x = fitted,
- y = resid)) +
- geom_point(size = 2)
-
+# residual plot
+df.plot = df.example %>%
+ lm(formula = y ~ x,
+ data = .) %>%
+ augment() %>%
+ clean_names()
+
+ggplot(data = df.plot,
+ mapping = aes(x = fitted,
+ y = resid)) +
+ geom_point(size = 2)
+
Remember that the linear model makes the assumption that the residuals are normally distributed with mean 0 (which is always the case if we fit a linear model) and some fitted standard deviation. In fact, the standard deviation of the normal distribution is fitted such that the overall likelihood of the data is maximized.
Let’s make a plot that shows a normal distribution alongside the residuals:
-# define a normal distribution
-df.normal = tibble(y = seq(-5, 5, 0.1),
- x = dnorm(y, sd = 2) + 3.75)
-
-# show the residual plot together with the normal distribution
-ggplot(data = df.plot ,
- mapping = aes(x = fitted, y = resid)) +
- geom_point() +
- geom_path(data = df.normal,
- aes(x = x, y = y),
- size = 2)
-
+# define a normal distribution
+df.normal = tibble(y = seq(-5, 5, 0.1),
+ x = dnorm(y, sd = 2) + 3.75)
+
+# show the residual plot together with the normal distribution
+ggplot(data = df.plot ,
+ mapping = aes(x = fitted,
+ y = resid)) +
+ geom_point() +
+ geom_path(data = df.normal,
+ aes(x = x,
+ y = y),
+ size = 2)
+
To determine the likelihood of the data given the model \(\hat L\), we now calculate the likelihood of each point (with the dnorm()
function), and then multiply the likelihoods of all data points to get the overall likelihood of the data. We can simply multiply the likelihoods since we assume that the data points are independent.
Instead of multiplying likelihoods, we usually sum the log likelihoods. This is because when we multiply many small values, the result gets so close to 0 that computers run into numerical precision problems (underflow). By taking logs, we avoid these errors.
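Here is a small illustration (not from the chapter) of why working on the log scale helps:

```
# multiplying many small likelihoods underflows to 0 in double precision,
# while summing their logs stays perfectly representable
p = rep(1e-5, 100)
prod(p)     # 0 (the true value, 1e-500, is smaller than the smallest representable double)
sum(log(p)) # -1151.293
```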
To better understand AIC and BIC, let’s calculate them by hand:
-# we first get the estimate of the standard deviation of the residuals
-sigma = fit %>%
- glance() %>%
- pull(sigma)
-
-# then we calculate the log likelihood of the model
-log_likelihood = fit %>%
- augment() %>%
- mutate(likelihood = dnorm(.resid, sd = sigma)) %>%
- summarize(logLik = sum(log(likelihood))) %>%
- as.numeric()
-
-# then we calculate AIC and BIC using the formulas introduced above
-aic = 2*3 - 2 * log_likelihood
-bic = log(nrow(df.example)) * 3 - 2 * log_likelihood
-
-print(aic)
+# we first get the estimate of the standard deviation of the residuals
+sigma = fit %>%
+ glance() %>%
+ pull(sigma)
+
+# then we calculate the log likelihood of the model
+log_likelihood = fit %>%
+ augment() %>%
+ mutate(likelihood = dnorm(.resid, sd = sigma)) %>%
+ summarize(logLik = sum(log(likelihood))) %>%
+ as.numeric()
+
+# then we calculate AIC and BIC using the formulas introduced above
+aic = 2*3 - 2 * log_likelihood
+bic = log(nrow(df.example)) * 3 - 2 * log_likelihood
+
+print(aic)
[1] 75.58017
-
+
[1] 78.56737
Cool! The values are the same as when we use the glance()
function like so (except for a small difference, which comes from using the df-adjusted residual standard error reported by glance() rather than the maximum-likelihood estimate of the standard deviation):
-
+
# A tibble: 1 × 2
AIC BIC
<dbl> <dbl>
1 75.5 78.5
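If you want the hand calculation to line up with AIC() and BIC() exactly, a minimal sketch (my reading of where the small discrepancy comes from, not part of the chapter) is to use the maximum-likelihood estimate of the residual standard deviation, which divides by \(n\) rather than \(n - 2\). It assumes fit and df.example from the code above; sigma_mle and log_likelihood_mle are illustrative names.

```
# ML estimate of sigma: divide the sum of squared residuals by n, not n - 2
sigma_mle = sqrt(sum(residuals(fit)^2) / nrow(df.example))

# log likelihood of the data under the ML estimate of sigma
log_likelihood_mle = sum(dnorm(residuals(fit), sd = sigma_mle, log = TRUE))

2 * 3 - 2 * log_likelihood_mle                      # should match AIC(fit)
log(nrow(df.example)) * 3 - 2 * log_likelihood_mle  # should match BIC(fit)
```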
15.3.5.1 log() is your friend
-ggplot(data = tibble(x = c(0, 1)),
- mapping = aes(x = x)) +
- stat_function(fun = "log",
- size = 1) +
- labs(x = "probability",
- y = "log(probability)") +
- theme(axis.text = element_text(size = 24),
- axis.title = element_text(size = 26))
+ggplot(data = tibble(x = c(0, 1)),
+ mapping = aes(x = x)) +
+ stat_function(fun = "log",
+ size = 1) +
+ labs(x = "probability",
+ y = "log(probability)") +
+ theme(axis.text = element_text(size = 24),
+ axis.title = element_text(size = 26))
Warning: Computation failed in `stat_function()`
Caused by error in `fun()`:
! could not find function "fun"
-
+
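One possible cause of the warning above is passing the function name to stat_function() as a string. A minimal sketch of a variant that should draw the curve, passing the function object itself (a suggested fix, not part of the chapter):

```
# pass the function itself rather than its name as a string
ggplot(data = tibble(x = c(0, 1)),
       mapping = aes(x = x)) +
  stat_function(fun = log,   # note: log(0) = -Inf, so the left-most point may be dropped
                size = 1) +
  labs(x = "probability",
       y = "log(probability)")
```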
@@ -1431,7 +1412,7 @@ 15.4.2 Reading
15.5 Session info
Information about this R session including which version of R was used, and what packages were loaded.
-
+
R version 4.3.2 (2023-10-31)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Sonoma 14.1.2
@@ -1452,7 +1433,7 @@ 15.5 Session info