diff --git a/11-mlm-viz.qmd b/11-mlm-viz.qmd
index 44eaa459..486e84a4 100644
--- a/11-mlm-viz.qmd
+++ b/11-mlm-viz.qmd
@@ -117,7 +117,7 @@
 I refer to this as _effect size scaling_, because it is similar to an effect size index used in
 univariate models, e.g., $ES = (\bar{y}_1 - \bar{y}_2) / s_e$ in a two-group, univariate design.
 This is illustrated in ...
-
+
 The geometry of ellipsoids and multivariate tests allow us to go further with another
 re-scaling of the $\mat{H}$ ellipsoid
@@ -147,7 +150,7 @@
 This is done simply by dividing $\mat{H} / df_e$ further by the $\alpha$-critical value
 of the corresponding test statistic to show the strength of evidence against the null hypothesis.
 Among the various multivariate test statistics,
-Roy's maximum root test, based on the largest eigenvalue $\lambda_1$ of $\mat{H} \mat{E}^{-1},
+Roy's maximum root test, based on the largest eigenvalue $\lambda_1$ of $\mat{H} \mat{E}^{-1}$,
 gives $\mat{H} / (\lambda_\alpha df_e)$
 which has the visual property that the scaled $\mat{H}$ ellipsoid will protrude _somewhere_
 outside the standard $\mat{E}$ ellipsoid if and only if
diff --git a/R/digits-tSNE.R b/R/digits-tSNE.R
new file mode 100644
index 00000000..e8b8e8ee
--- /dev/null
+++ b/R/digits-tSNE.R
@@ -0,0 +1,162 @@
+# R tSNE with 3D plots
+# https://www.appsilon.com/post/r-tsne
+
+library(dplyr)
+library(Rtsne)
+library(ggplot2)
+library(plotly)
+
+# MNIST in CSV form: https://github.com/pjreddie/mnist-csv-png?tab=readme-ov-file
+digits <- read.csv("mnist_train.csv", header = FALSE)
+colnames(digits) <- c("digit", paste0("pixel", 1:784))
+
+# print first row as a 28 x 28 matrix of pixel intensities
+first_digit <- matrix(digits[1, ] |>
+  select(-digit) |> unlist(), nrow = 28, ncol = 28, byrow = TRUE)
+first_digit
+
+# visualize the digit
+rotate <- function(x) t(apply(x, 2, rev))
+image(rotate(first_digit), col = gray(255:0 / 255))
+
+# Or, to get a glimpse of a larger portion of the dataset, plot a random sample of 25 digits:
+par(mfrow = c(5, 5))
+for (i in sample(1:nrow(digits), 25)) {
+  digit_matrix <- matrix(digits[i, ] |>
+    select(-digit) |>
+    unlist(), nrow = 28, ncol = 28, byrow = TRUE)
+  image(rotate(digit_matrix), col = gray(255:0 / 255), axes = FALSE, xlab = "", ylab = "")
+}
+par(mfrow = c(1, 1))
+
+# That's too much to include in a single chart, so we'll reduce the per-class sample size to 100:
+data_sample <- digits |>
+  group_by(digit) |>
+  sample_n(100) |>
+  ungroup()
+
+data_sample |>
+  group_by(digit) |>
+  count()
+
+# let's also make a feature/target split:
+X <- data_sample |> select(-digit)
+y <- data_sample |> select(digit)
+
+# Run tSNE in 2 dimensions
+tsne_results <- Rtsne(X, dims = 2, perplexity = 25, verbose = TRUE, max_iter = 1500)
+
+tsne_df <- data.frame(
+  X = tsne_results$Y[, 1],
+  Y = tsne_results$Y[, 2],
+  digit = y$digit
+)
+colors <- c("#E6194B", "#3CB44B", "#FFE119", "#4363D8", "#F58231",
+            "#911EB4", "#46F0F0", "#F032E6", "#BCF60C", "#FABEBE")
+
+ggplot(tsne_df, aes(x = X, y = Y, color = factor(digit))) +
+  geom_point(size = 1.5) +
+  scale_color_manual(values = colors) +
+  labs(
+    title = "t-SNE 2-Dimensional Digit Visualization",
+    x = "t-SNE Dimension 1",
+    y = "t-SNE Dimension 2"
+  ) +
+  theme_minimal() +
+  theme(
+    plot.title = element_text(size = 20)
+  )
+
+# Plotting t-SNE Results in 3 Dimensions
+tsne_results <- Rtsne(X, dims = 3, perplexity = 30, verbose = TRUE, max_iter = 1500)
+
+tsne_df <- data.frame(
+  X = tsne_results$Y[, 1],
+  Y = tsne_results$Y[, 2],
+  Z = tsne_results$Y[, 3],
+  digit = factor(y$digit)
+)
+head(tsne_df)
+
+hover_text <- paste(
+  "Digit:", tsne_df$digit, "",
+  "Dimension 1:", round(tsne_df$X, 3),
+  "Dimension 2:", round(tsne_df$Y, 3),
+  "Dimension 3:", round(tsne_df$Z, 3)
+)
+
+plot_ly(
+  data = tsne_df,
+  x = ~X,
+  y = ~Y,
+  z = ~Z,
+  type = "scatter3d",
+  mode = "markers",
+  marker = list(size = 6),
+  text = hover_text,
+  hoverinfo = "text",
+  color = ~digit,
+  colors = colors
+) |>
+  layout(
+    title = "t-SNE 3-Dimensional Digit Visualization",
+    scene = list(
+      xaxis = list(title = "t-SNE Dimension 1"),
+      yaxis = list(title = "t-SNE Dimension 2"),
+      zaxis = list(title = "t-SNE Dimension 3")
+    )
+  )
+
+# Perplexity Tuning
+
+# Unlike PCA, the results of t-SNE will often vary (sometimes significantly) because of the
+# tweakable parameters and the nature of gradient descent. This section demonstrates how to
+# tweak the most important parameter - perplexity - and shows just how different the results are.
+# Values for this parameter typically range from 5 to 50, so we go over this range in steps of 5.
+
+library(gganimate)
+
+perplexity_values <- c(5, 10, 15, 20, 25, 30, 35, 40, 45, 50)
+
+tsne_results_list <- lapply(perplexity_values, function(perp) {
+  tsne <- Rtsne(X, dims = 2, perplexity = perp, verbose = TRUE, max_iter = 1500)
+  data.frame(
+    X = tsne$Y[, 1],
+    Y = tsne$Y[, 2],
+    digit = y$digit,
+    perplexity = perp
+  )
+})
+
+tsne_df <- do.call(rbind, tsne_results_list)
+
+plot <- ggplot(tsne_df, aes(x = X, y = Y, color = factor(digit))) +
+  geom_point(size = 1.5) +
+  scale_color_manual(values = colors) +
+  labs(
+    title = "t-SNE 2-Dimensional Digit Visualization",
+    subtitle = "Perplexity: {closest_state}",  # displays the current perplexity value
+    x = "t-SNE Dimension 1",
+    y = "t-SNE Dimension 2"
+  ) +
+  theme_minimal() +
+  theme(
+    plot.title = element_text(size = 20),
+    plot.subtitle = element_text(size = 16)
+  ) +
+  transition_states(perplexity, transition_length = 2, state_length = 1) +
+  ease_aes("linear")
+
+animate(
+  plot,
+  width = 800,
+  height = 600,
+  res = 100,
+  nframes = 300,
+  fps = 30,
+  renderer = gifski_renderer(file = "tsne-2d-animated.gif")
+)
diff --git a/bib/pkgs.bib b/bib/pkgs.bib
index 4ad18e43..b1a988e1 100644
--- a/bib/pkgs.bib
+++ b/bib/pkgs.bib
@@ -34,8 +34,7 @@ @Manual{R-broom
 }
 
 @Manual{R-candisc,
-  title = {candisc: Visualizing Generalized Canonical Discriminant and Canonical
-Correlation Analysis},
+  title = {candisc: Visualizing Generalized Canonical Discriminant and Canonical Correlation Analysis},
   author = {Michael Friendly and John Fox},
   year = {2024},
   note = {R package version 0.9.0},
@@ -135,7 +134,7 @@ @Manual{R-effectsize
   title = {effectsize: Indices of Effect Size},
   author = {Mattan S. Ben-Shachar and Dominique Makowski and Daniel Lüdecke and Indrajeet Patil and Brenton M. Wiernik and Rémi Thériault and Philip Waggoner},
   year = {2024},
-  note = {R package version 0.8.9},
+  note = {R package version 1.0.0},
   url = {https://easystats.github.io/effectsize/},
 }
 
@@ -207,7 +206,7 @@ @Manual{R-ggbiplot
   title = {ggbiplot: A Grammar of Graphics Implementation of Biplots},
   author = {Vincent Q. Vu and Michael Friendly},
   year = {2024},
-  note = {R package version 0.6.3},
+  note = {R package version 0.6.2},
   url = {https://github.com/friendly/ggbiplot},
 }
 
@@ -333,19 +332,11 @@ @Manual{R-lattice
   url = {https://lattice.r-forge.r-project.org/},
 }
 
-@Manual{R-liminal,
-  title = {liminal: Multivariate Data Visualization with Tours and Embeddings},
-  author = {Stuart Lee},
-  year = {2021},
-  note = {R package version 0.1.2},
-  url = {https://github.com/sa-lee/liminal/},
-}
-
 @Manual{R-lubridate,
   title = {lubridate: Make Dealing with Dates a Little Easier},
   author = {Vitalie Spinu and Garrett Grolemund and Hadley Wickham},
-  year = {2023},
-  note = {R package version 1.9.3},
+  year = {2024},
+  note = {R package version 1.9.4},
   url = {https://lubridate.tidyverse.org},
 }
diff --git a/bib/pkgs.txt b/bib/pkgs.txt
index 935da98f..cc14beb4 100644
--- a/bib/pkgs.txt
+++ b/bib/pkgs.txt
@@ -116,3 +116,30 @@ knitr
 matlib
 patchwork
 tidyr
+broom
+car
+carData
+dplyr
+ggplot2
+heplots
+knitr
+tidyr
+broom
+candisc
+car
+carData
+dplyr
+ggplot2
+heplots
+knitr
+tidyr
+broom
+candisc
+car
+carData
+corrgram
+dplyr
+ggplot2
+heplots
+knitr
+tidyr
diff --git a/docs/01-intro.html b/docs/01-intro.html
index 52f81896..91279784 100644
--- a/docs/01-intro.html
+++ b/docs/01-intro.html
@@ -378,7 +378,7 @@

1.4 Visualization is harder

However, with two or more response variables, visualizations for understanding the effects of predictors, model parameters, or model diagnostics are not as simple for multivariate models as they are for their univariate counterparts. Consequently, the results of such studies are often explored and discussed solely in terms of coefficients and significance, and visualizations of the relationships are only provided for one response variable at a time, if at all. This tradition can mask important nuances and lead researchers to draw erroneous conclusions.

The aim of this book is to describe and illustrate some central methods that we have developed over the last ten years that aid in the understanding and communication of the results of multivariate linear models (Friendly, 2007; Friendly & Meyer, 2016). These methods rely on data ellipsoids as simple, minimally sufficient visualizations of variance that can be shown in 2D and 3D plots. As will be demonstrated, the Hypothesis-Error (HE) plot framework applies this idea to the results of multivariate tests of linear hypotheses.

Further, when there are more than just a few outcome variables, the important nectar of their relationships to predictors can often be distilled in a multivariate juicer: a projection of those relationships into a low-dimensional space that captures most of the flavor. This idea can be applied using canonical correlation plots and canonical discriminant HE plots.


As we can see, all four datasets have nearly identical univariate and bivariate statistical measures. You can only see how they differ in graphs, which show their true natures to be vastly different.

Figure 2.1 is an enhanced version of Anscombe’s plot of these data, adding helpful annotations to show visually the underlying statistical summaries.

This figure is produced as follows, using a single call to ggplot(), faceted by dataset. As we will see later (Section 3.2), the data ellipse (produced by stat_ellipse()) reflects the correlation between the variables.

desc <- tibble(
   dataset = 1:4,

The method Anscombe used to compose his quartet is unknown, but it turns out that there is a method to construct a wider collection of datasets with identical statistical properties. After all, in a bivariate dataset with \(n\) observations, the correlation has \((n-2)\) degrees of freedom, so it is possible to choose \(n-2\) of the \((x, y)\) pairs to yield any given value. As it happens, it is also possible to create any number of datasets with the same means, standard deviations and correlations with nearly any shape you like — even a dinosaur!

The Datasaurus Dozen was first publicized by Alberto Cairo in a blog post, and the datasets are available in the datasauRus package (Davies et al., 2022). As shown in Figure 2.2, the sets include a star, cross, circle, bullseye, horizontal and vertical lines, and, of course, the “dino”. The method (Matejka & Fitzmaurice, 2017) uses simulated annealing, an iterative process that perturbs the points in a scatterplot, moving them towards a given shape while keeping the statistical summaries close to the fixed target value.

The datasauRus package just contains the datasets, but a general method, called statistical metamers, for producing such datasets has been described by Elio Campitelli and implemented in the metamer package.


The essential idea of a statistical “quartet” is to illustrate four quite different datasets or circumstances that seem superficially the same, but are paradoxically very different when you look behind the scenes. For example, in the context of causal analysis, Gelman et al. (2023) illustrated sets of four graphs, within each of which all four represent the same average (latent) causal effect but with much different patterns of individual effects; McGowan et al. (2023) provide another illustration with four seemingly identical data sets, each generated by a different causal mechanism. As an example of machine learning models, Biecek et al. (2023) introduced the “Rashomon Quartet”, a synthetic dataset for which four models from different classes (linear model, regression tree, random forest, neural network) have practically identical predictive performance. In all cases, the paradox is resolved when visualization reveals the distinct ways of understanding structure in the data. The quartets package contains these and other variations on this theme.

2.1.2 One lousy point can ruin your day

In the mid 1980s, a consulting client had a strange problem.1 She was conducting a study of the relation between body image and weight preoccupation in exercising and non-exercising people (Davis, 1990). As part of the design, the researcher wanted to know if self-reported weight could be taken as a reliable indicator of true weight measured on a scale. It was expected that the correlations between reported and measured weight should be close to 1.0, and the slope of the regression lines for men and women should also be close to 1.0. The dataset is car::Davis.

She was therefore very surprised to see the following numerical results: For men, the correlation was nearly perfect, but not so for women.

data(Davis, package="carData")
In Figure 2.4, this discrepant observation again stands out like a sore thumb, but it makes very little difference in the fitted line for females. The reason is that this point is well within the range of the \(x\) variable (repwt). To impact the slope of the regression line, an observation must be unusual in both \(x\) and \(y\). We take up the topic of how to detect influential observations and what to do about them in Chapter 6.

The value of such plots is not only that they can reveal possible problems with an analysis, but also that they can help identify the reasons and suggest corrective action. What went wrong here? Examination of the original data showed that this person switched the values, recording her reported weight in the box for measured weight and vice versa.

2.1.3 Shaken, not stirred: The 1970 Draft Lottery

In an attempt to make the selection process also transparent, the proceedings were covered on radio, TV and film, and the dates were posted in order on a large display board. The first capsule—drawn by Congressman Alexander Pirnie (R-NY) of the House Armed Services Committee—contained the date September 14, so all men born on September 14 in any year between 1944 and 1950 were assigned lottery number 1, and would be drafted first. April 24 was drawn next, then December 30, February 14, and so on until June 8, selected last. At the time of the drawing, US officials stated that those with birthdays drawn in the first third would almost certainly be drafted, while those in the last third would probably avoid the draft (Fienberg, 1971).

I watched this unfold with considerable interest because I was eligible for the Draft that year. I was dismayed when my birthday, May 7, came up ranked 35. Ugh!

The data, from the official Selective Service listing, are contained in the dataset vcdExtra::Draft1970, ordered by Month and birthdate (Day), with Rank as the order in which the birthdates were drawn.


#> $ Rank  <int> 305, 159, 251, 215, 101, 224, 306, 199, 194, 325, 32…
#> $ Month <ord> Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Ja…

A basic scatterplot, slightly prettified, is shown in Figure 2.5. The points are colored by month, and month labels are shown at the bottom.

# make markers for months at their mid points
 months <- data.frame(

If you stare at the graph in Figure 2.5 long enough, you can make out a sparsity of points in the upper right corner and also in the lower left corner, compared to the opposite corners.

Visual smoothers

Fitting a linear regression line or a smoothed (loess) curve can bring out the signal lurking in the background of a field of nearly random points. Figure 2.6 shows a definite trend to lower ranks for birthdays toward the end of the year. Those born earlier in the year were more likely to be given lower ranks, calling them up sooner for the draft.

ggplot(Draft1970, aes(x = Day, y = Rank)) +
   geom_point(size = 2.5, shape = 21, 

Statistical summaries

Another way to enhance the signal-to-noise ratio of a graph is to plot summaries of the messy data points. For example, you might make boxplots of the ranks by month, or calculate and plot the mean or median rank by month and plot those together with some indication of variability within month.

Figure 2.7 plots the average Rank for each month, with error bars showing the mean \(\pm 1\) standard error, against the average Day. The message of rank decreasing nearly linearly with month is now more dramatic. The correlation between the means is \(r = -0.867\).

means <- Draft1970 |>
   group_by(Month) |>
The visual impression of a linearly decreasing trend in lottery rank is much stronger in Figure 2.7 than in Figure 2.6 for two reasons:

  • Replacing the data points with their means strengthens the signal in relation to noise.
  • The narrower vertical range (100–250) in the plot of means makes the slope of the line appear steeper. (However, the correlation of the data points, \(r = -0.231\), is much weaker than that of the means.)

What happened here?

Previous lotteries carried out by drawing capsules from a container had occasionally suffered the embarrassment that an empty capsule was selected because of vigorous mixing (Fienberg, 1971). So for the 1970 lottery, the birthdate capsules were put in cardboard boxes, one for each month, and these were carefully emptied into the glass container in order of month: Jan., Feb., … Dec., each batch gently shaken in atop the pile already there. All might have been well had the persons drawing the capsules put their hands in truly randomly, but generally they picked from toward the top of the container. Consequently, those born later in the year had a greater chance of being picked earlier.

There was considerable criticism of this procedure once the flaw had been revealed by analyses such as described here. In the following year, the Selective Service called upon the National Bureau of Standards to devise a better procedure. In 1971 they used two drums, one with the dates of the year and another with the rank numbers 1-366. As a date capsule was drawn randomly from the first drum, another from the numbers drum was picked simultaneously, giving a doubly-randomized sequence.

Of course, if they had R, the entire process could have been done using sample():
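The code chunk itself did not survive in this extract; a minimal sketch of such a simulation might be (the seed is arbitrary, chosen only for reproducibility):

set.seed(1157)
days <- seq(as.Date("1972-01-01"), as.Date("1972-12-31"), by = "day")  # any leap year gives all 366 birthdates
lottery <- data.frame(birthday = format(days, "%b %d"),
                      Rank = sample(366))   # a truly random permutation of ranks
head(lottery)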

In the simplest case, a data point \(\mathbf{x} = (x_1, x_2)\) in two dimensions can be represented geometrically as a vector from the origin as shown in Figure 3.35. This point can be projected on any one-dimensional axis \(\mathbf{p}\) by dropping a line perpendicular to \(\mathbf{p}\), which is the idea of a shadow. Mathematically, this is calculated as the product \(\mathbf{x}^\mathsf{T} \mathbf{p} = x_1 p_1 + x_2 p_2\) and suitably normalized to give the correct length. …
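To make the algebra concrete, here is a minimal sketch (not code from the book) of projecting one point onto a unit-length axis:

x <- c(3, 2)               # a data point in 2D
p <- c(1, 1) / sqrt(2)     # a unit vector defining the 1D axis
sum(x * p)                 # x'p : the length of the shadow of x on p
sum(x * p) * p             # the projected point itself, in the original coordinates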


Y2 <- X %*% P2

In this example, the matrix \(\mathbf{X}\) consists of 8 points at the vertices of a cube of size 10, as shown in Figure 3.36 (a). The projections \(\mathbf{Y}_1 = \mathbf{X} \mathbf{P}_1\) and \(\mathbf{Y}_2 = \mathbf{X} \mathbf{P}_2\) are shown in panels (b) and (c). To make it easier to relate the points in different views, shapes and colors are assigned so that each point has a unique combination of these attributes.6

pch <- rep(15:18, times = 2)
 colors <- c("red", "blue", "darkgreen", "brown")
But, if we are traveling in the projection space of \(\mathbf{Y}\), we need some signposts to tell us how the new dimensions relate to those of \(\mathbf{X}\). The answer is provided simply by plotting the rows of \(\mathbf{P}\) as vectors, as shown in Figure 3.37. In these plots, each row of \(\mathbf{P}_1\) and \(\mathbf{P}_2\) appears as a vector from the origin. Its direction shows the contribution each of \(\mathbf{x}_1, \mathbf{x}_2, \mathbf{x}_3\) makes to the new coordinates \(\mathbf{y}_1\) and \(\mathbf{y}_2\).

In \(\mathbf{P}_1\), the projected variable \(\mathbf{y}_1\) is related only to \(\mathbf{x}_1\), while \(\mathbf{y}_2\) is related only to \(\mathbf{x}_2\); \(\mathbf{x}_3\) makes no contribution, and appears at the origin. However, in the projection given by \(\mathbf{P}_2\), \(\mathbf{x}_1\) and \(\mathbf{x}_2\) make the same contribution to \(\mathbf{y}_1\), while \(\mathbf{x}_3\) has no contribution to that horizontal axis. The vertical axis, \(\mathbf{y}_2\), here is completely aligned with \(\mathbf{x}_3\); \(\mathbf{x}_1\) and \(\mathbf{x}_2\) have vertical components that are half of that for \(\mathbf{x}_3\) in absolute value.

library(matlib)
 op <- par(mar=c(4, 5, 1, 1)+.1)

3.7.1.1 Vector lengths

In Figure 3.37, the lengths of the \(\mathbf{x}\) vectors reflect the relative degree to which each variable is represented in the space of the projection, and this is important for interpretation. For the \(\mathbf{P}_1\) projection, \(\mathbf{x}_3\) is of length 0, while \(\mathbf{x}_1\) and \(\mathbf{x}_2\) fill the unit circle. In the projection given by \(\mathbf{P}_2\), all three \(\mathbf{x}\) are approximately the same length.

In algebra, the length of a vector \(\mathbf{x}\) is \(||\mathbf{x}|| = (\mathbf{x}^\mathsf{T} \mathbf{x})^{1/2} = \sqrt{\Sigma x_i^2}\), the Euclidean distance of the tip of the vector from the origin. In R, we calculate the lengths of row vectors in a projection matrix by transposing and using matlib::len().

P1 |> t() |> matlib::len()
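Equivalently, without matlib, these lengths are just the square roots of the row sums of squares; a quick check, assuming P1 is the projection matrix defined earlier:

sqrt(rowSums(P1^2))    # same result as P1 |> t() |> matlib::len()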

3.7.1.2 Joint-views

To interpret such projections, we want to see both the projected data and the signposts that tell us where we are in relation to the original variables. To do this, we can overlay the variable vectors represented by the rows of the projection matrix \(\mathbf{P}\) onto plots like Figure 3.36 (b) and Figure 3.36 (c) to see how the axes in a projection relate to those in the data. To place these together on the same plot, we can either center the columns of \(\mathbf{Y}\) at their means or shift the columns of \(\mathbf{P}\) to colMeans(Y). It is only the directions of the vectors that matter, so we are free to scale their lengths by any convenient factor.

Y2s <- scale(Y2, scale=FALSE)       # center Y2
 plot(Y2s, cex = 3, 

vectors(-vecs, labels = NULL, lty = 1, angle = 1, col = "gray")

The plot in Figure 3.38 illustrates this, centering \(\mathbf{Y}\), and multiplying the vectors in \(\mathbf{P}\) by 7. To check your understanding, try to see if you can relate what is shown in this plot to the 3D plot in Figure 3.36 (a).

The idea of viewing low-dimensional projections of data together with vectors representing the contributions of the original variables to the dimensions shown in a display is also the basis of biplot techniques (Section 4.3) we will use in relation to principal components analysis.

3.7.2 Touring methods

The trick of statistical touring methods is to generate a smooth sequence of interpolated projections \(\mathbf{P}_{(t)}\) indexed by time \(t\), \(\mathbf{P}_{(1)}, \mathbf{P}_{(2)}, \mathbf{P}_{(3)}, \dots, \mathbf{P}_{(T)}\). This gives a path of views \(\mathbf{Y}_{(t)} = \mathbf{X} \mathbf{P}_{(t)}\), that can be animated in successive frames, as shown schematically in Figure 3.39.

Asimov’s (1985) original idea of the grand tour was that of a random path, picking orthogonal projections \(\mathbf{P}_{(i)}\) at random. Given enough time, the grand tour gives a space-filling path and would eventually show every possible projection of the data. But it does so smoothly, by interpolating from one projection to the next. In the travel analogy, the path by road from London to Paris might go smoothly through Kent to Dover, thence via Amiens and Beauvais before reaching Paris. By air, the tour would follow a smoother geodesic path, and this is what the grand tour does. The sense in watching an animation of a statistical grand tour is that of continuous motion. The grand tour algorithm is described in detail by Buja et al. (2005) and Cook et al. (2008).

3.7.2.1 Guided tours

The next big idea was that rather than traveling randomly in projection space one could take a guided tour, following a path that leads to “interesting projections”, such as those that reveal clusters, gaps in data space or outliers. This idea, called projection pursuit (Cook et al., 1995), works by defining a measure of interestingness of a data projection. In a guided tour, the next projection is chosen to increase that index, so over time the projection moves toward one that maximizes that index.

In the time since Asimov (1985), there have been many implementations of touring visualization methods. XGobi (Swayne et al., 1998) for X-Windows displays on Linux systems provided a test-bed for dynamic, interactive graphic methods; its successor, GGobi (Cook & Swayne, 2007; Swayne et al., 2003) extended the range of touring methods to include a wider variety of projection pursuit indices.

3.7.2.2 tourr package

The current state of the art is best captured in the tourr package for R (Wickham et al., 2011; Wickham & Cook, 2024). It defines a tour to consist of three components:

  • data: An \((n \times p)\) numerical data matrix to be viewed.
  • …

  • Holes (holes()): This is sensitive to projections with separated clusters of points, with few points near the origin.

  • Central mass (cmass()): Sensitive to projections with lots of points in the center, but perhaps with some outliers.
  • Linear discriminant analysis (lda_pp()): For data with a grouping factor, optimizes a measure of separation of the group means as in MANOVA or linear discriminant analysis.
  • PDA analysis (pda_pp()): A penalized version of lda_pp() for cases of large \(p\) relative to sample size \(n\) (E.-K. Lee & Cook, 2009).

In addition, there is now a guided_anomaly_tour() that looks for the best projection of observations that are outside the data ellipsoid, finding a view showing observations with large Mahalanobis distances from the centroid.
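A guided tour is run with the same animation functions by swapping in a different tour path; a minimal runnable sketch using the flea data that ships with tourr:

library(tourr)
X <- scale(as.matrix(flea[, 1:6]))               # six numeric measurements
animate_xy(X, tour_path = guided_tour(holes()))  # steer toward projections scoring high on holes()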

3.7.2.3 Penguin tours

Penguins are a traveling species. They make yearly travels inland to breeding sites in early spring, repeating the patterns of their ancestors. Near the beginning of summer, adult penguins and their chicks return to the sea and spend the rest of the summer feeding there (Black et al., 2018). If they were also data scientists, they might wonder about the relations among their cousins of different species and take a tour of their measurements…

For example, using the Penguins dataset, the following calls produce grand tours in 2, 3, and 4 dimensions. The 2D tour is displayed as a scatterplot, the 3D tour using simulated depth as shown by variation in point size and transparency, and the 4D tour is shown using a parallel coordinate plot.
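Those calls are not shown in this extract; a sketch of what they plausibly look like, where peng_scaled and species are assumed names for the scaled numeric penguin measurements and the grouping factor:

library(tourr)
animate_xy(peng_scaled, tour_path = grand_tour(d = 2), col = species)  # 2D: scatterplot
animate_depth(peng_scaled, tour_path = grand_tour(d = 3))              # 3D: simulated depth
animate_pcp(peng_scaled, tour_path = grand_tour(d = 4))                # 4D: parallel coordinates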

Figure 3.42 shows three frames from this movie. The first (a) is the initial frame that shows the projection in the plane of bill depth and bill length. The variable vectors indicate that bill length differentiates Adelie penguins from the others. In frame (b), the three species are widely separated, with bill depth distinguishing Gentoo from the others. In frame (c) the three species are largely mixed, but two points stand out as outliers, with exceptionally long bills compared to the rest.


Figure caption: (a) The lda_pp() criterion optimizes the separation of the means for species relative to within-group variation; (b) the anomalies_index() optimizes the average Mahalanobis distance of points from the centroid.

These examples are intended to highlight what is possible with dynamic graphics for exploring high-dimensional data visually. Cook & Laa (2024) extend the discussion of these methods from Cook & Swayne (2007) (which used GGobi) to the tourr package. They illustrate dimension reduction, various cluster analysis methods, trees and random forests and some machine-learning techniques.

Ideally, we should be able to interact with a tour, …

3.8 Network diagrams

A major theme throughout this chapter has been to understand how to extend data visualization from simple bivariate scatterplots to increasingly more complex situations with larger datasets. With a moderate number of variables, techniques such as smoothing, summarizing with data ellipses and fitted curves, and visual thinning can be used to tame “big \(N\)” datasets with thousands of cases.

However, “big \(p\)” datasets, with more than a moderate number (\(p\)) of variables, still remain a challenge. It is hard to see how the more advanced methods (corrgrams, parallel coordinate plots) described earlier could cope with \(p = 20, 50, 100, 500, \dots\) variables. At some point, each of these begins to break down for the purpose of visualizing associations among many variables. We are forced to thin the information presented in graphs more and more as the number of variables increases.

It turns out that there is a way to increase the number of variables displayed dramatically, if we are mainly interested in the pairwise correlations for reasonably normally distributed data. A graphical network diagram portrays variables by nodes (vertices), connected by (weighted) edges whose properties reflect the strength of connections between pairs, such as a correlation. Such diagrams can reveal properties not readily seen by other means.

As an example, consider Figure 3.45, which portrays the correlations among 25 self-report items reflecting 5 factors (the “Big Five”) considered in personality psychology to represent the dominant aspects of all of personality. These factors are easily remembered by the acronym OCEAN: Openness, Conscientiousness, Extraversion, Agreeableness and Neuroticism. The dataset, psych::bfi, contains data from an online sample of \(n=2800\) with 5 items for each scale.

In this figure (taken from Rodrigues (2021)), the item nodes are labeled according to the OCEAN factor they are assumed to measure. For 25 items, there are \(25 \times 24 / 2 = 300\) correlations, way too much to see. A clearer picture arises when we reduce the number of edges shown according to some criterion. Here, edges are drawn only between nodes where the correlation is considered important by a method (“glasso” = graphical LASSO) designed to make the graph optimally sparse.

library(qgraph)

3.8.2 Partial correlations

Among the more important statistical applications of network graph theory is the idea that you can also use them to study the partial (conditional) associations among variables, with the contributions of all other variables removed, in what are called Graphical Gaussian Models (GGMs) (Højsgaard et al., 2012; Lauritzen, 1996). In a network diagram of these partial associations,

  • The edges between nodes represent the partial correlations between those variables.

  • The absence of an edge between two nodes indicates their variables are conditionally independent, given the other variables.

  • …

Let \(\mathbf{Z}\) denote all the other variables, and let \(\hat{x}_i\) and \(\hat{x}_j\) be the predicted values from the linear regressions of \(x_i\) on \(\mathbf{Z}\) and of \(x_j\) on \(\mathbf{Z}\), respectively. The partial correlation \(p_{ij}\) between \(x_i\) and \(x_j\) controlling for \(\mathbf{Z}\) is given by: \[ p_{x_i,x_j|\mathbf{Z}} = r( x_i, x_j \mid \text{others}) = \text{cor}[ (x_i - \hat{x}_i),\; (x_j - \hat{x}_j)] \tag{3.3}\]

But, rather than running all these linear regressions, they can all be computed from the inverse of the correlation matrix (Whittaker, 1990, Ch. 5), a relation first noted by Dempster (1972). Let \(\mathbf{R}\) be the correlation matrix of the variables. Then the matrix \(\mathbf{P}\) of partial correlations can be obtained from the negative inverse, \(-\mathbf{R}^{-1}\), standardized to a correlation matrix by dividing by the square root of the product of its diagonal elements, \[ P_{ij} = - \frac{R^{-1}_{ij}}{\sqrt{R^{-1}_{ii} \cdot R^{-1}_{jj}}} \:\: . \]
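This computation takes only a few lines in R; a minimal sketch, using mtcars purely as a convenient built-in example:

R <- cor(mtcars[, c("mpg", "disp", "hp", "wt")])
Rinv <- solve(R)                                   # R^{-1}
P <- -Rinv / sqrt(outer(diag(Rinv), diag(Rinv)))   # standardize the negative inverse
diag(P) <- 1                                       # diagonal set to 1 by convention
P                                                  # matrix of partial correlations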

    source("R/pvPlot.R")
    In the pvPlot for robbery and auto theft, New York stands out as an influential, high-leverage point (see Section 6.6); Massachusetts (MA) is noteworthy because auto theft in that state is considerably higher than what would be predicted from all other variables.

3.9 Multivariate thinking and visualization

TODO: These are just initial notes on a chapter summary, and pointing the way to dimension reduction methods in Chapter 4.

This chapter has covered a lot of ground. We started with simple scatterplots and how to enhance them with graphical summaries and annotations …

The two curses

Multivariate data is often said to suffer from the curse of dimensionality (ref: Bellman1957), meaning that as the dimensionality of data increases, the volume of the space increases so fast that the available data become sparse; the amount of data needed often grows exponentially with the dimensionality.



  1. Confidence bands allow us to visualize the uncertainty around a fitted regression curve, which can be of two types: pointwise intervals or simultaneous intervals. The default setting in ggplot2::geom_smooth() calculates pointwise intervals (using stats::predict.lm(..., interval = "confidence")) at a confidence level \(1-\alpha\) for the predicted response at each value \(x_i\) of a predictor; these have the frequentist interpretation that over repeated sampling only \(100\,\alpha\)% of the predictions at \(x_i\) will be outside that interval. In contrast, simultaneous intervals are calculated so that \(1 - \alpha\) is the probability that all of them cover their corresponding true values simultaneously. These are necessarily wider than pointwise intervals. Commonly used methods for constructing simultaneous confidence bands in regression are the Bonferroni and Scheffé methods, which control the family-wise error rate over all values of \(x_i\). See … for precise definitions of these terms. These are different from a prediction band, which is used to represent the uncertainty about the value of a new data-point on the curve, subject to the additional variance reflected in one observation.

  2. The classic study by Cleveland & McGill (1984); Cleveland & McGill (1985) shows that judgements of magnitude along a common scale are more accurate than those along separate, aligned scales.

  3. The dataset was collected by Bernard Blishen, William Carroll and Catherine Moore, but apparently unpublished. A version updated to the 1981 census is described in Blishen et al. (1987).

  4. Other implementations of parallel coordinate plots in R include: MASS::parcoord(), GGally::ggparcoord() and PairViz::pcp(). The ggpcp version used here is the most general.

  5. This example was modified from one used by Cook et al. (2008).

  6. Plot shapes given by pch = 15:18 correspond to: filled square (15), filled circle (16), filled triangle point-up (17), filled diamond (18).

diff --git a/docs/11-mlm-viz.html b/docs/11-mlm-viz.html
new file mode 100644

11  Visualizing Multivariate Models

Tests of multivariate models, including multivariate analysis of variance (MANOVA) for group differences and multivariate multiple regression (MMRA), can be easily visualized by plots of a hypothesis (“H”) data ellipse for the fitted values relative to the corresponding plot of the error ellipse (“E”) of the residuals, which I call the HE plot framework.


For more than a few response variables, these results can be projected onto a lower-dimensional “canonical discriminant” space, providing an even simpler description.


Packages


In this chapter we use the following packages. Load them now:


11.1 HE plot framework


Chapter 9 illustrated the basic ideas of the framework for visualizing multivariate linear models in the context of a simple two group design, using Hotelling’s \(T^2\). The main ideas were illustrated in Figure 9.9.


Having described the statistical ideas behind the MLM in Chapter 10, we can proceed to extend this framework to larger designs. Figure 11.1 illustrates these ideas using the simple one-way MANOVA design of the dogfood data from Section 10.2.1.

Figure 11.1: Dogfood quartet: Illustration of the conceptual ideas of the HE plot framework for the dogfood data. (a) Scatterplot of the data; (b) Summary using data ellipses; (c) HE plot shows the variation in the means in relation to pooled within-group variance; (d) Transformation from data space to canonical space.
  • In data space, each group is summarized by its data ellipse, representing the means and covariances.

  • Variation against the hypothesis of equal means can be seen by the \(\mathbf{H}\) ellipse in the HE plot, representing the data ellipse of the fitted values. Error variance is shown in the \(\mathbf{E}\) ellipse, representing the pooled within-group covariance matrix, \(\mathbf{S}_p\), and the data ellipse of the residuals from the model.

  • The MANOVA (or Hotelling’s \(T^2\)) is formally equivalent to a discriminant analysis, predicting group membership from the response variables, which can be seen in data space. (The main difference is emphasis and goals: MANOVA seeks to test differences among group means, while discriminant analysis aims at classification of the observations into groups.)

  • This effectively projects the \(p\)-dimensional space of the predictors into the smaller canonical space that shows the greatest differences among the groups.


For more complex models, such as MANOVA with multiple factors or multivariate multiple regression, there is one \(\mathbf{H}\) ellipse for each term in the model. …
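In code, an HE plot is produced by fitting a multivariate lm() and passing it to heplots::heplot(). A sketch for the one-way design above, assuming the dogfood data shipped with heplots uses the factor formula and responses start and amount:

library(heplots)
data(dogfood, package = "heplots")
dogfood.mlm <- lm(cbind(start, amount) ~ formula, data = dogfood)
heplot(dogfood.mlm, size = "effect.size")   # effect-size scaling of the H ellipse
heplot(dogfood.mlm, size = "evidence")      # significance (Roy test) scaling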


11.2 HE plot construction


The HE plot is constructed to allow a direct visualization of the “size” of hypothesized terms in a multivariate linear model in relation to unexplained error variation. These can be displayed in 2D or 3D plots, so I use the term “ellipsoid” below to cover all cases.


Error variation is represented by a standard 68% data ellipsoid of the \(\mathbf{E}\) matrix of the residuals in \(\boldsymbol{\Large\varepsilon}\). This is divided by the residual degrees of freedom, so the size of \(\mathbf{E} / \text{df}_e\) is analogous to a mean square error in univariate tests. The choice of 68% coverage allows you to “read” the residual standard deviation as the half-length of the shadow of the \(\mathbf{E}\) ellipsoid on any axis (see Figure 3.10). The \(\mathbf{E}\) ellipsoid is then translated to the overall (grand) means \(\bar{\mathbf{y}}\) of the variables plotted, which allows us to show the means for factor levels on the same scale, facilitating interpretation. In the notation of Equation 3.2, the error ellipsoid is given by \[ \mathcal{E}_c (\bar{\mathbf{y}}, \mathbf{E}) = \bar{\mathbf{y}} \; \oplus \; c\,\mathbf{E}^{1/2} \:\: , \] where \(c = \sqrt{2 F_{2, n-2}^{0.68}}\) for 2D plots and \(c = \sqrt{3 F_{3, n-3}^{0.68}}\) for 3D.
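The scaling constant \(c\) is just an F quantile; a small sketch in R (n is a hypothetical sample size):

n <- 30
c2 <- sqrt(2 * qf(0.68, 2, n - 2))   # radius multiplier for a 2D E ellipse
c3 <- sqrt(3 * qf(0.68, 3, n - 3))   # radius multiplier for a 3D E ellipsoid
c(c2, c3)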


An ellipsoid representing variation in the means of a factor (or any other term reflected in a general linear hypothesis test, Equation 10.6) in the \(\mathbf{H}\) matrix is simply the data ellipse of the fitted values for that term. Dividing the hypothesis matrix by the error degrees of freedom, giving \(\mathbf{H} / \text{df}_e\), puts this on the same scale as the \(\mathbf{E}\) ellipse. I refer to this as effect size scaling, because it is similar to an effect size index used in univariate models, e.g., \(ES = (\bar{y}_1 - \bar{y}_2) / s_e\) in a two-group, univariate design.


This is illustrated in …


The geometry of ellipsoids and multivariate tests allows us to go further with another re-scaling of the \(\mathbf{H}\) ellipsoid that gives a visual test of significance for any term in a MLM. This is done simply by dividing \(\mathbf{H} / df_e\) further by the \(\alpha\)-critical value of the corresponding test statistic to show the strength of evidence against the null hypothesis. Among the various multivariate test statistics, Roy’s maximum root test, based on the largest eigenvalue \(\lambda_1\) of \(\mathbf{H} \mathbf{E}^{-1}\), gives \(\mathbf{H} / (\lambda_\alpha df_e)\), which has the visual property that the scaled \(\mathbf{H}\) ellipsoid will protrude somewhere outside the standard \(\mathbf{E}\) ellipsoid if and only if Roy’s test is significant at significance level \(\alpha\). The critical value \(\lambda_\alpha\) for Roy’s test is \[ \lambda_\alpha = \left(\frac{\text{df}_1}{\text{df}_2}\right) \; F_{\text{df}_1, \text{df}_2}^{1-\alpha} \:\: , \] where \(\text{df}_1 = \max(p, \text{df}_h)\) and \(\text{df}_2 = \text{df}_h + \text{df}_e - \text{df}_1\).
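The critical value is easy to compute directly; a sketch with hypothetical dimensions (p = 2 responses, df_h = 3, df_e = 12, alpha = 0.05):

p <- 2; dfh <- 3; dfe <- 12
df1 <- max(p, dfh)
df2 <- dfh + dfe - df1
lambda_alpha <- (df1 / df2) * qf(1 - 0.05, df1, df2)
lambda_alpha    # H is then scaled by 1 / (lambda_alpha * dfe)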


For these data, the HE plot using significance scaling is shown in the right panel of …


11.3 Canonical discriminant analysis

#> Writing packages to  C:/R/Projects/Vis-MLM-book/bib/pkgs.txt
#> 8  packages used here:
#>  broom, car, carData, dplyr, ggplot2, heplots, knitr, tidyr
\ No newline at end of file
diff --git a/docs/12-eqcov.html b/docs/12-eqcov.html
new file mode 100644
index 00000000..5d86e854
--- /dev/null
+++ b/docs/12-eqcov.html
@@ -0,0 +1,1114 @@

12  Visualizing Equality of Covariance Matrices


To make the preliminary test on variances is rather like putting to sea in a rowing boat to find out whether conditions are sufficiently calm for an ocean liner to leave port. — G. E. P. Box (1953)


This chapter concerns the extension of tests of homogeneity of variance from the classical univariate ANOVA setting to the analogous multivariate (MANOVA) setting. Such tests are a routine but important aspect of data analysis, as particular violations can drastically impact model estimates and the conclusions that can appropriately be drawn (Lix & Keselman, 1996).


Beyond issues of model assumptions, the question of equality of covariance matrices is often of general interest itself. For instance, variability is often an important issue in studies of strict equivalence in laboratories comparing across multiple patient measurements and in other applied contexts (see Gastwirth et al., 2009 for other exemplars). Moreover, the outcomes of such tests often have important consequences for the details of a main method of analysis. Just as the Welch \(t\)-test (Welch, 1947) is now commonly used and reported for a two-group test of differences in means under unequal variances, a preliminary test of equality of covariance matrices is often used in discriminant analysis to decide whether linear (LDA) or quadratic discriminant analysis (QDA) should be applied in a given problem. In such cases, the data at hand should inform the choice of statistical analysis to utilize.


We provide some answers to the following questions:

  • Visualization: How can we visualize differences among group variances and covariance matrices, perhaps in a way that is analogous to what is done to visualize differences among group means? As will be illustrated, differences among covariance matrices can be comprised of spread in overall size (“scatter”) and shape (“orientation”). These can be seen in data space with data ellipses, particularly if the data is centered by shifting all groups to the grand mean.

  • Low-D views: When there are more than a few response variables, what low-dimensional views can show the most interesting properties related to the equality of covariance matrices? Projecting the data into the space of the principal components serves well again here. Surprisingly, we will see that the small dimensions contain useful information about differences among the group covariance matrices.

  • Other statistics: Box’s \(M\)-test is most widely used. Are there other worthwhile test statistics? We will see that graphics methods suggest alternatives.

The following subsections provide a capsule summary of the issues in this topic. Most of the discussion is couched in terms of a one-way design for simplicity, but the same ideas can apply to two-way (and higher) designs, where a “group” factor is defined as the product combination (interaction) of two or more factor variables. When there are also numeric covariates, this topic can be extended to the multivariate analysis of covariance (MANCOVA) setting. This can be accomplished by applying these techniques to the residuals from predictions by the covariates alone.


Packages


In this chapter we use the following packages. Load them now:

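The library() block itself did not survive extraction here. A minimal reconstruction of the loading step, assuming only the packages this chapter’s code visibly calls:

library(car)        # leveneTest()
library(dplyr)      # group_by()
library(ggplot2)    # ggplot() graphics
library(heplots)    # colDevs(), leveneTests(), covEllipses(), boxM()
library(tidyr)      # pivot_longer()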

12.1 Homogeneity of Variance in Univariate ANOVA


In classical (Gaussian) univariate ANOVA models, the main interest is typically on tests of mean differences in a response \(y\) according to one or more factors. The validity of the typical \(F\) test, however, relies on the assumption of homogeneity of variance: all groups have the same (or similar) variance, \[ \sigma_1^2 = \sigma_2^2 = \cdots = \sigma_g^2 \; . \]


It turns out that the \(F\) test for differences in means is relatively robust to violation of this assumption (Harwell et al., 1992), as long as the group sample sizes are roughly equal.1 This applies to Type I error \(\alpha\) rates, which are not much affected. However, unequal variance makes the ANOVA tests less efficient: you lose power to detect significant differences.


A variety of classical test statistics for homogeneity of variance are available, including Hartley’s \(F_{max}\) (Hartley, 1950), Cochran’s C (Cochran, 1941), and Bartlett’s test (Bartlett, 1937), but these have been found to have terrible statistical properties (Rogan & Keselman, 1977), which prompted Box’s famous quote.


Levene (1960) introduced a different form of test, based on the simple idea that when variances are equal across groups, the average absolute values of differences between the observations and group means will also be equal, i.e., substituting an \(L_1\) norm for the \(L_2\) norm of variance. In a one-way design, this is equivalent to a test of group differences in the means of the auxiliary variable \(z_{ij} = | y_{ij} - \bar{y}_i |\).


More robust versions of this test were proposed by Brown & Forsythe (1974). These tests substitute the group mean by either the group median or a trimmed mean in the ANOVA of the absolute deviations. Some suggest these should almost always be preferred to Levene’s version using the mean deviation. See Conover et al. (1981) for an early review and Gastwirth et al. (2009) for a general discussion of these tests. In what follows, we refer to this class of tests as “Levene-type” tests and suggest a multivariate extension described below (Section 12.2).


These deviations from a group central value can be calculated using heplots::colDevs(), and the central value can be a function, like mean, median, or an anonymous one like function(x) mean(x, trim = 0.1) that trims 10% off each side of the distribution. With a response Y, Levene’s test can then be performed “by hand” as follows:

# absolute deviations from the group means, then a one-way ANOVA of these
Z.mean <- abs( colDevs(Y, group) )
anova(lm(Z.mean ~ group))

# the more robust version: absolute deviations from the group medians
Z.med <- abs( colDevs(Y, group, median) )
anova(lm(Z.med ~ group))

The function car::leveneTest() does this, so we could examine whether the variances are equal in the Penguin variables, one at a time, like so:

data(peng, package = "heplots")
leveneTest(bill_length ~ species, data=peng)
#> Levene's Test for Homogeneity of Variance (center = median)
#>        Df F value Pr(>F)
#> group   2    2.29    0.1
#>       330
  # ...
leveneTest(body_mass ~ species, data=peng)
#> Levene's Test for Homogeneity of Variance (center = median)
#>        Df F value Pr(>F)   
#> group   2    5.13 0.0064 **
#>       330                  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

More conveniently, heplots::leveneTests(), with an “s”, does this for each of a set of response variables, specified in a data frame, a model formula or a "mlm" object. It also formats the results in a more pleasing way:

peng.mod <- lm(cbind(bill_length, bill_depth, flipper_length, body_mass) ~ species, 
               data = peng)
leveneTests(peng.mod)
#> Levene's Tests for Homogeneity of Variance (center = median)
#> 
#>                df1 df2 F value Pr(>F)   
#> bill_length      2 330    2.29 0.1033   
#> bill_depth       2 330    1.91 0.1494   
#> flipper_length   2 330    0.44 0.6426   
#> body_mass        2 330    5.13 0.0064 **
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

So, this tells us that the groups do not differ in variance on the first three variables, but they do for body_mass.

12.2 Visualizing Levene’s test

To gain some insight into the problem of homogeneity of variance, it is helpful to see how the situation looks in terms of the data. For the Penguin data, the simplest approach is just boxplots of the variables, trying to see whether the widths of the central 50% boxes seem to be the same, as in Figure 12.1. However, it is perceptually difficult to focus on differences in the widths of the boxes within each panel when their centers also differ from group to group.

See the code
source("R/penguin/penguin-colors.R")
col <- peng.colors("dark")
clr <- c(col, gray(.20))
peng_long <- peng |> 
  pivot_longer(bill_length:body_mass, 
               names_to = "variable", 
               values_to = "value") 

peng_long |>
  group_by(species) |> 
  ggplot(aes(value, species, fill = species)) +
  geom_boxplot() +
  facet_wrap(~ variable, scales = 'free_x') +
  theme_penguins() +
  theme_bw(base_size = 14) +
  theme(legend.position = 'none') 
Figure 12.1: Boxplots for the Penguin variables. For assessing homogeneity of variance, we should be looking for differences in width of the central 50% boxes in each panel, rather than differences in central tendency.

Instead, you can see more directly what is tested by the Levene test by graphing the absolute deviations from the group means or medians. This is another example of the graphical idea that visual comparisons are made easier by plotting the quantities of direct interest. You can calculate these values as follows:

vars <- c("bill_length", "bill_depth", "flipper_length", "body_mass")
pengDevs <- colDevs(peng[, vars], peng$species, median) |>
  abs()

From a boxplot of the absolute deviations in Figure 12.2, your eye can now focus on the central value, shown by the median (“|”) line, because Levene’s method is testing whether these differ across groups.

See the code
# calculate absolute differences from median
dev_long <- data.frame(species = peng$species, pengDevs) |> 
  pivot_longer(bill_length:body_mass, 
               names_to = "variable", 
               values_to = "value") 

dev_long |>
  group_by(species) |> 
  ggplot(aes(value, species, fill = species)) +
  geom_boxplot() +
  facet_wrap(~ variable, scales = 'free_x') +
  xlab("absolute median deviation") +
  theme_penguins() +
  theme_bw(base_size = 14) +
  theme(legend.position = 'none') 
Figure 12.2: Boxplots for absolute differences from group medians for the Penguin data.

It is now easy to see that the medians largely align for all the variables except for body_mass.


12.3 Homogeneity of variance in MANOVA


In the MANOVA context, the main emphasis, of course, is on differences among mean vectors, testing \[ \mathcal{H}_0 : \boldsymbol{\mu}_1 = \boldsymbol{\mu}_2 = \cdots = \boldsymbol{\mu}_g \; . \] However, the standard test statistics (Wilks’ Lambda, Hotelling-Lawley trace, Pillai-Bartlett trace, Roy’s maximum root) rely upon the analogous assumption that the within-group covariance matrices \(\boldsymbol{\Sigma}_i\) are equal for all groups, \[ \boldsymbol{\Sigma}_1 = \boldsymbol{\Sigma}_2 = \cdots = \boldsymbol{\Sigma}_g \; . \] This is much stronger than in the univariate case, because it also requires that all the correlations between pairs of variables are the same for all groups. For example, for two responses, there are three parameters (\(\rho, \sigma_1^2, \sigma_2^2\)) assumed equal across all groups; for \(p\) responses, there are \(p (p+1) / 2\) assumed equal.


To preview the main example, Figure 12.3 shows data ellipses for the main size variables in the palmerpenguins::penguins data.


To view the relations …

See the code
op <- par(mar = c(4, 4, 1, 1) + .5,
          mfrow = c(1, 2))
covEllipses(cbind(bill_length, bill_depth) ~ species, data=peng,
  fill = TRUE,
  fill.alpha = 0.1,
  lwd = 3,
  col = clr)

covEllipses(cbind(bill_length, bill_depth) ~ species, data=peng,
  center = TRUE, 
  fill = c(rep(FALSE,3), TRUE), 
  fill.alpha = .1, 
  lwd = 3,
  col = clr,
  label.pos = c(1:3,0))
par(op)
Figure 12.3: Data ellipses for bill length and bill depth in the penguins data, also showing the pooled covariance. Left: As is; right: centered at the grand means for easier comparison.

All pairs:

Code
clr <- c(peng.colors(), "black")
covEllipses(peng[,3:6], peng$species, 
  variables=1:4,
  col = clr,
  fill=TRUE, 
  fill.alpha=.1)
Figure 12.4: All pairwise covariance ellipses for the penguins data.

The covariance ellipses look pretty similar in size, shape and orientation. But what does Box’s M test (described below) say? As you can see, it concludes strongly against the null hypothesis.

boxM(cbind(bill_length, bill_depth, flipper_length, body_mass) ~ species, data=peng)
#> 
#>  Box's M-test for Homogeneity of Covariance Matrices
#> 
#> data:  Y
#> Chi-Sq (approx.) = 75, df = 20, p-value = 3e-08

12.4 Assessing heterogeneity of covariance matrices: Box’s M test


Box (1949) proposed the following likelihood-ratio test (LRT) statistic for testing the hypothesis of equal covariance matrices, \[ M = (N - g) \ln \;|\; \mathbf{S}_p \;|\; - \sum_{i=1}^g (n_i - 1) \ln \;|\; \mathbf{S}_i \;|\; \; , \]


where \(N = \sum n_i\) is the total sample size and \(\mathbf{S}_p = (N-g)^{-1} \sum_{i=1}^g (n_i - 1) \mathbf{S}_i\) is the pooled covariance matrix. \(M\) can thus be thought of as a ratio of the determinant of the pooled \(\mathbf{S}_p\) to the geometric mean of the determinants of the separate \(\mathbf{S}_i\).


In practice, there are various transformations of the value of \(M\) to yield a test statistic with an approximately known distribution (Timm, 1975). Roughly speaking, when each \(n_i > 20\), a \(\chi^2\) approximation is often used; otherwise an \(F\) approximation is known to be more accurate.


Asymptotically, \(-2 \ln (M)\) has a \(\chi^2\) distribution. The \(\chi^2\) approximation due to Box (1949, 1950) is that \[ X^2 = -2 (1-c_1) \ln (M) \quad \sim \quad \chi^2_{df} \] with \(df = (g-1) p (p+1)/2\) degrees of freedom, and a bias correction constant: \[ c_1 = \left( \sum_i \frac{1}{n_i -1} - \frac{1}{N-g} \right) \frac{2p^2 +3p -1}{6 (p+1)(g-1)} \; . \]
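
To make the pieces concrete, here is a minimal sketch of computing \(M\) and its \(\chi^2\) approximation “by hand” for the penguin size variables, assuming the peng data from heplots. With \(M\) already in the log form defined above, the correction amounts to multiplying by \((1 - c_1)\); compare with the boxM() output shown earlier.

library(heplots)
data(peng, package = "heplots")
Y   <- peng[, c("bill_length", "bill_depth", "flipper_length", "body_mass")]
grp <- peng$species

ni <- as.numeric(table(grp))   # group sample sizes
N  <- sum(ni); g <- length(ni); p <- ncol(Y)

S_i <- lapply(split(Y, grp), cov)                    # within-group covariance matrices
S_p <- Reduce(`+`, Map(`*`, S_i, ni - 1)) / (N - g)  # pooled covariance matrix

M  <- (N - g) * log(det(S_p)) -
      sum((ni - 1) * sapply(S_i, function(S) log(det(S))))
c1 <- (sum(1 / (ni - 1)) - 1 / (N - g)) *
      (2 * p^2 + 3 * p - 1) / (6 * (p + 1) * (g - 1))
X2 <- (1 - c1) * M                                   # chi-square approximation
df <- (g - 1) * p * (p + 1) / 2
pchisq(X2, df, lower.tail = FALSE)                   # compare: Chi-Sq approx. 75, df = 20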


In this form, Bartlett’s test for equality of variances in the univariate case is the special case of Box’s M when there is only one response variable, so Bartlett’s test is sometimes used as a univariate follow-up to determine which response variables show heterogeneity of variance.


Yet, like its univariate counterpart, Box’s test is well-known to be highly sensitive to violation of (multivariate) normality and the presence of outliers. For example, Tiku & Balakrishnan (1984) concluded from simulation studies that the normal-theory LRT provides poor control of Type I error under even modest departures from normality. O’Brien (1992) proposed some robust alternatives, and showed that Box’s normal theory approximation suffered both in controlling the null size of the test and in power. Zhang & Boos (1992) also carried out simulation studies with similar conclusions and used bootstrap methods to obtain corrected critical values.


12.5 Visualizing heterogeneity


The goal of this chapter is to use the above background as a platform for discussing approaches to visualizing and testing the heterogeneity of covariance matrices in multivariate designs. While researchers often rely on a single number to determine if their data have met a particular threshold, such compression will often obscure interesting information, particularly when a test concludes that differences exist, and one is left to wonder “why?”. It is within this context that, again, visualizations often reign supreme. In fact, it is somewhat surprising that this issue has not been addressed graphically before in any systematic way. TODO: cut this down


In what follows, we propose three visualization-based approaches to questions of heterogeneity of covariance in MANOVA designs:

  1. direct visualization of the information in the \(\mathbf{S}_i\) and \(\mathbf{S}_p\) using data ellipsoids to show size and shape as minimal schematic summaries;

  2. a simple dotplot of the components of Box’s M test: the log determinants of the \(\mathbf{S}_i\) together with that of the pooled \(\mathbf{S}_p\) (a sketch follows this list). Extensions of these simple plots raise the question of whether measures of heterogeneity other than that captured in Box’s test might also be useful; and,

  3. the connection between Levene-type tests and an ANOVA (of centered absolute differences) suggests a parallel with a multivariate extension of Levene-type tests and a MANOVA. We explore this with a version of Hypothesis-Error (HE) plots we have found useful for visualizing mean differences in MANOVA designs.
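
As a concrete version of the second item, heplots provides a plot() method for "boxM" objects. A minimal sketch for the penguin data:

peng.boxm <- boxM(cbind(bill_length, bill_depth, flipper_length, body_mass) ~ species,
                  data = peng)
plot(peng.boxm)   # dotplot of the log |S_i|, together with the pooled log |S_p|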

  1. If group sizes are greatly unequal and homogeneity of variance is violated, then the \(F\) statistic is too liberal (\(p\) values too large) when large sample variances are associated with small group sizes. Conversely, the \(F\) statistic is too conservative if large variances are associated with large group sizes.↩︎
13  Case studies

This chapter presents some complete analyses of datasets that will be prominent in the book. Some of this material may later be moved to earlier chapters.


Packages


In this chapter we use the following packages. Load them now:

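The library() block was again lost in extraction. A minimal reconstruction, assuming only the packages this chapter’s code visibly calls:

library(candisc)    # candisc()
library(car)        # Anova(), linearHypothesis(), scatterplotMatrix()
library(corrgram)   # corrgram()
library(dplyr)      # select(), group_by(), sample_n()
library(ggplot2)    # ggplot() graphics
library(heplots)    # heplot(), cqplot(), datasets
library(tidyr)      # gather()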

13.1 Neuro- and Social-cognitive measures in psychiatric groups


A Ph.D. dissertation by Laura Hartman (2016) at York University was designed to evaluate whether and how clinical patients diagnosed (on the DSM-IV) as schizophrenic or with schizoaffective disorder could be distinguished from each other and from a normal, control sample on collections of standardized tests in the following domains:

  • Neuro-cognitive: processing speed, attention, verbal learning, visual learning and problem solving;

  • Social-cognitive: managing emotions, theory of mind, externalizing, personalizing bias.

The study is an important contribution to clinical research because the two diagnostic categories are subtly different and their symptoms often overlap. Yet they are distinct disorders and often require different treatments. A key difference between schizoaffective disorder and schizophrenia is the prominence of mood disorder involving bipolar, manic and depressive moods. With schizoaffective disorder, mood disorders are front and center. With schizophrenia, that is not a dominant part of the disorder, but psychotic ideation (hearing voices, seeing imaginary people) is.

13.1.1 Research questions


This example is concerned with the following substantive questions:

  • To what extent can patients diagnosed as schizophrenic or with schizoaffective disorder be distinguished from each other and from a normal control sample using a well-validated, comprehensive neurocognitive battery specifically designed for individuals with psychosis (Heinrichs et al., 2015)?

  • If the groups differ, do any of the cognitive domains show particularly larger or smaller differences among these groups?

  • Do the neurocognitive measures discriminate among the groups in the same or different ways? If different, how many separate aspects or dimensions are distinguished?

Apart from the research interest, it could aid diagnosis and treatment if these similar mental disorders could be distinguished by tests in the cognitive domain.


13.1.2 Data


The clinical sample comprised 116 male and female patients who met the following criteria: 1) a diagnosis of schizophrenia (\(n\) = 70) or schizoaffective disorder (\(n\) = 46) confirmed by the Structured Clinical Interview for DSM-IV-TR Axis I Disorders; 2) outpatient status; 3) a history free of developmental or learning disability; 4) age 18-65; 5) a history free of neurological or endocrine disorder; and 6) no concurrent diagnosis of substance use disorder. Non-psychiatric control participants (\(n\) = 146) were screened for medical and psychiatric illness and history of substance abuse and were recruited from three outpatient clinics.

data(NeuroCog, package="heplots")
glimpse(NeuroCog)
#> Rows: 242
#> Columns: 10
#> $ Dx        <fct> Schizophrenia, Schizophrenia, Schizophrenia, Sch…
#> $ Speed     <int> 19, 8, 14, 7, 21, 31, -1, 17, 7, 37, 30, 26, 32,…
#> $ Attention <int> 9, 25, 23, 18, 9, 10, 8, 20, 30, 15, 27, 20, 23,…
#> $ Memory    <int> 19, 15, 15, 14, 35, 26, 3, 27, 26, 17, 28, 22, 2…
#> $ Verbal    <int> 33, 28, 20, 34, 28, 29, 20, 30, 26, 33, 34, 33, …
#> $ Visual    <int> 24, 24, 13, 16, 29, 21, 12, 32, 27, 21, 19, 18, …
#> $ ProbSolv  <int> 39, 40, 32, 31, 45, 33, 29, 29, 30, 33, 30, 39, …
#> $ SocialCog <int> 28, 37, 24, 36, 28, 28, 28, 44, 39, 24, 32, 36, …
#> $ Age       <int> 44, 26, 55, 53, 51, 21, 53, 56, 48, 46, 48, 31, …
#> $ Sex       <fct> Female, Male, Female, Male, Male, Male, Male, Fe…

The diagnostic classification variable is called Dx in the dataset. To facilitate answering questions regarding group differences, the following contrasts were applied: the first column compares the control group to the average of the diagnosed groups, the second compares the schizophrenia group against the schizoaffective group.

contrasts(NeuroCog$Dx)
#>                 [,1] [,2]
#> Schizophrenia   -0.5    1
#> Schizoaffective -0.5   -1
#> Control          1.0    0

In this analysis, we ignore the SocialCog variable. The primary focus is on the variables Speed : ProbSolv.


13.1.3 A first look


As always, plot the data first! We want an overview of the distributions of the variables to see the centers, spread, shape and possible outliers for each group on each variable.


The plot below combines the use of boxplots and violin plots to give an informative display. As we saw earlier (Chapter XXX), doing this with ggplot2 requires reshaping the data to long format.

# Reshape from wide to long
NC_long <- NeuroCog |>
  dplyr::select(-SocialCog, -Age, -Sex) |>
  tidyr::gather(key = response, value = "value", Speed:ProbSolv)
# view 3 observations per group and measure
NC_long |>
  group_by(Dx) |>
  sample_n(3) |> ungroup()
#> # A tibble: 9 × 3
#>   Dx              response  value
#>   <fct>           <chr>     <int>
#> 1 Schizophrenia   Speed        39
#> 2 Schizophrenia   Visual       21
#> 3 Schizophrenia   Memory       40
#> 4 Schizoaffective ProbSolv     40
#> 5 Schizoaffective Speed        25
#> 6 Schizoaffective Verbal       48
#> 7 Control         Speed        33
#> 8 Control         ProbSolv     43
#> 9 Control         Attention    37

In the plot, we take care to adjust the transparency (alpha) values for the points, violin plots and boxplots so that all can be seen. Options for geom_boxplot() are used to give these greater visual prominence.

Code
ggplot(NC_long, aes(x=Dx, y=value, fill=Dx)) +
  geom_jitter(shape=16, alpha=0.8, size=1, width=0.2) +
  geom_violin(alpha = 0.1) +
  geom_boxplot(width=0.5, alpha=0.4, 
               outlier.alpha=1, outlier.size = 3, outlier.color = "red") +
  scale_x_discrete(labels = c("Schizo", "SchizAff", "Control")) +
  facet_wrap(~response, scales = "free_y", as.table = FALSE) +
  theme_bw() +
  theme(legend.position="bottom",
        axis.title = element_text(size = rel(1.2)),
        axis.text  = element_text(face = "bold"),
        strip.text = element_text(size = rel(1.2)))
Figure 13.1: Boxplots and violin plots of the NeuroCog data.

We can see that the control participants score higher on all measures, but there is no consistent pattern of medians for the two patient groups. But these univariate summaries do not inform about the relations among variables.


13.1.4 Bivariate views


Corrgram


A corrgram (Friendly, 2002) provides a useful reconnaissance plot of the bivariate correlations in the dataset. It suppresses details, and allows focus on the overall pattern. The corrgram::corrgram() function has the ability to enhance perception by permuting the variables in the order of their variable vectors in a biplot, so more highly correlated variables are adjacent in the plot, an example of effect ordering for data displays (Friendly & Kwan, 2003).


The plot below includes all variables except for Dx group. There are a number of panel.* functions for choosing how the correlation for each pair is rendered.

NeuroCog |>
  select(-Dx) |>
  corrgram(order = TRUE,
           diag.panel = panel.density,
           upper.panel = panel.pie)
Figure 13.2: corrgram of the NeuroCog data. The upper and lower triangles use two different ways of encoding the value of the correlation for each pair of variables.

In this plot you can see that adjacent variables are more highly correlated than those more widely separated. The diagonal panels show that most variables are reasonably symmetric in their distributions. Age, not included in this analysis, is negatively correlated with the others: older participants tend to do less well on these tests.


Scatterplot matrix


A scatterplot matrix gives a more detailed overview of all pairwise relations. The plot below suppresses the data points and summarizes the relation using data ellipses and regression lines. The model syntax, ~ Speed + ... | Dx, treats Dx as a conditioning variable (similar to the use of the color aesthetic in ggplot2), giving a separate data ellipse and regression line for each group. (The legend is suppressed here. The groups are Schizophrenic, SchizoAffective, Normal.)

scatterplotMatrix(~ Speed + Attention + Memory + Verbal + Visual + ProbSolv | Dx,
  data=NeuroCog,
  plot.points = FALSE,
  smooth = FALSE,
  legend = FALSE,
  col = scales::hue_pal()(3),
  ellipse=list(levels=0.68))
Figure 13.3: Scatterplot matrix of the NeuroCog data. Points are suppressed here, focusing on the data ellipses and regression lines. Colors for the groups: Schizophrenic (red), SchizoAffective (green), Normal (blue)

In this figure, we can see that the regression lines have similar slopes and similar data ellipses for the groups, though with a few exceptions.


TODO: Should we add biplot here?


13.2 Fitting the MLM


We proceed to fit the one-way MANOVA model.

NC.mlm <- lm(cbind(Speed, Attention, Memory, Verbal, Visual, ProbSolv) ~ Dx,
             data=NeuroCog)
Anova(NC.mlm)
#> 
#> Type II MANOVA Tests: Pillai test statistic
#>    Df test stat approx F num Df den Df  Pr(>F)    
#> Dx  2     0.299     6.89     12    470 1.6e-11 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The first research question is captured by the contrasts for the Dx factor shown above. We can test these with car::linearHypothesis(). The contrast Dx1 for control vs. the diagnosed groups is highly significant,

# control vs. patients
print(linearHypothesis(NC.mlm, "Dx1"), SSP=FALSE)
#> 
#> Multivariate Tests: 
#>                  Df test stat approx F num Df den Df  Pr(>F)    
#> Pillai            1     0.289     15.9      6    234 2.8e-15 ***
#> Wilks             1     0.711     15.9      6    234 2.8e-15 ***
#> Hotelling-Lawley  1     0.407     15.9      6    234 2.8e-15 ***
#> Roy               1     0.407     15.9      6    234 2.8e-15 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

but the second contrast, Dx2, comparing the schizophrenic and schizoaffective group, is not.

# Schizo vs SchizAff
print(linearHypothesis(NC.mlm, "Dx2"), SSP=FALSE)
#> 
#> Multivariate Tests: 
#>                  Df test stat approx F num Df den Df Pr(>F)
#> Pillai            1     0.006    0.249      6    234   0.96
#> Wilks             1     0.994    0.249      6    234   0.96
#> Hotelling-Lawley  1     0.006    0.249      6    234   0.96
#> Roy               1     0.006    0.249      6    234   0.96

13.2.1 HE plot


So the question becomes: how to understand these results. heplot() shows the visualization of the multivariate model in the space of two response variables (the first two by default). The result (Figure 13.4) tells a very simple story: the control group performs higher on both measures than the diagnosed groups, which do not differ between themselves.


(For technical reasons, to abbreviate the group labels in the plot, we need to update() the MLM model after the labels are reassigned.)

# abbreviate levels for plots
NeuroCog$Dx <- factor(NeuroCog$Dx, 
                      labels = c("Schiz", "SchAff", "Contr"))
NC.mlm <- update(NC.mlm)
op <- par(mar=c(5,4,1,1)+.1)
heplot(NC.mlm, 
       fill=TRUE, fill.alpha=0.1,
       cex.lab=1.3, cex=1.25)
par(op)
Figure 13.4: HE plot of Speed and Attention in the MLM for the NeuroCog data. The labeled points show the means of the groups on the two variables. The blue H ellipse for groups indicates the strong positive correlation of the group means.

This pattern is consistent across all of the response variables, as we see from a plot of pairs(NC.mlm):

pairs(NC.mlm, 
      fill=TRUE, fill.alpha=0.1,
      var.cex=2)
Figure 13.5: HE plot matrix of the MLM for NeuroCog data.

13.2.2 Canonical space


We can gain further insight, and a simplified plot showing all the response variables, by projecting the MANOVA into the canonical space, which is entirely 2-dimensional (because \(df_h=2\)). However, the output from candisc() shows that 98.5% of the mean differences among groups can be accounted for in one canonical dimension.

NC.can <- candisc(NC.mlm)
NC.can
#> 
#> Canonical Discriminant Analysis for Dx:
#> 
#>    CanRsq Eigenvalue Difference Percent Cumulative
#> 1 0.29295    0.41433      0.408    98.5       98.5
#> 2 0.00625    0.00629      0.408     1.5      100.0
#> 
#> Test of H0: The canonical correlations in the 
#> current row and all that follow are zero
#> 
#>   LR test stat approx F numDF denDF Pr(> F)    
#> 1        0.703     7.53    12   468   9e-13 ***
#> 2        0.994     0.30     5   235    0.91    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1



Figure 13.6 is the result of the plot() method for class "candisc" objects, that is, the result of calling plot(NC.can, ...). It plots the two canonical scores, \(\mathbf{Z}_{n \times 2}\) for the subjects, together with data ellipses for each of the three groups.

pos <- c(4, 1, 4, 4, 1, 3)
col <- c("red", "darkgreen", "blue")
op <- par(mar=c(5,4,1,1)+.1)
plot(NC.can, 
     ellipse=TRUE, 
     rev.axes=c(TRUE,FALSE), 
     pch=c(7,9,10),
     var.cex=1.2, cex.lab=1.5, var.lwd=2,  scale=4.5, 
     col=col,
     var.col="black", var.pos=pos,
     prefix="Canonical dimension ")
par(op)
Figure 13.6: Canonical discriminant plot for the NeuroCog data MANOVA. Scores on the two canonical dimensions are plotted, together with 68% data ellipses for each group.

The interpretation of Figure 13.6 is again fairly straightforward. As noted earlier (REF???), the projections of the variable vectors in this plot on the coordinate axes are proportional to the correlations of the responses with the canonical scores. From this, we see that the normal group differs from the two patient groups, having higher scores on all the neurocognitive variables, most of which are highly correlated. The problem solving measure is slightly different, and this, compared to the cluster of memory, verbal and attention, is what distinguishes the schizophrenic group from the schizoaffectives.
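
These correlations are available directly in the "candisc" object as structure coefficients, so they can be checked numerically (assuming the NC.can object created above):

NC.can$structure   # correlations of each response with the canonical dimensions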


The separation of the groups is essentially one-dimensional, with the control group higher on all measures. Moreover, the variables processing speed and visual memory are the purest measures of this dimension, but all variables contribute positively. The second canonical dimension accounts for only 1.5% of group mean differences and is non-significant (by a likelihood ratio test). Yet, if we were to interpret it, we would note that the schizophrenia group is slightly higher on this dimension, scoring better in problem solving and slightly worse on working memory, attention, and verbal learning tasks.


Summary


This analysis gives a very simple description of the data, in relation to the research questions posed earlier:

  • On the basis of these neurocognitive tests, the schizophrenic and schizoaffective groups do not differ significantly overall, but these groups differ greatly from the normal controls.

  • All cognitive domains distinguish the groups in the same direction, with the greatest differences shown for the variables most closely aligned with the horizontal axis in Figure 13.6.

13.3 Social cognitive measures


The social cognitive measures were designed to tap various aspects of the perception and cognitive processing of emotions of others. Emotion perception was assessed using a Managing Emotions score from the MCCB. A “theory of mind” (ToM) score assessed ability to read the emotions of others from photographs of the eye region of male and female faces. Two other measures, externalizing bias (ExtBias) and personalizing bias (PersBias) were calculated from a scale measuring the degree to which individuals attribute internal, personal or situational causal attributions to positive and negative social events.


The analysis of the SocialCog data proceeds in a similar way: first we fit the MANOVA model, then test the overall differences among groups using Anova(). We find that the overall multivariate test is again significant,

data(SocialCog, package="heplots")
SC.mlm <-  lm(cbind(MgeEmotions,ToM, ExtBias, PersBias) ~ Dx,
               data=SocialCog)
Anova(SC.mlm)
#> 
#> Type II MANOVA Tests: Pillai test statistic
#>    Df test stat approx F num Df den Df  Pr(>F)    
#> Dx  2     0.212     3.97      8    268 0.00018 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Testing the same two contrasts using linearHypothesis() (results not shown), we find that the overall multivariate test is again significant, but now both contrasts are significant (Dx1: \(F(4, 133)=5.21, p < 0.001\); Dx2: \(F(4, 133)=2.49, p = 0.0461\)), the test for Dx2 just barely.

# control vs. patients
print(linearHypothesis(SC.mlm, "Dx1"), SSP=FALSE)
# Schizo vs. SchizAff
print(linearHypothesis(SC.mlm, "Dx2"), SSP=FALSE)

These results are important, because, if they are reliable and make sense substantively, they imply that patients with schizophrenia and schizoaffective diagnoses can be distinguished by their performance on tasks assessing social perception and cognition. This was potentially a new finding in the literature on schizophrenia.


As we did above, it is useful to visualize the nature of these differences among groups with HE plots for the SC.mlm model. Each contrast has a corresponding \(\mathbf{H}\) ellipse, which we can show in the plot using the hypotheses argument. With a single degree of freedom, these degenerate ellipses plot as lines.

op <- par(mar=c(5,4,1,1)+.1)
heplot(SC.mlm, 
       hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"),
       fill=TRUE, fill.alpha=.1,
       cex.lab=1.5, cex=1.2)
par(op)
Figure 13.7: HE plot of MgeEmotions and ToM in the MLM for the SocialCog data. The labeled points show the means of the groups on the two variables. The lines for Dx1 and Dx2 show the tests of the contrasts among groups.

It can be seen that the three group means are approximately equally spaced on the ToM measure, whereas for MgeEmotions, the control and schizoaffective groups are quite similar, and both are higher than the schizophrenic group. This ordering of the three groups was somewhat similar for the other responses, as we could see in a pairs(SC.mlm) plot.


13.3.1 Model checking


Normally, we would continue this analysis, and consider other HE and canonical discriminant plots to further interpret the results, in particular the relations of the cognitive measures to group differences, or perhaps an analysis of the relationships between the neuro- and social-cognitive measures. We don’t pursue this here for reasons of length, but this example actually has a more important lesson to demonstrate.


Before beginning the MANOVA analyses, extensive data screening was done by the client using SPSS, in which all the response and predictor variables were checked for univariate normality and multivariate normality (MVN) for both sets. This traditional approach yielded a huge amount of tabular output and no graphs, and did not indicate any major violation of assumptions.1


A simple visual test of MVN and the possible presence of multivariate outliers is related to the theory of the data ellipse: Under MVN, the squared Mahalanobis distances \(D^2_M (\mathbf{y}) = (\mathbf{y} - \bar{\mathbf{y}})' \, \mathbf{S}^{-1} \, (\mathbf{y} - \bar{\mathbf{y}})\) should follow a \(\chi^2_p\) distribution. Thus, a quantile-quantile plot of the ordered \(D^2_M\) values vs. corresponding quantiles of the \(\chi^2\) distribution should approximate a straight line (Cox, 1968; Healy, 1968). Note that this should be applied to the residuals from the model – residuals(SC.mlm) – and not to the response variables directly.
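
A by-hand version of this idea makes the theory concrete. The sketch below uses classical estimates of center and covariance (unlike the robust MVE estimate used by cqplot() below):

res <- residuals(SC.mlm)
D2  <- mahalanobis(res, center = colMeans(res), cov = cov(res))
qqplot(qchisq(ppoints(nrow(res)), df = ncol(res)), D2,
       xlab = "Chi-square quantiles",
       ylab = "Squared Mahalanobis distance")
abline(0, 1)   # points should follow this line under multivariate normality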


heplots::cqplot() implements this for "mlm" objects. Calling this function for the model SC.mlm produces Figure 13.8. It is immediately apparent that there is one extreme multivariate outlier; three other points are identified, but the remaining observations are nearly all within the 95% confidence envelope (using a robust MVE estimate of \(\mathbf{S}\)).

op <- par(mar=c(5,4,1,1)+.1)
cqplot(SC.mlm, method="mve", 
       id.n=4, 
       main="", 
       cex.lab=1.25)
par(op)
Figure 13.8: Chi-square quantile-quantile plot for residuals from the model SC.mlm. The confidence band gives a point-wise 95% envelope, providing information about uncertainty. One extreme multivariate outlier is highlighted.

Further checking revealed that this was a data entry error where one case (15) in the schizophrenia group had a score of -33 recorded on the ExtBias measure, whose valid range was (-10, +10). In R, it is very easy to re-fit a model to a subset of observations (rather than modifying the dataset itself) using update(). The result of the overall Anova and the test of Dx1 were unchanged; however, the multivariate test for the most interesting contrast Dx2 comparing the schizophrenia and schizoaffective groups became non-significant at the \(\alpha=0.05\) level (\(F(4, 133)=2.18, p = 0.0742\)).

SC.mlm1 <- update(SC.mlm, 
                  subset=rownames(SocialCog)!="15")

Anova(SC.mlm1)
print(linearHypothesis(SC.mlm1, "Dx1"), SSP=FALSE)
print(linearHypothesis(SC.mlm1, "Dx2"), SSP=FALSE)

13.3.2 Canonical HE plot


This outcome creates a bit of a quandary for further analysis (do univariate follow-up tests? try a robust model?) and reporting (what to claim about the Dx2 contrast?) that we don’t explore here. Rather, we proceed to attempt to interpret the MLM with the aid of canonical analysis and a canonical HE plot. The canonical analysis of the model SC.mlm1 now shows that the first canonical dimension is clearly significant, and the two dimensions account for 83.9% and 16.1% of between-group mean differences, respectively.

SC.can1 <- candisc(SC.mlm1)
SC.can1
#> 
#> Canonical Discriminant Analysis for Dx:
#> 
#>   CanRsq Eigenvalue Difference Percent Cumulative
#> 1 0.1645     0.1969      0.159    83.9       83.9
#> 2 0.0364     0.0378      0.159    16.1      100.0
#> 
#> Test of H0: The canonical correlations in the 
#> current row and all that follow are zero
#> 
#>   LR test stat approx F numDF denDF Pr(> F)    
#> 1        0.805     3.78     8   264 0.00032 ***
#> 2        0.964     1.68     3   133 0.17537    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
op <- par(mar=c(5,4,1,1)+.1)
heplot(SC.can1, 
  fill=TRUE, fill.alpha=.1,
  hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"),
  lwd = c(1, 2, 3, 3),
  col=c("red", "blue", "darkgreen", "darkgreen"),
  var.lwd=2, 
  var.col="black", 
  label.pos=c(3,1), 
  var.cex=1.2, 
  cex=1.25, cex.lab=1.2, 
  scale=2.8,
  prefix="Canonical dimension ")
par(op)
Figure 13.9: Canonical HE plot for the corrected SocialCog MANOVA. The variable vectors show the correlations of the responses with the canonical variables. The embedded green lines show the projections of the H ellipses for the contrasts Dx1 and Dx2 in canonical space.

The HE plot version of this canonical plot is shown in Figure 13.9. Because the heplot() method for a "candisc" object refits the original model to the \(\mathbf{Z}\) canonical scores, it is easy to also project other linear hypotheses into this space. Note that in this view, both the Dx1 and Dx2 contrasts project outside the \(\mathbf{E}\) ellipse.2


This canonical HE plot has a very simple description:

  • Dimension 1 orders the groups from control to schizoaffective to schizophrenia, while dimension 2 separates the schizoaffective group from the others;

  • Externalizing bias and theory of mind contribute most to the first dimension, while personal bias and managing emotions are more aligned with the second; and,

  • The relations of the two contrasts to group differences and to the response variables can be easily read from this plot.

  1. Actually, multivariate normality of the predictors in \(\mathbf{X}\) is not required in the MLM. This assumption applies only to the conditional values \(\mathbf{Y} \;|\; \mathbf{X}\), i.e., that the errors \(\boldsymbol{\epsilon}_{i}' \sim \mathcal{N}_{p}(\mathbf{0},\boldsymbol{\Sigma})\) with constant covariance matrix. Moreover, the widely used MVN test statistics, such as Mardia’s (1970) test based on multivariate skewness and kurtosis, are known to be quite sensitive to mild departures in kurtosis (Mardia, 1974) which do not threaten the validity of the multivariate tests.↩︎

  2. The direct application of significance tests to canonical scores probably requires some adjustment because these are computed to have the optimal between-group discrimination.↩︎

Colophon


This book was produced using R version 4.4.1 (2024-06-14 ucrt). Fundamental to this was the framework for reproducible documents provided by Yihui Xie’s knitr package.


Quarto was used to compile and render the book in HTML and PDF formats. [** Don’t really need all this**]

Quarto 1.5.53
[>] Checking versions of quarto binary dependencies...
      Pandoc version 3.2.0: OK
      Dart Sass version 1.70.0: OK
      Deno version 1.41.0: OK
      Typst version 0.11.0: OK
[>] Checking versions of quarto dependencies......OK
[>] Checking Quarto installation......OK
      Version: 1.5.53
      CodePage: 1252
[>] Checking tools....................OK
      TinyTeX: (not installed)
      Chromium: (not installed)
[>] Checking LaTeX....................OK
      Tex:  (not detected)
[>] Checking basic markdown render....OK
[>] Checking Python 3 installation....(None)
      Unable to locate an installed version of Python 3.
      Install Python 3 from https://www.python.org/downloads/
[>] Checking R installation...........OK
      Version: 4.4.1
      LibPaths:
        - C:/R/R-4.4.1/library
      knitr: 1.49
      rmarkdown: 2.29
[>] Checking Knitr engine render......OK

Package versions


The principal R package versions used in examples and illustrations are listed below. These were captured via sessioninfo:::package_info() from all library() commands in the text, by scripts that also updated the references to packages.
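
For example, the version information for a handful of packages could be captured like this (a sketch with a hypothetical subset):

pkgs <- c("car", "heplots", "candisc")                 # hypothetical subset
sessioninfo::package_info(pkgs, dependencies = FALSE)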


At the time of writing, most of these were current on CRAN repositories but some development versions are indicated as “local” in the source column.

package          version   date         source
bayestestR       0.15.0    2024-10-17   CRAN
broom            1.0.7     2024-09-26   CRAN
candisc          0.9.0     2024-10-31   local
car              3.1-3     2024-09-27   CRAN
carData          3.0-5     2022-01-06   CRAN
corpcor          1.6.10    2021-09-16   CRAN
correlation      0.8.6     2024-10-26   CRAN
corrgram         1.14      2021-04-29   CRAN
corrplot         0.95      2024-10-14   CRAN
datawizard       0.13.0    2024-10-05   CRAN
dplyr            1.1.4     2023-11-17   CRAN
easystats        0.7.3     2024-07-22   CRAN
effects          4.2-2     2022-07-13   CRAN
effectsize       1.0.0     2024-12-10   CRAN
factoextra       1.0.7     2020-04-01   CRAN
FactoMineR       2.11      2024-04-20   CRAN
forcats          1.0.0     2023-01-29   CRAN
genridge         0.8.0     2024-12-02   CRAN
GGally           2.2.1     2024-02-14   CRAN
gganimate        1.0.9     2024-02-27   CRAN
ggbiplot         0.6.2     2024-01-08   CRAN
ggdensity        1.0.0     2023-02-09   CRAN
ggeffects        2.0.0     2024-11-27   CRAN
ggpcp            0.2.0     2022-11-28   CRAN
ggplot2          3.5.1     2024-04-23   CRAN
ggpubr           0.6.0     2023-02-10   CRAN
ggrepel          0.9.6     2024-09-07   CRAN
ggstats          0.7.0     2024-09-22   CRAN
heplots          1.7.3     2024-12-20   local
Hotelling        1.0-8     2021-09-09   CRAN
imager           1.0.2     2024-05-13   CRAN
insight          1.0.0     2024-11-26   CRAN
knitr            1.49      2024-11-08   CRAN
lubridate        1.9.4     2024-12-08   CRAN
magrittr         2.0.3     2022-03-30   CRAN
marginaleffects  0.24.0    2024-11-25   CRAN
MASS             7.3-61    2024-06-13   CRAN
matlib           1.0.1     2024-10-23   local
modelbased       0.8.9     2024-10-26   CRAN
modelsummary     2.2.0     2024-09-02   CRAN
parameters       0.24.0    2024-11-27   CRAN
patchwork        1.3.0     2024-09-16   CRAN
performance      0.12.4    2024-10-18   CRAN
purrr            1.0.2     2023-08-10   CRAN
qgraph           1.9.8     2023-11-03   CRAN
readr            2.1.5     2024-01-10   CRAN
report           0.5.9     2024-07-10   CRAN
Rtsne            0.17      2023-12-07   CRAN
see              0.9.0     2024-09-06   CRAN
stringr          1.5.1     2023-11-14   CRAN
tibble           3.2.1     2023-03-20   CRAN
tidyr            1.3.1     2024-01-24   CRAN
tidyverse        2.0.0     2023-02-22   CRAN
tourr            1.2.0     2024-04-20   CRAN
vcd              1.4-13    2024-09-16   CRAN
VisCollin        0.1.2     2023-09-05   CRAN

References

+
+
+ + +
+ + + + +
+ + +
+ +
+ + + +
+
+Abbott, E. A. (1884). Flatland: A romance of many dimensions. +Buccaneer Books. +
+
+Adler, D., & Murdoch, D. (2023). Rgl: 3D visualization using +OpenGL. https://CRAN.R-project.org/package=rgl +
+
+Aluja, T., Morineau, A., & Sanchez, G. (2018). Principal +component analysis for data science. https://pca4ds.github.io/ +
+
+Andrews, D. F. (1972). Plots of high dimensional data. +Biometrics, 28, 123–136. +
+
+Anscombe, F. J. (1973). Graphs in statistical analysis. The American +Statistician, 27, 17–21. +
+
+Arel-Bundock, V. (2024a). Marginaleffects: Predictions, comparisons, +slopes, marginal means, and hypothesis tests. https://marginaleffects.com/ +
+
+Arel-Bundock, V. (2024b). Modelsummary: Summary tables and plots for +statistical models and data: Beautiful, customizable, and +publication-ready. https://modelsummary.com +
+
+Asimov, D. (1985). Grand tour. SIAM Journal of Scientific and +Statistical Computing, 6(1), 128–143. +
+
+Barab’asi, A.-L. (2016). Network science. Cambridge University +Press. +
+
+Bartlett, M. S. (1937). Properties of sufficiency and statistical tests. +Proceedings of the Royal Society of London. Series A, +160(901), 268–282. https://doi.org/10.2307/96803 +
+
+Becker, R. A., Cleveland, W. S., & Shyu, M.-J. (1996). The visual +design and control of trellis display. Journal of Computational and +Graphical Statistics, 5(2), 123–155. +
+
+Belsley, D. A. (1991). Conditioning diagnostics: Collinearity and +weak data in regression. Wiley. +
+
+Belsley, D. A., Kuh, E., & Welsch, R. E. (1980). Regression +diagnostics: Identifying influential data and sources of +collinearity. John Wiley; Sons. +
+
+Biecek, P., Baniecki, H., Krzyzinski, M., & Cook, D. (2023). +Performance is not enough: A story of the rashomon’s quartet. +https://arxiv.org/abs/2302.13356 +
+
+Black, C., Southwell, C., Emmerson, L., Lunn, D., & Hart, T. (2018). +Time-lapse imagery of adélie penguins reveals differential winter +strategies and breeding site occupation. PLOS ONE, +13(3), e0193532. https://doi.org/10.1371/journal.pone.0193532 +
+
+Blishen, B., Carroll, W., & Moore, C. (1987). The 1981 socioeconomic +index for occupations in canada. Canadian Review of Sociology/Revue +Canadienne de Sociologie, 24(4), 465–488. https://doi.org/10.1111/j.1755-618x.1987.tb00639.x +
+
+Bock, R. D. (1963). Programming univariate and multivariate analysis of +variance. Technometrics, 5(1), 95–117. https://doi.org/10.1080/00401706.1963.10490061 +
+
+Bock, R. D. (1964). A computer program forunivariate and multivariate +analysis of variance. Proceedings of Scientific Symposium on +Statistics. +
+
+Bondy, J. A., & Murty, U. S. R. (2008). Graph theory. +Springer. +
+
+Borg, I., & Groenen, P. J. F. (2005). Modern Multidimensional Scaling: Theory and +Applications. Springer. +
+
+Borg, I., Groenen, P. J. F., & Mair, P. (2018). Applied +multidimensional scaling and unfolding. In SpringerBriefs in +Statistics. Springer International Publishing. https://doi.org/10.1007/978-3-319-73471-2 +
+
+Box, G. E. P. (1949). A general distribution theory for a class of +likelihood criteria. Biometrika, 36(3-4), 317–346. https://doi.org/10.1093/biomet/36.3-4.317 +
+
+Box, G. E. P. (1950). Problems in the analysis of growth and +wear curves. Biometrics, 6, 362–389. +
+
+Box, G. E. P. (1953). Non-normality and tests on variances. +Biometrika, 40(3/4), 318–335. https://doi.org/10.2307/2333350 +
+
+Brown, M. B., & Forsythe, A. B. (1974). Robust tests for equality of +variances. Journal of the American Statistical Association, +69(346), 364–367. https://doi.org/10.1080/01621459.1974.10482955 +
+
+Brown, P. J., & Zidek, J. V. (1980). Adaptive multivariate ridge +regression. The Annals of Statistics, 8(1), 64–74. http://www.jstor.org/stable/2240743 +
+
+Buja, A., Cook, D., Asimov, D., & Hurley, C. (2005). Computational +methods for high-dimensional rotations in data visualization. In J. S. +CR Rao EJ Wegman (Ed.), Handbook of statistics (pp. 391–413). +Elsevier. https://doi.org/10.1016/s0169-7161(04)24014-7 +
+
+cagne, M. (1885). Coordonnées parallèles +et axiales: Méthode de transformation +géométrique et +procédé nouveau de calcul graphique +déduits de la considération des +coordonnées parallèlles. +Gauthier-Villars. http://historical.library.cornell.edu/cgi-bin/cul.math/docviewer?did=00620001&seq=3 +
+
+Cajori, F. (1926). Origins of fourth dimension concepts. The +American Mathematical Monthly, 33(8), 397–406. https://doi.org/10.1080/00029890.1926.11986607 +
+
+Cattell, R. B. (1966). The scree test for the number of factors. +Multivariate Behavioral Research, 1(2), 245–276. https://doi.org/10.1207/s15327906mbr0102_10 +
+
+Chambers, J. M., & Hastie, T. J. (1991). Statistical models in +s (p. 624). Chapman & Hall/CRC. +
+
+Cleveland, W. S. (1979). Robust locally weighted regression and +smoothing scatterplots. Journal of the American Statistical +Association, 74, 829–836. +
+
+Cleveland, W. S. (1985). The elements of graphing data. +Wadsworth Advanced Books. +
+
+Cleveland, W. S., & Devlin, S. J. (1988). Locally weighted +regression: An approach to regression analysis by local fitting. +Journal of the American Statistical Association, 83, +596–610. +
+
+Cleveland, W. S., & McGill, R. (1984). Graphical perception: Theory, +experimentation and application to the development of graphical methods. +Journal of the American Statistical Association, 79, +531–554. +
+
+Cleveland, W. S., & McGill, R. (1985). Graphical perception and +graphical methods for analyzing scientific data. Science, +229, 828–833. +
+
+Clyde, D. J., Cramer, E. M., & Sherin, R. J. (1966). +Multivariate statistical programs. Biometric +Laboratory,University of Miami. +
+
+Cochran, W. G. (1941). The distribution of the largest of a set of +estimated variances as a fraction of their total. Annals of +Eugenics, 11(1), 47–52. https://doi.org/10.1111/j.1469-1809.1941.tb02271.x +
+
+Conover, W. J., Johnson, M. E., & Johnson, M. M. (1981). A +comparative study of tests for homogeneity of variances, with +applications to the outer continental shelf bidding data. +Technometrics, 23(4), 351–361. https://doi.org/10.1080/00401706.1981.10487680 +
+
+Cook, D., Buja, A., Cabrera, J., & Hurley, C. (1995). Grand tour and +projection pursuit. Journal of Computational and Graphical +Statistics, 4(3), 155. https://doi.org/10.2307/1390844 +
+
+Cook, D., Buja, A., Lee, E.-K., & Wickham, H. (2008). Grand tours, +projection pursuit guided tours, and manual controls. In Handbook of +data visualization (pp. 295–314). Springer Berlin Heidelberg. https://doi.org/10.1007/978-3-540-33037-0_13 +
+
+Cook, D., & Laa, U. (2024). Interactively exploring +high-dimensional data and models in R. Online. https://dicook.github.io/mulgar_book/ +
+
+Cook, D., & Swayne, D. F. (2007). Interactive and dynamic +graphics for data analysis : With R and +GGobi. Springer. http://www.ggobi.org/book/ +
+
+Cook, R. D. (1977). Detection of influential observation in linear +regression. Technometrics, 19(1), 15–18. http://links.jstor.org/sici?sici=0040-1706%28197702%2919%3A1%3C15%3ADOIOIL%3E2.0.CO%3B2-8 +
+
+Cook, R. D. (1993). Exploring partial residual plots. +Technometrics, 35(4), 351–362. +
+
+Cook, R. D. (1996). Added-variable plots and curvature in linear +regression. Technometrics, 38(3), 275–278. https://doi.org/10.1080/00401706.1996.10484507 +
+
+Cook, R. D., & Weisberg, S. (1982). Residuals and influence in +regression. Chapman; Hall. +
+
+Cook, R. D., & Weisberg, S. (1994). ARES plots for generalized +linear models. Computational Statistics & Data Analysis, +17(3), 303–315. https://doi.org/10.1016/0167-9473(92)00075-3 +
+
+Costantini, G., Epskamp, S., Borsboom, D., Perugini, M., Mõttus, R., +Waldorp, L. J., & Cramer, A. O. J. (2015). State of the aRt personality research: A tutorial on network +analysis of personality data in R. Journal of Research +in Personality, 54, 13–29. https://doi.org/10.1016/j.jrp.2014.07.003 +
+
+Cotton, R. (2013). Learning R. O’Reilly Media. +
+
+Cox, D. R. (1968). Notes on some aspects of regression analysis. +Journal of the Royal Statistical Society Series A, +131, 265–279. +
+
+Csárdi, G., Nepusz, T., Traag, V., Horvát, S., Zanini, F., Noom, D., +& Müller, K. (2024). igraph: Network +analysis and visualization in r. https://doi.org/10.5281/zenodo.7682609 +
+
+Curran, J., & Hersh, T. (2021). Hotelling: Hotelling’s t^2 test +and variants. https://CRAN.R-project.org/package=Hotelling +
+
+Davies, R., Locke, S., & D’Agostino McGowan, L. (2022). +datasauRus: Datasets from the datasaurus dozen. https://CRAN.R-project.org/package=datasauRus +
+
+Davis, C. (1990). Body image and weight preoccupation: A comparison +between exercising and non-exercising women. Appetite, +16(1), 84. https://doi.org/10.1016/0195-6663(91)90115-9 +
+
+Dempster, A. P. (1969). Elements of continuous multivariate +analysis. Addison-Wesley. +
+
+Dempster, A. P. (1972). Covariance selection. Biometrics, +28(1), 157–175. +
+
+Dixon, W. J. (1965). BMD biomedical computer programs. Health +Sciences Computing Facility, School of Medicine, University of +California; Health Sciences Computing Faculty. +
+
+Dray, S., Siberchicot, A., & Jean Thioulouse. Based on earlier work +by Alice Julien-Laferrière., with contributions from. (2023). +Adegraphics: An S4 lattice-based package for the representation of +multivariate data. http://pbil.univ-lyon1.fr/ADE-4/ +
+
+Duncan, O. D. (1961). A socioeconomic index for all occupations. In Jr. +A. J. Reiss, P. K. H. O. D. Duncan, & C. C. North (Eds.), +Occupations and social status. The Free Press. +
+
+Efron, B., Hastie, T., Johnstone, I., & Tibshirani, R. (2004). Least +angle regression. The Annals of Statistics, 32(2), +407–499. +
+
+Emerson, J. W., Green, W. A., Schloerke, B., Crowley, J., Cook, D., +Hofmann, H., & Wickham, H. (2013). The generalized pairs plot. +Journal of Computational and Graphical Statistics, +22(1), 79–91. http://www.tandfonline.com/doi/ref/10.1080/10618600.2012.694762 +
+
+Euler, L. (1758). Elementa doctrinae solidorum. Novi Commentarii +Academiae Scientiarum Petropolitanae, 4, 109–140. https://scholarlycommons.pacific.edu/euler-works/230/ +
+
+Farquhar, A. B., & Farquhar, H. (1891). Economic and industrial +delusions: A discourse of the case for protection. Putnam. +
+
+Fienberg, S. E. (1971). Randomization and social affairs: The 1970 draft +lottery. Science, 171, 255–261. +
+
+Finn, J. D. (1967). MULTIVARIANCE: Fortran program for +univariate and multivariate analysis of variance and covariance. +School of Education, State University of New York at Buffalo. +
+
+Fisher, R. A. (1923). Studies in crop variation. II. The manurial +response of different potato varieties. The Journal of Agricultural +Science, 13(2), 311–320. https://hdl.handle.net/2440/15179 +
+
+Fisher, R. A. (1925b). Statistical methods for research +workers. Oliver & Boyd. +
+
+Fisher, R. A. (1925a). Statistical methods for research workers +(6th ed.). Oliver & Boyd. +
+
+Fisher, R. A. (1936). The use of multiple measurements in taxonomic +problems. Annals of Eugenics, 7(2), 179–188. https://doi.org/10.1111/j.1469-1809.1936.tb02137.x +
+
+Fisherkeller, M. A., Friedman, J. H., & Tukey, J. W. (1974). PRIM-9, an interactive multidimensional data display and analysis system. Proceedings of the Pacific ACM Regional Conference.
+
+Flury, B., & Riedwyl, H. (1988). Multivariate statistics: A +practical approach. Chapman & Hall. +
+
+Fox, J. (1987). Effect displays for generalized linear models. In C. C. +Clogg (Ed.), Sociological methodology, 1987 (pp. 347–361). +Jossey-Bass. +
+
+Fox, J. (2003). Effect displays in R for generalized linear +models. Journal of Statistical Software, 8(15), 1–27. +
+
+Fox, J. (2016). Applied regression analysis and generalized linear models (3rd ed.). SAGE.
+
+Fox, J. (2020). Regression diagnostics (2nd ed.). +SAGE Publications, Inc. https://doi.org/10.4135/9781071878651 +
+
+Fox, J. (2021). A mathematical primer for social statistics +(2nd ed.). SAGE Publications, Inc. https://doi.org/10.4135/9781071878835 +
+
+Fox, J., & Monette, G. (1992). Generalized collinearity diagnostics. +Journal of the American Statistical Association, +87(417), 178–183. +
+
+Fox, J., & Weisberg, S. (2018a). An R companion to applied regression (3rd ed.). SAGE Publications. https://books.google.ca/books?id=uPNrDwAAQBAJ
+
+Fox, J., & Weisberg, S. (2018b). Visualizing fit and lack of fit in +complex regression models with predictor effect plots and partial +residuals. Journal of Statistical Software, 87(9). https://doi.org/10.18637/jss.v087.i09 +
+
+Fox, J., Weisberg, S., & Price, B. (2023). Car: Companion to +applied regression. https://CRAN.R-project.org/package=car +
+
+Fox, J., Weisberg, S., Price, B., Friendly, M., & Hong, J. (2022). +Effects: Effect displays for linear, generalized linear, and other +models. https://www.r-project.org +
+
+Friedman, J., Hastie, T., Tibshirani, R., Narasimhan, B., Tay, K., +Simon, N., & Yang, J. (2023). Glmnet: Lasso and elastic-net +regularized generalized linear models. https://glmnet.stanford.edu +
+
+Friendly, M. (1991). SAS System for statistical graphics (1st ed.). SAS Institute. http://www.sas.com/service/doc/pubcat/uspubcat/ind_files/56143.html
+
+Friendly, M. (1994). Mosaic displays for multi-way contingency tables. +Journal of the American Statistical Association, 89, +190–200. http://www.jstor.org/stable/2291215 +
+
+Friendly, M. (1999). Extending mosaic displays: Marginal, conditional, +and partial views of categorical data. Journal of Computational and +Graphical Statistics, 8(3), 373–395. http://datavis.ca/papers/drew/drew.pdf +
+
+Friendly, M. (2002). Corrgrams: Exploratory displays for correlation +matrices. The American Statistician, 56(4), 316–324. +https://doi.org/10.1198/000313002533 +
+
+Friendly, M. (2007). HE plots for multivariate general +linear models. Journal of Computational and Graphical +Statistics, 16(2), 421–444. https://doi.org/10.1198/106186007X208407 +
+
+Friendly, M. (2008). The Golden Age of statistical +graphics. Statistical Science, 23(4), 502–535. https://doi.org/10.1214/08-STS268 +
+
+Friendly, M. (2011). Generalized ridge trace plots: Visualizing bias +and precision with the genridge R package. SCS +Seminar. +
+
+Friendly, M. (2013). The generalized ridge trace plot: Visualizing bias +and precision. Journal of Computational and Graphical +Statistics, 22(1), 50–68. https://doi.org/10.1080/10618600.2012.681237 +
+
+Friendly, M. (2022). The life and works of André-Michel Guerry, revisited. Sociological Spectrum, 42(4–6), 233–259. https://doi.org/10.1080/02732173.2022.2078450
+
+Friendly, M. (2023). vcdExtra: Vcd extensions and additions. https://friendly.github.io/vcdExtra/ +
+
+Friendly, M. (2024). Genridge: Generalized ridge trace plots for +ridge regression. https://github.com/friendly/genridge +
+
+Friendly, M., Fox, J., & Chalmers, P. (2024). Matlib: Matrix +functions for teaching and learning linear algebra and multivariate +statistics. https://github.com/friendly/matlib +
+
+Friendly, M., & Kwan, E. (2003). Effect ordering for data displays. +Computational Statistics and Data Analysis, 43(4), +509–539. https://doi.org/10.1016/S0167-9473(02)00290-6 +
+
+Friendly, M., & Kwan, E. (2009). Where’s Waldo: +Visualizing collinearity diagnostics. The American +Statistician, 63(1), 56–65. https://doi.org/10.1198/tast.2009.0012 +
+
+Friendly, M., & Meyer, D. (2016). Discrete data analysis with +R: Visualization and modeling techniques for categorical +and count data. Chapman & Hall/CRC. +
+
+Friendly, M., Monette, G., & Fox, J. (2013). Elliptical insights: +Understanding statistical methods through elliptical geometry. +Statistical Science, 28(1), 1–39. https://doi.org/10.1214/12-STS402 +
+
+Friendly, M., & Wainer, H. (2021). A history of data +visualization and graphic communication. Harvard University Press. +https://doi.org/10.4159/9780674259034 +
+
+Fuller, W. (2006). Measurement error models (2nd ed.). John +Wiley & Sons. +
+
+Funkhouser, H. G. (1937). Historical development of the graphical +representation of statistical data. Osiris, 3(1), +269–405. http://tinyurl.com/32ema9 +
+
+Gabriel, K. R. (1971). The biplot graphic display of matrices with application to principal components analysis. Biometrika, 58(3), 453–467. https://doi.org/10.2307/2334381
+
+Gabriel, K. R. (1981). Biplot display of multivariate matrices for inspection of data and diagnosis. In V. Barnett (Ed.), Interpreting multivariate data (pp. 147–173). John Wiley & Sons.
+
+Galton, F. (1863). Meteorographica, or methods of mapping the +weather. Macmillan. http://www.mugu.com/galton/books/meteorographica/index.htm +
+
+Galton, F. (1886). Regression towards mediocrity in hereditary stature. +Journal of the Anthropological Institute, 15, 246–263. +http://www.jstor.org/cgi-bin/jstor/viewitem/09595295/dm995266/99p0374f/0 +
+
+Galton, F. (1889). Natural inheritance. Macmillan. http://galton.org/books/natural-inheritance/pdf/galton-nat-inh-1up-clean.pdf +
+
+Gannett, H. (1898). Statistical atlas of the United States, eleventh (1890) census. U.S. Government Printing Office.
+
+Gastwirth, J. L., Gel, Y. R., & Miao, W. (2009). The impact of Levene’s test of equality of variances on +statistical theory and practice. Statistical Science, +24(3), 343–360. https://doi.org/10.1214/09-STS301 +
+
+Gelman, A., Hullman, J., & Kennedy, L. (2023). Causal quartets: +Different ways to attain the same average treatment effect. http://www.stat.columbia.edu/~gelman/research/unpublished/causal_quartets.pdf +
+
+Goeman, J., Meijer, R., Chaturvedi, N., & Lueder, M. (2022). Penalized: L1 (lasso and fused lasso) and L2 (ridge) penalized estimation in GLMs and in the Cox model. https://CRAN.R-project.org/package=penalized
+
+Gorman, K. B., Williams, T. D., & Fraser, W. R. (2014). Ecological sexual dimorphism and environmental variability within a community of Antarctic penguins (genus Pygoscelis). PLoS ONE, 9(3), e90081. https://doi.org/10.1371/journal.pone.0090081
+
+Gower, J. C., & Hand, D. J. (1996). Biplots. Chapman & +Hall. +
+
+Gower, J. C., Lubbe, S. G., & Le Roux, N. J. (2011). Understanding biplots. Wiley. http://books.google.ca/books?id=66gQCi5JOKYC
+
+Grandjean, M. (2016). A social network analysis of Twitter: Mapping the digital humanities community. Cogent Arts & Humanities, 3(1), 1171458. https://doi.org/10.1080/23311983.2016.1171458
+
+Graybill, F. A. (1961). An introduction to linear statistical +models. McGraw-Hill. +
+
+Greenacre, M. (1984). Theory and applications of correspondence +analysis. Academic Press. +
+
+Greenacre, M. (2010). Biplots in practice. Fundación BBVA. https://books.google.ca/books?id=dv4LrFP7U_EC
+
+Guerry, A.-M. (1833). Essai sur la statistique morale de la +France. Crochard. +
+
+Hahsler, M., Buchta, C., & Hornik, K. (2024). Seriation: +Infrastructure for ordering objects using seriation. https://github.com/mhahsler/seriation +
+
+Haitovsky, Y. (1987). On multivariate ridge regression. +Biometrika, 74(3), 563–570. https://doi.org/10.1093/biomet/74.3.563 +
+
+Harrison, P. (2023). Langevitour: Smooth interactive touring of high +dimensions, demonstrated with scRNA-seq data. The R Journal, +15(2), 206–219. https://doi.org/10.32614/RJ-2023-046 +
+
+Harrison, P. (2024). Langevitour: Langevin tour. https://logarithmic.net/langevitour/ +
+
+Hart, C., & Wang, E. (2022). Detourr: Portable and performant +tour animations. https://CRAN.R-project.org/package=detourr +
+
+Hartigan, J. A. (1975a). Clustering algorithms. John Wiley & Sons.
+
+Hartigan, J. A. (1975b). Printer graphics for clustering. Journal of Statistical Computation and Simulation, 4, 187–213.
+
+Hartley, H. O. (1950). The use of range in analysis of variance. +Biometrika, 37(3–4), 271–280. https://doi.org/10.1093/biomet/37.3-4.271 +
+
+Hartman, L. I. (2016). Schizophrenia and schizoaffective disorder: +One condition or two? [PhD dissertation]. York University. +
+
+Harwell, M. R., Rubinstein, E. N., Hayes, W. S., & Olds, C. C. (1992). Summarizing Monte Carlo results in methodological research: The one- and two-factor fixed effects ANOVA cases. Journal of Educational and Behavioral Statistics, 17(4), 315–339. https://doi.org/10.3102/10769986017004315
+
+Hastie, T., Tibshirani, R., & Friedman, J. (2009). The elements +of statistical learning: Data mining, inference and prediction (2nd +ed.). Springer. http://www-stat.stanford.edu/~tibs/ElemStatLearn/ +
+
+Healy, M. J. R. (1968). Multivariate normal plotting. Journal of the +Royal Statistical Society Series C, 17(2), 157–161. +
+
+Heinrichs, R. W., Pinnock, F., Muharib, E., Hartman, L., Goldberg, J., +& McDermid Vaz, S. (2015). Neurocognitive normality in schizophrenia +revisited. Schizophrenia Research: Cognition, 2(4), +227–232. https://doi.org/10.1016/j.scog.2015.09.001 +
+
+Herschel, J. F. W. (1833). On the investigation of the orbits of +revolving double stars: Being a supplement to a paper entitled +"micrometrical measures of 364 double stars". Memoirs of the Royal +Astronomical Society, 5, 171–222. +
+
+Hoaglin, D. C., & Welsch, R. E. (1978). The hat matrix in regression +and ANOVA. The American Statistician, +32(1), 17–22. https://doi.org/10.1080/00031305.1978.10479237 +
+
+Hocking, R. R. (2013). Methods and applications of linear models: +Regression and the analysis of variance. Wiley. https://books.google.ca/books?id=iq2J-1iS6HcC +
+
+Hoerl, A. E., & Kennard, R. W. (1970). Ridge regression: +Biased estimation for nonorthogonal problems. +Technometrics, 12, 55–67. +
+
+Hoerl, A. E., Kennard, R. W., & Baldwin, K. F. (1975). Ridge +regression: Some simulations. Communications in Statistics, +4(2), 105–123. https://doi.org/10.1080/03610927508827232 +
+
+Hofmann, H., VanderPlas, S., & Ge, Y. (2022). Ggpcp: Parallel +coordinate plots in the ggplot2 framework. https://github.com/heike/ggpcp +
+
+Hofstadter, D. R. (1979). Gödel, Escher, Bach: An eternal golden braid. Basic Books.
+
+Højsgaard, S., Edwards, D., & Lauritzen, S. (2012). Graphical +models with R. Springer Science & Business Media. +
+
+Horst, A., Hill, A., & Gorman, K. (2022). palmerpenguins: Palmer Archipelago (Antarctica) penguin data. https://allisonhorst.github.io/palmerpenguins/
+
+Hotelling, H. (1931). The generalization of Student’s ratio. The Annals of Mathematical +Statistics, 2(3), 360–378. https://doi.org/10.1214/aoms/1177732979 +
+
+Husson, F., Josse, J., Le, S., & Mazet, J. (2024). FactoMineR: +Multivariate exploratory data analysis and data mining. http://factominer.free.fr +
+
+Husson, F., Le, S., & Pagès, J. (2017). Exploratory multivariate analysis by example using R. Chapman & Hall. https://doi.org/10.1201/b21874
+
+IBM. (1965). Proceedings of the IBM scientific computing symposium +on statistics: Oct 21-23, 1963 (L. Robinson, Ed.). IBM. https://www.amazon.com/Proceedings-Scientific-Computing-Symposium-Statistics/dp/B000GL5RLU +
+
+Inselberg, A. (1985). The plane with parallel coordinates. The +Visual Computer, 1, 69–91. +
+
+Isvoranu, A.-M., Epskamp, S., Waldorp, L. J., & Borsboom, D. (2022). Network psychometrics with R: A guide for behavioral and social scientists. Routledge. https://doi.org/10.4324/9781003111238
+
+Kassambara, A., & Mundt, F. (2020). Factoextra: Extract and +visualize the results of multivariate data analyses. http://www.sthda.com/english/rpkgs/factoextra +
+
+Kastellec, J. P., & Leoni, E. L. (2007). Using graphs instead of +tables in political science. Perspectives on Politics, +5(04), 755–771. https://doi.org/10.1017/S1537592707072209 +
+
+Krijthe, J. (2023). Rtsne: T-distributed stochastic neighbor embedding using a Barnes-Hut implementation. https://github.com/jkrijthe/Rtsne
+
+Kruskal, J. B. (1964). Multidimensional scaling by optimizing goodness +of fit to a nonmetric hypothesis. Psychometrika, +29(1), 1–27. https://doi.org/10.1007/bf02289565 +
+
+Kwan, E., Lu, I. R. R., & Friendly, M. (2009). Tableplot: A new tool +for assessing precise predictions. Zeitschrift für +Psychologie / Journal of Psychology, 217(1), 38–48. https://doi.org/10.1027/0044-3409.217.1.38 +
+
+Larmarange, J. (2024). Ggstats: Extension to ggplot2 for plotting +stats. https://larmarange.github.io/ggstats/ +
+
+Larsen, W. A., & McCleary, S. J. (1972). The use of partial residual +plots in regression analysis. Technometrics, 14, +781–790. +
+
+Lauritzen, S. L. (1996). Graphical models. Oxford University +Press. +
+
+Lawless, J. F., & Wang, P. (1976). A simulation study of ridge and +other regression estimators. Communications in Statistics, +5, 307–323. +
+
+Lee, E.-K., & Cook, D. (2009). A projection pursuit index for large +p small n data. Statistics and Computing, 20(3), +381–392. https://doi.org/10.1007/s11222-009-9131-1 +
+
+Lee, S. (2021). Liminal: Multivariate data visualization with tours +and embeddings. https://CRAN.R-project.org/package=liminal +
+
+Levene, H. (1960). Robust tests for equality of variances. In I. Olkin, +S. G. Ghurye, W. Hoeffding, W. G. Madow, & H. B. Mann (Eds.), +Contributions to probability and statistics: Essays in honor of +Harold Hotelling (pp. 278–292). Stanford University +Press. +
+
+Lix, L. M., Keselman, J. C., & Keselman, H. J. (1996). Consequences of assumption violations revisited: A quantitative review of alternatives to the one-way analysis of variance F test. Review of Educational Research, 66(4), 579–619. https://doi.org/10.3102/00346543066004579
+
+Longley, J. W. (1967). An appraisal of least squares programs for the electronic computer from the point of view of the user. Journal of the American Statistical Association, 62, 819–841. https://doi.org/10.1080/01621459.1967.10500896
+
+Lüdecke, D. (2024). Ggeffects: Create tidy data frames of marginal effects for ggplot from model outputs. https://strengejacke.github.io/ggeffects/
+
+Lüdecke, D., Ben-Shachar, M. S., Patil, I., Waggoner, P., & Makowski, D. (2021). performance: An R package for assessment, comparison and testing of statistical models. Journal of Open Source Software, 6(60), 3139. https://doi.org/10.21105/joss.03139
+
+Lüdecke, D., Ben-Shachar, M. S., Patil, I., Wiernik, B. M., & Makowski, D. (2022). Easystats: Framework for easy statistical modeling, visualization, and reporting. CRAN. https://easystats.github.io/easystats/
+
+Maaten, L. van der, & Hinton, G. (2008). Visualizing data using +t-SNE. Journal of Machine Learning +Research, 9, 2579–2605. http://www.jmlr.org/papers/v9/vandermaaten08a.html +
+
+Mardia, K. V. (1970). Measures of multivariate skewness and kurtosis with applications. Biometrika, 57(3), 519–530. https://doi.org/10.2307/2334770
+
+Mardia, K. V. (1974). Applications of some measures of multivariate +skewness and kurtosis in testing normality and robustness studies. +Sankhya: The Indian Journal of Statistics, Series B, +36(2), 115–128. http://www.jstor.org/stable/25051892 +
+
+Marquardt, D. W. (1970). Generalized inverses, ridge regression, biased +linear estimation, and nonlinear estimation. Technometrics, +12, 591–612. +
+
+Marquardt, D. W., & Snee, R. D. (1975). Ridge regression in +practice. The American Statistician, 29(1), 3–20. https://doi.org/10.1080/00031305.1975.10479105 +
+
+Martí, R., & Laguna, M. (2003). Heuristics and meta-heuristics for +2-layer straight line crossing minimization. Discrete Applied +Mathematics, 127(3), 665–678. +
+
+Matejka, J., & Fitzmaurice, G. (2017, May). Same stats, different +graphs. Proceedings of the 2017 CHI Conference on Human +Factors in Computing Systems. https://doi.org/10.1145/3025453.3025912 +
+
+Matloff, N. (2011). The art of R programming: +A tour of statistical software design. No Starch +Press. +
+
+McDonald, G. C. (2009). Ridge regression. Wiley Interdisciplinary +Reviews: Computational Statistics, 1(1), 93–100. https://doi.org/10.1002/wics.14 +
+
+McGowan, L. D., Gerke, T., & Barrett, M. (2023). Causal inference is +not just a statistics problem. Journal of Statistics and Data +Science Education, 1–9. https://doi.org/10.1080/26939169.2023.2276446 +
+
+Meyer, D., Zeileis, A., Hornik, K., & Friendly, M. (2024). Vcd: +Visualizing categorical data. https://CRAN.R-project.org/package=vcd +
+
+Meyers, L. S., Gamst, G., & Guarino, A. J. (2006). Applied +multivariate research: Design and interpretation. SAGE +Publications. +
+
+Monette, G. (1990). Geometry of multiple regression and interactive +3-D graphics. In J. Fox & S. Long (Eds.), Modern +methods of data analysis (pp. 209–256). SAGE Publications. +
+
+O’Brien, P. C. (1992). Robust procedures for testing equality of +covariance matrices. Biometrics, 48(3), 819–827. http://www.jstor.org/stable/2532347 +
+
+Oksanen, J., Simpson, G. L., Blanchet, F. G., Kindt, R., Legendre, P., +Minchin, P. R., O’Hara, R. B., Solymos, P., Stevens, M. H. H., Szoecs, +E., Wagner, H., Barbour, M., Bedward, M., Bolker, B., Borcard, D., +Carvalho, G., Chirico, M., De Caceres, M., Durand, S., … Weedon, J. +(2024). Vegan: Community ecology package. https://github.com/vegandevs/vegan +
+
+Otto, J., & Kahle, D. (2023). Ggdensity: Interpretable bivariate +density visualization with ggplot2. https://jamesotto852.github.io/ggdensity/ +
+
+Pearson, K. (1896). Contributions to the mathematical theory of +evolution—III, regression, heredity and panmixia. +Philosophical Transactions of the Royal Society of London, +187, 253–318. +
+
+Pearson, K. (1901). On lines and planes of closest fit to systems of +points in space. Philosophical Magazine, 6(2), +559–572. +
+
+Pearson, K. (1903). I. Mathematical contributions to the theory of +evolution. —XI. On the influence of natural selection on the variability +and correlation of organs. Philosophical Transactions of the Royal +Society of London, 200(321–330), 1–66. https://doi.org/10.1098/rsta.1903.0001 +
+
+Pedersen, T. L., & Robinson, D. (2024). Gganimate: A grammar of +animated graphics. https://gganimate.com +
+
+Pineo, P. O., & Porter, J. (1967). Occupational prestige in Canada. Canadian Review of Sociology, 4(1), 24–40. https://doi.org/10.1111/j.1755-618X.1967.tb00472.x
+
+Pineo, P. O., & Porter, J. (2008). Occupational prestige in Canada. Canadian Review of Sociology, 4(1), 24–40. https://doi.org/10.1111/j.1755-618x.1967.tb00472.x
+
+Playfair, W. (1786). Commercial and political atlas: Representing, by copper-plate charts, the progress of the commerce, revenues, expenditure, and debts of England, during the whole of the eighteenth century. Debrett; Robinson; Sewell. http://ucpj.uchicago.edu/Isis/journal/demo/v000n000/000000/000000.fg4.html
+
+Playfair, W. (1801). Statistical breviary; shewing, on a principle +entirely new, the resources of every state and kingdom in +Europe. Wallis. +
+
+Reaven, G. M., & Miller, R. G. (1968). Study of the relationship +between glucose and insulin responses to an oral glucose load in man. +Diabetes, 17(9), 560–569. https://doi.org/10.2337/diab.17.9.560 +
+
+Reaven, G. M., & Miller, R. G. (1979). An attempt to define the +nature of chemical diabetes using a multidimensional analysis. +Diabetologia, 16, 17–24. +
+
+Robinaugh, D. J., Hoekstra, R. H. A., Toner, E. R., & Borsboom, D. +(2019). The network approach to psychopathology: A review of the +literature 2008–2018 and an agenda for future research. +Psychological Medicine, 50(3), 353–366. https://doi.org/10.1017/s0033291719003404 +
+
+Rogan, J. C., & Keselman, H. J. (1977). Is the ANOVA +f-test robust to variance heterogeneity when sample sizes are equal?: An +investigation via a coefficient of variation. American Educational +Research Journal, 14(4), 493–498. https://doi.org/10.3102/00028312014004493 +
+
+Sarkar, D. (2024). Lattice: Trellis graphics for R. https://lattice.r-forge.r-project.org/
+
+Scheffé, H. A. (1960). The analysis of variance. Wiley. +
+
+Schloerke, B., Cook, D., Larmarange, J., Briatte, F., Marbach, M., +Thoen, E., Elberg, A., & Crowley, J. (2024). GGally: Extension +to ggplot2. https://ggobi.github.io/ggally/ +
+
+Scott, D. W. (1992). Multivariate density estimation: Theory, +practice, and visualization. Wiley. +
+
+Searle, S. R., Speed, F. M., & Milliken, G. A. (1980). Population +marginal means in the linear model: An alternative to least squares +means. The American Statistician, 34(4), 216–221. +
+
+Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test +for normality (complete samples). Biometrika, 52(3–4), +591–611. https://doi.org/10.1093/biomet/52.3-4.591 +
+
+Shepard, R. N. (1962a). The analysis of proximities: Multidimensional scaling with an unknown distance function. I. Psychometrika, 27(2), 125–140. https://doi.org/10.1007/bf02289630
+
+Shepard, R. N. (1962b). The analysis of proximities: Multidimensional +scaling with an unknown distance function. II. Psychometrika, +27(3), 219–246. https://doi.org/10.1007/bf02289621 +
+
+Shepard, R. N., Romney, A. K., Nerlove, S. B., & Board, M. S. S. (1972a). Multidimensional scaling: Theory and applications in the behavioral sciences: Vol. II. Applications. Seminar Press. https://books.google.ca/books?id=PpFAAQAAIAAJ
+
+Shepard, R. N., Romney, A. K., Nerlove, S. B., & Board, M. S. S. (1972b). Multidimensional scaling: Theory and applications in the behavioral sciences: Vol. I. Theory. Seminar Press. https://books.google.ca/books?id=pJRAAQAAIAAJ
+
+Shoben, E. J. (1983). Applications of multidimensional scaling in +cognitive psychology. Applied Psychological Measurement, +7(4), 473–490. https://doi.org/10.1177/014662168300700406 +
+
+Silverman, B. W. (1986). Density estimation for statistics and data +analysis. Chapman & Hall. +
+
+Simpson, E. H. (1951). The interpretation of interaction in contingency tables. Journal of the Royal Statistical Society, Series B, 13, 238–241.
+
+Swayne, D. F., Cook, D., & Buja, A. (1998). XGobi: Interactive dynamic data visualization in the X Window System. Journal of Computational and Graphical Statistics, 7(1), 113–130. https://doi.org/10.1080/10618600.1998.10474764
+
+Swayne, D. F., Lang, D. T., Buja, A., & Cook, D. (2003). GGobi: Evolving from XGobi into an extensible framework for interactive data visualization. Computational Statistics & Data Analysis, 43(4), 423–444. https://doi.org/10.1016/s0167-9473(02)00286-4
+
+Teetor, P. (2011). R cookbook. +O’Reilly Media. +
+
+Tibshirani, R. (1996). Regression shrinkage and selection via the lasso. +Journal of the Royal Statistical Society, Series B: +Methodological, 58, 267–288. +
+
+Tiku, M. L., & Balakrishnan, N. (1984). Testing equality of +population variances the robust way. Communications in Statistics - +Theory and Methods, 13(17), 2143–2159. https://doi.org/10.1080/03610928408828818 +
+
+Timm, N. H. (1975). Multivariate analysis with applications in +education and psychology. Wadsworth (Brooks/Cole). +
+
+Torgerson, W. S. (1952). Multidimensional scaling: I. Theory and method. +Psychometrika, 17(4), 401–419. https://doi.org/10.1007/bf02288916 +
+
+VanderPlas, S., Ge, Y., Unwin, A., & Hofmann, H. (2023). Penguins go +parallel: A grammar of graphics framework for generalized parallel +coordinate plots. Journal of Computational and Graphical +Statistics, 1–16. https://doi.org/10.1080/10618600.2023.2195462 +
+
+Velleman, P. F., & Welsch, R. E. (1981). Efficient computing of regression diagnostics. The American Statistician, 35(4), 234–242.
+
+Vinod, H. D. (1978). A survey of ridge regression and related techniques +for improvements over ordinary least squares. The Review of +Economics and Statistics, 60(1), 121–131. http://www.jstor.org/stable/1924340 +
+
+Waddell, A., & Oldford, R. W. (2023). Loon: Interactive +statistical data visualization. https://CRAN.R-project.org/package=loon +
+
+Warne, R. T. (2014). A primer on multivariate analysis of variance (MANOVA) for behavioral scientists. Practical Assessment, Research & Evaluation, 19(1). https://scholarworks.umass.edu/pare/vol19/iss1/17/
+
+Wegman, E. J. (1990). Hyperdimensional data analysis using parallel +coordinates. Journal of the American Statistical Association, +85(411), 664–675. +
+
+Wei, T., & Simko, V. (2024). Corrplot: Visualization of a +correlation matrix. https://github.com/taiyun/corrplot +
+
+Welch, B. L. (1947). The generalization of “Student’s” problem when several different population variances are involved. Biometrika, 34(1–2), 28–35. https://doi.org/10.1093/biomet/34.1-2.28
+
+West, D. B. (2001). Introduction to graph theory. Prentice Hall.
+
+Whittaker, J. (1990). Graphical models in applied multivariate statistics. John Wiley & Sons.
+
+Wickham, H. (2014). Advanced R. Chapman and +Hall/CRC. +
+
+Wickham, H., & Cook, D. (2024). Tourr: Tour methods for +multivariate data visualisation. https://github.com/ggobi/tourr +
+
+Wickham, H., Cook, D., Hofmann, H., & Buja, A. (2011). Tourr: An +R package for exploring multivariate data with projections. +Journal of Statistical Software, 40(2). https://doi.org/10.18637/jss.v040.i02 +
+
+Wilkinson, G. N., & Rogers, C. E. (1973). Symbolic description of +factorial models for analysis of variance. Applied Statistics, +22(3), 392. https://doi.org/10.2307/2346786 +
+
+Winer, B. J. (1962). Statistical principles in experimental +design. McGraw-Hill. +
+
+Wood, S. N. (2006). Generalized additive models: An introduction with R. Chapman & Hall/CRC Press.
+
+Wright, K. (2021). Corrgram: Plot a correlogram. https://kwstat.github.io/corrgram/ +
+
+Xie, Y. (2021). Animation: A gallery of animations in statistics and +utilities to create animations. https://yihui.org/animation/ +
+
+Xu, Z., & Oldford, R. W. (2021). loon.tourr: Tour in ’loon’. https://cran.r-project.org/package=loon.tourr
+
+Zhang, J., & Boos, D. D. (1992). Bootstrap critical values for +testing homogeneity of covariance matrices. Journal of the American +Statistical Association, 87(418), 425–429. http://www.jstor.org/stable/2290273 +
+
+

Packages used

(Binary file changes elided: new and updated figure PNGs under docs/figs/case-studies/, docs/figs/ch04/, docs/figs/ch06/, docs/figs/ch12/, and docs/images/dogfood-quartet.png.)

docs/index.html:

Visualizing Multivariate Data and Models in R

To comport oneself with perfect propriety in Polygonal society, one ought to be a Polygon oneself. — Edwin A. Abbott, Flatland

In 1884, an English schoolmaster, Edwin Abbott Abbott, shook the world of Victorian culture with a slim volume, Flatland: A Romance of Many Dimensions (Abbott, 1884). He described a two-dimensional world, Flatland, inhabited entirely by geometric figures in the plane. His purpose was satirical, to poke fun at the social and gender class system of the time: women were mere line segments, while men were represented as polygons with varying numbers of sides; a triangle was a working man, but acute isosceles triangles of very small angle were soldiers or criminals; gentlemen and professionals had more sides. Abbott published this under the pseudonym, “A Square”, suggesting his place in the hierarchy.

True, said the Sphere; it appears to you a Plane, because you are not accustomed to light and shade and perspective; just as in Flatland a Hexagon would appear a Straight Line to one who has not the Art of Sight Recognition. But in reality it is a Solid, as you shall learn by the sense of Feeling. — Edwin A. Abbott, Flatland

But how did it feel to be a member of a flatland society? How could a point (a newborn child?) understand a line (a woman)? How does a Triangle “see” a Hexagon or even an infinitely-sided Circle? Abbott introduces the very idea of different dimensions of existence through dreams and visions:

  • A Square dreams of visiting a one-dimensional Lineland where men appear as lines, and women are merely “illustrious points”, but the inhabitants can only see the Square as lines.

  • In a vision, the Square is visited by a Sphere, to illustrate what a 2D Flatlander could understand of a 3D sphere (Figure 1) that passes through the plane he inhabits. It is a large circle when seen at the moment of its greatest extent. As the Sphere rises, it becomes progressively smaller, until it becomes a point, and then vanishes. (The simple geometry behind this appearance is sketched just below.)
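
A hedged aside on the geometry (my gloss, not Abbott's): a sphere of radius $R$ whose center sits at height $h$ above the plane meets Flatland in a circle of radius
$$ r(h) = \sqrt{R^2 - h^2}, \qquad |h| \le R , $$
which is largest ($r = R$) when the center lies in the plane, and shrinks to a point before vanishing as $|h| \to R$.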

Figure 1: A 2D Flatlander seeing a sphere as it passes through Flatland. The line labeled ‘My Eye’ indicates what the Flatlander would see. Source: Abbott (1884)

In One Dimension, did not a moving Point produce a Line with two terminal points? In Two Dimensions, did not a moving Line produce a Square with four terminal points? In Three Dimensions, did not a moving Square produce - did not the eyes of mine behold it - that blessed being, a Cube, with eight terminal points? And in Four Dimensions, shall not a moving Cube - alas, for Analogy, and alas for the Progress of Truth if it be not so - shall not, I say, the motion of a divine Cube result in a still more divine organization with sixteen terminal points? — Edwin A. Abbott

For Abbott, the way for a citizen of any world to imagine one more dimension was to consider how a higher-dimensional object would change over time.1 A line moved over time could produce a rectangle as shown in Figure 2; that rectangle moving in another direction over time would produce a 3D figure, and so forth.
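
Abbott's counting (2, 4, 8, 16 terminal points) is just the statement that an n-cube built this way has 2^n corners; a throwaway check in R (mine, not the book's):

Code
# terminal points (corners) of the n-cube, for n = 0, ..., 4
setNames(2^(0:4), c("point", "line", "square", "cube", "tesseract"))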


But wait! Where does that 4D thing (a tesseract) come from? To really see a tesseract it helps to view it in an animation over time (Figure 3). But like the Square, contemplating 3D from a 2D world, it takes some imagination.


Figure 4: Four views of the pollen data, zooming in, clockwise from the upper left to discover the word “EUREKA”.

This can be seen better in a 3D animation. The rgl package (Adler & Murdoch, 2023) is used to create a 3D scatterplot of the first three variables. Then the animation package (Xie, 2021) is used to record a sequence of images, adjusting the rgl::par3d(zoom) value.

Code
library(animation)
library(rgl)
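
The diff elides the rest of that chunk, so here is a minimal sketch of the technique just described. The object name pollen3 and the use of gifski to assemble the frames are my assumptions, not the book's code.

Code
# Sketch: shrink rgl's zoom step by step, snapshot each frame to a PNG,
# then stitch the frames into a GIF. `pollen3` is assumed to be a matrix
# holding the first three variables of the pollen data.
plot3d(pollen3, col = "steelblue", size = 2)
zooms <- seq(1.0, 0.02, length.out = 40)
files <- sprintf("zoom-%03d.png", seq_along(zooms))
for (i in seq_along(zooms)) {
  par3d(zoom = zooms[i])   # move the virtual camera closer
  snapshot3d(files[i])     # save the current rgl scene
}
gifski::gifski(files, gif_file = "pollen-zoom.gif", delay = 0.15)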

Multivariate scientific discoveries

Lest this example seem contrived (which it admittedly is), multivariate visualization has played an important role in quite a few scientific discoveries. Among these, Francis Galton’s (1863) discovery of the anti-cyclonic pattern of wind direction in relation to barometric pressure, from many weather measures recorded systematically across all weather stations, lighthouses and observatories in Europe in December 1861, stands out as the best example of a scientific discovery achieved almost entirely through graphical means: something that was totally unexpected, and purely the product of his use of remarkably novel high-dimensional graphs (Friendly & Wainer, 2021, pp. 170–173).

A more recent example is the discovery of two general classes in the development of Type 2 diabetes by Reaven & Miller (1979), using PRIM-9 (Fisherkeller et al., 1974), the first computer system for high-dimensional visualization2. In an earlier study, Reaven & Miller (1968) examined the relation between blood glucose levels and the production of insulin in normal subjects and in patients with varying degrees of hyperglycemia (elevated blood sugar level). They found a peculiar “horse shoe” shape in this relation (shown in Figure 6), about which they could only speculate: perhaps individuals with the best glucose tolerance also had the lowest levels of insulin as a response to an oral dose of glucose; perhaps those with low glucose response could secrete higher levels of insulin; perhaps those who were low on both glucose and insulin responses followed some other mechanism. In 2D plots, this was a mystery.

data(Diabetes, package="heplots")
 plot(instest ~ glutest, data=Diabetes, 
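
The plot() call above is cut off by the diff hunk; a minimal runnable completion follows. The plotting character and axis labels are my guesses, not necessarily the book's; only glutest and instest come from heplots::Diabetes.

Code
data(Diabetes, package = "heplots")
plot(instest ~ glutest, data = Diabetes,
     pch = 16,
     xlab = "glucose response (glutest)",
     ylab = "insulin response (instest)")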

Figure 6: Reproduction of a graph similar to that from Reaven & Miller (1968) on the relationship between glucose and insulin response to being given an oral dose of glucose.

An answer to their questions came ten years later, when they were able to visualize similar but new data in 3D using the PRIM-9 system. In a carefully controlled study, they also measured ‘steady state plasma glucose’ (SSPG), a measure of the efficiency of use of insulin in the body, where large values mean insulin resistance, as well as other variables. PRIM-9 allowed them to explore various sets of three variables, and, more importantly, to rotate a given plot in three dimensions to search for interesting features. One plot that stood out concerned the relation between plasma glucose response, plasma insulin response and SSPG response, shown in Figure 7.


Figure 7: Artist’s rendition of data from Reaven & Miller (1979) as seen in three dimensions using the PRIM-9 system. Labels for the clusters have been added, identifying the three groups of patients. Source: Reaven & Miller (1979).

From this graphical insight, they were able to classify the participants into three groups, based on clinical levels of glucose and insulin. The people in the wing on the left in Figure 7 were considered to have overt diabetes, the most advanced form, characterized by elevated fasting blood glucose concentration and classical diabetic symptoms. Those in the right wing were classified as latent or chemical diabetics, with no symptoms of diabetes but demonstrable abnormality of oral or intravenous glucose tolerance. Those in the central blob were classified as normal.

Previous thinking was that Type 2 diabetes (when the body cannot make enough insulin, as opposed to Type I, an autoimmune condition where the pancreatic cells have been destroyed) progressed from the chemical stage to an overt one in a smooth transition. However, it was clear from Figure 7 that the only “path” from one to the other led through the cluster of normal patients near the origin, so that explanation must be wrong. Instead, this suggested that the chemical and overt diabetics were distinct classes. Indeed, longitudinal studies showed that patients classified as chemical diabetics rarely developed the overt form. The understanding of the etiology of Type 2 diabetes was altered dramatically by the power of high-D interactive graphics.
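
A modern reader can re-create something like this PRIM-9 view directly. The sketch below is my re-creation, not the book's code; the colors and point size are arbitrary choices. It plots the same three responses from heplots::Diabetes in an rgl window that can be rotated by hand.

Code
# Re-creation sketch of the Figure 7 view: three diabetes groups in 3D
library(rgl)
data(Diabetes, package = "heplots")
cols <- c("darkgreen", "blue", "red")[Diabetes$group]   # one color per group
plot3d(Diabetes$glutest, Diabetes$instest, Diabetes$sspg,
       col = cols, size = 5,
       xlab = "glucose response", ylab = "insulin response", zlab = "SSPG")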

What I assume

It is assumed that the reader has a background in applied intermediate statistics, including material on univariate linear models: analysis of variance (ANOVA) and multiple regression. This means you should be familiar with … TODO: Complete this required background

There will also be some mathematics in the book where words and diagrams are not enough. The mathematical level will be intermediate, mostly consisting of simple algebra. No derivations, proofs, theorems here! For multivariate methods, it will be useful to express ideas using matrix notation to simplify presentation, where a single symbol can stand for an entire vector or matrix. I’m using math to express ideas, and all you will need is a reading-level of understanding. For this, the first chapter of Fox (2021), A mathematical primer for social statistics, is excellent. If you want to learn something of using matrix algebra for data analysis and statistics, I recommend our package matlib (Friendly et al., 2024).
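
As a small, hedged illustration of why matrix notation pays off (my example, not the book's): with matlib, a system of equations can be displayed and solved as the single matrix statement Ax = b.

Code
library(matlib)
A <- matrix(c(1, 2, 3,
              2, 5, 3,
              1, 0, 8), nrow = 3, byrow = TRUE)
b <- c(6, 10, 9)
showEqn(A, b)   # print the three equations represented by A x = b
Solve(A, b)     # solve the system by Gaussian elimination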

I also assume the reader to have at least a basic familiarity with R. While R fundamentals are outside the scope of the book, I believe that this language provides a rich set of resources, far beyond that offered by other statistical software packages, and is well worth learning.

For those not familiar with R, I recommend Matloff (2011), Wickham (2014), and Cotton (2013) for introductions to programming in the language. Fox & Weisberg (2018) and Teetor (2011) are great for learning about how to conduct basic statistical analyses in R. TODO: Revise this list.

TODO: Add stuff on general books about graphics

Conventions used in this book

TODO: Some stuff below is just for testing… Revise.


  • italic : indicates terms to be emphasized or defined in the text, …

  • bold : is used for names of R packages. Or, better yet: bold monospace, but I’d rather this be in a different color. Perhaps I can use “r colorize(”lattice”, “green”)” inline -> lattice will do this? This does bold & color, but can’t use monospace.

    I can now use inline ‘pkg(“lattice”)’ generating lattice, or also with a citation, pkg("lattice", cite=TRUE) -> lattice (Sarkar, 2024). Can also refer to the matlib package (Friendly et al., 2024), including “package” between the name and citation. (A sketch of one possible pkg() helper appears after this list.)

  • fixed-width : is used in program listings as well as in text to refer to variable and function names, R statement elements and keywords.

  • R code in program listings and output is presented in monospaced (typewriter) font, Fira Mono
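
For concreteness, here is a minimal sketch of the kind of inline pkg() helper described in the list above. The body is my assumption about one way to implement it in the Quarto source, not the book's actual code; the [@R-name] keys match the style of the pkgs.bib entries.

Code
# Hypothetical inline helper: bold the package name and, optionally,
# append a citation key such as [@R-lattice].
pkg <- function(name, cite = FALSE) {
  ref <- if (cite) sprintf(" [@R-%s]", name) else ""
  sprintf("**%s**%s", name, ref)
}
# Used inline in the .qmd source as: `r pkg("lattice", cite = TRUE)`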


    1. In his famous TV series, Cosmos, Carl Sagan provides an intriguing video presentation Flatland and the 4th dimension. However, as far back as 1754 (Cajori, 1926), the idea of adding a fourth dimension appears in Jean le Rond d’Alembert’s “Dimensions”, and one realization of a four-dimensional object is a tesseract, shown in Figure 2.↩︎

    2. PRIM-9 is an acronym for Picturing, Rotation, Isolation and Masking in up to 9 dimensions. These operations are fundamental to interactive and dynamic data visualization.↩︎