From 405412fdf7063121818afc435fecf74e32892d4d Mon Sep 17 00:00:00 2001 From: Michael Friendly Date: Sat, 7 Dec 2024 18:02:59 -0500 Subject: [PATCH] more on Ch13 --- 02-getting_started.qmd | 2 +- 12-eqcov.qmd | 70 +++-- R/penguin/penguins-eqcov.R | 11 +- bib/pkgs.bib | 51 +++- docs/01-intro.html | 95 ++++-- docs/02-getting_started.html | 113 +++++--- docs/03-multivariate_plots.html | 213 ++++++++------ docs/04-pca-biplot.html | 167 ++++++----- docs/05-linear_models.html | 107 ++++--- docs/06-linear_models-plots.html | 167 +++++++---- docs/07-lin-mod-topics.html | 113 +++++--- docs/08-collinearity-ridge.html | 147 ++++++---- docs/09-hotelling.html | 115 +++++--- docs/10-mlm-review.html | 157 +++++----- docs/11-mlm-viz.html | 103 ++++--- docs/12-eqcov.html | 165 +++++++---- docs/13-case-studies.html | 115 +++++--- docs/91-colophon.html | 152 ++++++---- docs/95-references.html | 101 ++++--- docs/figs/case-studies/fig-NC-boxplot-1.png | Bin 342576 -> 342806 bytes docs/figs/case-studies/fig-NC-corrgram-1.png | Bin 266331 -> 266338 bytes docs/figs/ch04/fig-crime-biplot2-1.png | Bin 297865 -> 298416 bytes docs/figs/ch04/fig-crime-biplot3-1.png | Bin 292194 -> 293033 bytes docs/figs/ch04/fig-diabetes-tsne-1.png | Bin 116951 -> 126465 bytes docs/figs/ch04/fig-mtcars-biplot-1.png | Bin 226335 -> 226221 bytes docs/figs/ch06/fig-duncan-check-model-1.png | Bin 234194 -> 236065 bytes docs/figs/ch09/fig-banknote-violin-1.png | Bin 297369 -> 297434 bytes docs/figs/ch09/fig-mathscore-violins-1.png | Bin 152169 -> 152204 bytes .../figs/ch12/fig-peng-covEllipse-pairs-1.png | Bin 193270 -> 310997 bytes docs/figs/ch12/fig-peng-covEllipse0-1.png | Bin 191622 -> 244385 bytes docs/index.html | 115 +++++--- docs/search.json | 18 +- docs/site_libs/bootstrap/bootstrap.min.css | 4 +- docs/site_libs/kePrint-0.0.1/kePrint.js | 8 - docs/site_libs/lightable-0.0.1/lightable.css | 272 ------------------ .../quarto-syntax-highlighting.css | 2 + docs/site_libs/quarto-html/quarto.js | 15 +- 
docs/site_libs/quarto-nav/quarto-nav.js | 37 +++ docs/site_libs/quarto-search/quarto-search.js | 6 +- figs/case-studies/fig-NC-boxplot-1.png | Bin 342576 -> 342806 bytes figs/case-studies/fig-NC-corrgram-1.png | Bin 266331 -> 266338 bytes figs/ch04/fig-crime-biplot2-1.png | Bin 297865 -> 298416 bytes figs/ch04/fig-crime-biplot3-1.png | Bin 292194 -> 293033 bytes figs/ch04/fig-diabetes-tsne-1.png | Bin 116951 -> 126465 bytes figs/ch04/fig-mtcars-biplot-1.png | Bin 226335 -> 226221 bytes figs/ch06/fig-duncan-check-model-1.png | Bin 234194 -> 236065 bytes figs/ch09/fig-banknote-violin-1.png | Bin 297369 -> 297434 bytes figs/ch09/fig-mathscore-violins-1.png | Bin 152169 -> 152204 bytes figs/ch12/fig-peng-covEllipse-pairs-1.png | Bin 193270 -> 310997 bytes figs/ch12/fig-peng-covEllipse0-1.png | Bin 191622 -> 244385 bytes test/matrix-equations.R | 23 ++ 51 files changed, 1529 insertions(+), 1135 deletions(-) delete mode 100644 docs/site_libs/kePrint-0.0.1/kePrint.js delete mode 100644 docs/site_libs/lightable-0.0.1/lightable.css create mode 100644 test/matrix-equations.R diff --git a/02-getting_started.qmd b/02-getting_started.qmd index a8cd98a2..325b05b9 100644 --- a/02-getting_started.qmd +++ b/02-getting_started.qmd @@ -185,7 +185,7 @@ knitr::include_graphics("images/DataSaurusDozen.gif") ::: {.content-visible when-format="pdf"} ```{r} #| label: fig-datasaurus2 -#| out-width: "90%" +#| out-width: "100%" #| echo: false #| fig-cap: "Plots of the Dinosaur Dozen datasets. 
Source: [Selçuk Korkmaz on X](https://x.com/selcukorkmaz/status/1864583253253927156)" knitr::include_graphics("images/datasaurus-dozen.jpg") diff --git a/12-eqcov.qmd b/12-eqcov.qmd index cdd6c0c1..7ceb665f 100644 --- a/12-eqcov.qmd +++ b/12-eqcov.qmd @@ -125,7 +125,8 @@ So, this tells us that the groups do not differ in variances on all variables ex ## Visualizing Levene's test {#sec-mlevene} To gain some insight into the problem of homogeneity of variance it is helpful how the situation looks in terms of data. -For the Penguin data, it might be simplest just boxplots of the variables and try to see whether the widths of the +For the Penguin data, it might be simplest just to make boxplots of the variables and try to see whether the +**widths** of the central 50% boxes seem to be the same, as in @fig-peng-boxplots. However, it is perceptually difficult to focus on differences with widths of the boxes within each panel when their centers also differ from group to group. @@ -166,7 +167,9 @@ pengDevs <- colDevs(peng[, vars], peng$species, median) |> abs() ``` -as shown in @fig-peng-devplots. +With boxplots of the absolute deviations in @fig-peng-devplots, your eye can now focus on the average +value, shown by the median '|' line, because Levene's method is testing whether these differ +across groups. ```{r} #| label: fig-peng-devplots @@ -207,12 +210,29 @@ the within-group covariance **matrices** $\mathbf{\Sigma}_i$ are equal for all g $$ \mathbf{\Sigma}_1 = \mathbf{\Sigma}_2 = \cdots = \mathbf{\Sigma}_g \; . $$ -This is much stronger than in the univariate case, because it also required that all the correlations between pairs of variables are the same for all -groups. ... +This is much stronger than in the univariate case, because it also requires that all the correlations between pairs of variables are the same for all groups. 
For example, in the case of two responses, +we must assume: + +$$ +\begin{pmatrix} +\sigma_1^2 & \textsf{sym} \\ +\rho \sigma_1 \sigma_2 & \sigma_2^2 \\ +\end{pmatrix}_1 = +\begin{pmatrix} +\sigma_1^2 & \textsf{sym} \\ +\rho \sigma_1 \sigma_2 & \sigma_2^2 \\ +\end{pmatrix}_2 = \dots = +\begin{pmatrix} +\sigma_1^2 & \textsf{sym} \\ +\rho \sigma_1 \sigma_2 & \sigma_2^2 \\ +\end{pmatrix} +_g +$$ -**Insert** formulas for 2 x 2 case -**Insert** pairs covEllipses for penguins data + + + To preview the main example, @fig-peng-covEllipse0 shows data ellipses for the main size variables in the `palmerpenguins::penguins` data. @@ -222,39 +242,43 @@ To view the relations ... ```{r} #| label: fig-peng-covEllipse0 #| fig-align: center +#| fig-height: 5 +#| fig-width: 10 #| out-width: "100%" -#| code-fold: show +#| code-fold: true +#| code-summary: "See the code" #| fig-cap: "Data ellipses for bill length and bill depth in the penguins data, also showing the pooled covariance. Left: As is; right: centered at the grand means for easier comparison." op <- par(mar = c(4, 4, 1, 1) + .5, mfrow = c(c(1,2))) covEllipses(cbind(bill_length, bill_depth) ~ species, data=peng, - fill = TRUE, - fill.alpha = 0.1, - lwd = 3, - col = clr) + fill = TRUE, + fill.alpha = 0.1, + lwd = 3, + col = clr) covEllipses(cbind(bill_length, bill_depth) ~ species, data=peng, - center = TRUE, - fill = c(rep(FALSE,3), TRUE), - fill.alpha = .1, - lwd = 3, - col = clr, - label.pos = c(1:3,0)) + center = TRUE, + fill = c(rep(FALSE,3), TRUE), + fill.alpha = .1, + lwd = 3, + col = clr, + label.pos = c(1:3,0)) par(op) ``` - +All pairs: ```{r} #| label: fig-peng-covEllipse-pairs #| fig-align: center #| out-width: "100%" #| code-fold: show #| fig-cap: "All pairwise covariance ellipses for the penguins data." 
-covEllipses(peng[3:6], peng$species, - variables=1:4, - pooled = FALSE, - fill=c(rep(FALSE,3), TRUE), - fill.alpha=.1) +clr <- c(peng.colors(), "black") +covEllipses(peng[,3:6], peng$species, + variables=1:4, + col = clr, + fill=TRUE, + fill.alpha=.1) ``` They covariance ellipses look pretty similar in size, shape and orientation. diff --git a/R/penguin/penguins-eqcov.R b/R/penguin/penguins-eqcov.R index 6d82db01..a3adc487 100644 --- a/R/penguin/penguins-eqcov.R +++ b/R/penguin/penguins-eqcov.R @@ -7,7 +7,6 @@ library(effects) library(heplots) library(candisc) -# load(here::here("data", "peng.RData")) data(peng, package="heplots") source("R/penguin/penguin-colors.R") # use penguin colors @@ -27,11 +26,17 @@ vars <- c("bill_length", "bill_depth", "flipper_length", "body_mass") # Multivariate levene test pengDevs <- abs(colDevs(peng[, vars], peng$species, median)) -#pengDevs <- data.frame(species = peng$species, pengDevs) - dev.mod <- lm(pengDevs ~ peng$species) Anova(dev.mod) +pengDevs <- data.frame(species = peng$species, pengDevs) +dev.mod <- lm(cbind(bill_length, bill_depth, flipper_length, body_mass) ~ species, + data=pengDevs) +Anova(dev.mod) + +heplot(dev.mod) + + # box plots of deviations dev_long <- data.frame(species = peng$species, pengDevs) |> pivot_longer(bill_length:body_mass, diff --git a/bib/pkgs.bib b/bib/pkgs.bib index d4b57942..17aece1b 100644 --- a/bib/pkgs.bib +++ b/bib/pkgs.bib @@ -45,7 +45,7 @@ @Manual{R-candisc @Manual{R-car, title = {car: Companion to Applied Regression}, author = {John Fox and Sanford Weisberg and Brad Price}, - year = {2023}, + year = {2024}, note = {R package version 3.1-3}, url = {https://r-forge.r-project.org/projects/car/}, } @@ -90,6 +90,14 @@ @Manual{R-corrplot url = {https://github.com/taiyun/corrplot}, } +@Manual{R-datasauRus, + title = {datasauRus: Datasets from the Datasaurus Dozen}, + author = {Colin Gillespie and Steph Locke and Rhian Davies and Lucy {D'Agostino McGowan}}, + year = {2024}, + note = {R package 
version 0.1.8}, + url = {https://github.com/jumpingrivers/datasauRus}, +} + @Manual{R-datawizard, title = {datawizard: Easy Data Wrangling and Statistical Transformations}, author = {Indrajeet Patil and Etienne Bacher and Dominique Makowski and Daniel Lüdecke and Mattan S. Ben-Shachar and Brenton M. Wiernik}, @@ -167,7 +175,7 @@ @Manual{R-genridge title = {genridge: Generalized Ridge Trace Plots for Ridge Regression}, author = {Michael Friendly}, year = {2024}, - note = {R package version 0.7.1}, + note = {R package version 0.8.0}, url = {https://github.com/friendly/genridge}, } @@ -199,7 +207,7 @@ @Manual{R-ggbiplot title = {ggbiplot: A Grammar of Graphics Implementation of Biplots}, author = {Vincent Q. Vu and Michael Friendly}, year = {2024}, - note = {R package version 0.6.2}, + note = {R package version 0.6.3}, url = {https://github.com/friendly/ggbiplot}, } @@ -273,7 +281,7 @@ @Manual{R-heplots title = {heplots: Visualizing Hypothesis Tests in Multivariate Linear Models}, author = {Michael Friendly and John Fox and Georges Monette}, year = {2024}, - note = {R package version 1.7.1}, + note = {R package version 1.7.3}, url = {http://friendly.github.io/heplots/}, } @@ -309,6 +317,14 @@ @Manual{R-knitr url = {https://yihui.org/knitr/}, } +@Manual{R-langevitour, + title = {langevitour: Langevin Tour}, + author = {Paul Harrison}, + year = {2024}, + note = {R package version 0.7}, + url = {https://logarithmic.net/langevitour/}, +} + @Manual{R-lattice, title = {lattice: Trellis Graphics for R}, author = {Deepayan Sarkar}, @@ -317,6 +333,14 @@ @Manual{R-lattice url = {https://lattice.r-forge.r-project.org/}, } +@Manual{R-liminal, + title = {liminal: Multivariate Data Visualization with Tours and Embeddings}, + author = {Stuart Lee}, + year = {2021}, + note = {R package version 0.1.2}, + url = {https://github.com/sa-lee/liminal/}, +} + @Manual{R-lubridate, title = {lubridate: Make Dealing with Dates a Little Easier}, author = {Vitalie Spinu and Garrett Grolemund and 
Hadley Wickham}, @@ -344,9 +368,9 @@ @Manual{R-marginaleffects @Manual{R-MASS, title = {MASS: Support Functions and Datasets for Venables and Ripley's MASS}, - author = {Brian Ripley}, + author = {Brian Ripley and Bill Venables}, year = {2024}, - note = {R package version 7.3-60.0.1}, + note = {R package version 7.3-61}, url = {http://www.stats.ox.ac.uk/pub/MASS4/}, } @@ -462,7 +486,7 @@ @Manual{R-rgl title = {rgl: 3D Visualization Using OpenGL}, author = {Daniel Adler and Duncan Murdoch}, year = {2024}, - note = {R package version 1.3.12}, + note = {R package version 1.3.14}, url = {https://github.com/dmurdoch/rgl}, } @@ -623,7 +647,7 @@ @Book{car2019 year = {2019}, publisher = {Sage}, address = {Thousand Oaks {CA}}, - url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/}, + url = {https://www.john-fox.ca/Companion/}, } @Misc{correlationPackage, @@ -850,6 +874,17 @@ @InCollection{knitr2014 note = {ISBN 978-1466561595}, } +@Article{langevitour2023, + title = {langevitour: Smooth Interactive Touring of High Dimensions, Demonstrated with scRNA-Seq Data}, + author = {Paul Harrison}, + journal = {The R Journal}, + year = {2023}, + volume = {15}, + number = {2}, + pages = {206-219}, + doi = {10.32614/RJ-2023-046}, +} + @Book{lattice2008, title = {Lattice: Multivariate Data Visualization with R}, author = {Deepayan Sarkar}, diff --git a/docs/01-intro.html b/docs/01-intro.html index 9bf698e4..91279784 100644 --- a/docs/01-intro.html +++ b/docs/01-intro.html @@ -2,9 +2,9 @@ - + -Visualizing Multivariate Data and Models in R - 1  Introduction +1  Introduction – Visualizing Multivariate Data and Models in R