Merge branch 'master' of https://github.com/friendly/Vis-MLM-book into master
friendly · Oct 30, 2024 · 159443b · 159443b
2 parents 7d9930b + e4c42b4
commit 159443b
Show file tree

Hide file tree

Showing 12 changed files with 247 additions and 30 deletions.
diff --git a/R/crime/crime-network0.R b/R/crime/crime-network0.R
@@ -16,7 +16,6 @@ crime.cor <- crime |>
 # PCA ordering
 ord <- corrMatOrder(crime.cor, order = "AOE")
 rownames(crime.cor)[ord]
-
 crime.cor <- crime.cor[ord, ord]
 
 # ### "association graph": network of correlations
@@ -26,6 +25,8 @@ q1 <- qgraph(crime.cor,
        minimum = "sig", sampleSize = nrow(crime), alpha = 0.01,
        color = grey(.9), vsize = 12,
        labels = rownames(crime.cor),
+       # curveAll = TRUE, # logical indicating if all edges should be curved
+       # curveDefault = 0.5, # default is 1
        posCol = "blue")
 
 png(filename = "images/crime-cor.png", height = 540, width = 540)
@@ -34,12 +35,14 @@ dev.off()
 
 # compare with spring
 q2 <- qgraph(crime.cor, 
-       title = "Crime data:\ncorrelations\n(spring layout)", title.cex = 1.5,
+       title = "Crime data:\ncorrelations", title.cex = 1.5,
        graph = "cor",
        minimum = "sig", sampleSize = nrow(crime), alpha = 0.01,
        color = grey(.9), vsize = 12,
        labels = rownames(crime.cor),
        layout = "spring", repulsion = 1.2,
+       # curveAll = TRUE, # logical indicating if all edges should be curved
+       # curveDefault = 0.5, # default is 1
        posCol = "blue")
 
 png(filename = "images/crime-cor-spring.png", height = 540, width = 540)

diff --git a/bib/pkgs.bib b/bib/pkgs.bib
@@ -431,6 +431,15 @@ @Manual{R-purrr
   url = {https://purrr.tidyverse.org/},
 }
 
+@Manual{R-qgraph,
+  title = {qgraph: Graph Plotting Methods, Psychometric Data Visualization and
+Graphical Model Estimation},
+  author = {Sacha Epskamp and Giulio Costantini and Jonas Haslbeck and Adela Isvoranu},
+  year = {2023},
+  note = {R package version 1.9.8},
+  url = {https://CRAN.R-project.org/package=qgraph},
+}
+
 @Manual{R-readr,
   title = {readr: Read Rectangular Text Data},
   author = {Hadley Wickham and Jim Hester and Jennifer Bryan},
@@ -890,6 +899,16 @@ @Article{performance2021
   doi = {10.21105/joss.03139},
 }
 
+@Article{qgraph2012,
+  title = {{qgraph}: Network Visualizations of Relationships in Psychometric Data},
+  author = {Sacha Epskamp and Ang\'elique O. J. Cramer and Lourens J. Waldorp and Verena D. Schmittmann and Denny Borsboom},
+  journal = {Journal of Statistical Software},
+  year = {2012},
+  volume = {48},
+  number = {4},
+  pages = {1--18},
+}
+
 @Article{report2023,
   title = {Automated Results Reporting as a Practical Tool to Improve Reproducibility and Methodological Best Practices Adoption},
   author = {Dominique Makowski and Daniel Lüdecke and Indrajeet Patil and Rémi Thériault and Mattan S. Ben-Shachar and Brenton M. Wiernik},

diff --git a/bib/pkgs.txt b/bib/pkgs.txt
@@ -23,6 +23,7 @@ ggplot2
 grid
 knitr
 patchwork
+qgraph
 tidyr
 tourr
 vcd

diff --git a/child/03-network.qmd b/child/03-network.qmd
@@ -7,8 +7,8 @@ With a moderate number of variables, techniques such as smoothing, summarizing w
 data ellipses and fitted curves, and visual thinning can be used to tame "big $N$" datasets
 with thousands of cases. 
 
-However "big $p$" datasets, with more than a moderate number ($p$)
-still remains a challenge. It is hard to see how the more advanced methods
+However "big $p$" datasets, with more than a moderate number ($p$) of variables
+still remain a challenge. It is hard to see how the more advanced methods
 (corrgrams, parallel coordinate plots) described earlier could cope with $p = 20, 50, 100, 500, \dots$ variables.
 At some point, each of these begins to break down for the purpose of visualizing associations
 among many variables. We are forced to thin the information presented in graphs
@@ -21,14 +21,117 @@ connected by (weighted) _edges_ whose properties reflect the strength of connect
 pairs, such as a correlation. Such diagrams can reveal properties not readily seen by other means.
 
 As an example consider @fig-big5-qgraph-rodrigues, which portrays the correlations
-among 25 items from the NEO-PR-R personality questionnaire, which through factor analysis
-has resulted in the "Big Five" scales of aspects of personality, mnemonically captured
-by the acronym **OCEAN**: Openness, Conscientiousness, Extraversion, Agreeableness and Neuroticism.
+among 25 self-report items reflecting 5 factors (the "Big Five")
+considered in personality psychology to represent the dominant aspects of
+all of personality. These factors are easily remembered by the acronym
+**OCEAN**: **O**penness, **C**onscientiousness, **E**xtraversion, **A**greeableness and **N**euroticism.
+The dataset, `psych::bfi`, contains data from an online sample of $n=2800$ with 5 items for each scale.
+
+In this figure (taken from [Rodrigues (2021)](https://bit.ly/3A6kvq5)),
+the item nodes are labeled according to the OCEAN factor they are assumed to measure.
+For 25 items, there are $25 \times 24 / 2 = 300$ correlations, far too many to see at once.
+A clearer picture arises when we reduce the number of edges shown according to some
+criterion. Here,
+edges are drawn _only_ between nodes where the correlation is considered significant
+by a method ("glasso") designed to make the graph optimally sparse.
+
+<!-- Actually, the psych::bfi data is:
+25 personality self report items taken from the International Personality Item Pool (ipip.ori.org) were included as part of the Synthetic Aperture Personality Assessment (SAPA) web based personality assessment project. The data from 2800 subjects are included here as a demonstration set for scale construction, factor analysis, and Item Response Theory analysis. Three additional demographic variables (sex, education, and age) are also included.
+-->
+
 
 ```{r}
 #| label: fig-big5-qgraph-rodrigues
+#| echo: false
 #| out-width: "100%"
-#| fig-cap: "Network diagram of the correlations among 25 items from the Big-Five personality scale, 5 items for each scale. The magnitude of a correlation is shown by the thickness and transparency of the edge betwen two item nodes. The sign of a correlation is shown by edge color and style: solid blue for positive and dashed red for negative. _Source_: [Rodrigues (2021)](https://bit.ly/3A6kvq5)"
+#| fig-cap: "Network diagram of the correlations among 25 items from a Big-Five personality scale, 5 items for each scale. The magnitude of a correlation is shown by the thickness and transparency of the edge between two item nodes. The sign of a correlation is shown by edge color and style: solid blue for positive and dashed red for negative. _Source_: [Rodrigues (2021)](https://bit.ly/3A6kvq5)"
 knitr::include_graphics("images/big5-qgraph-rodrigues.png")
 ```
 
+The edges shown in @fig-big5-qgraph-rodrigues encode the Pearson correlation between a given pair of items using several visual attributes. Magnitude is shown by both the thickness and transparency of the edge; the sign of the correlation is shown by color and line type:
+solid `r blue` for positive correlations and dashed `r red` for negative ones.
+
+According to some theories, the five personality factors should be largely non-overlapping,
+so there should not be many edges connecting items of one factor with those of another.
+Yet, there are quite a few cross-factor connections in @fig-big5-qgraph-rodrigues,
+so perhaps the theory is wrong, or, more likely, the 25 items are not good representatives of
+these underlying dimensions. The network diagram shown here is a visual tool for thought
+and refinement.
+
+Network diagrams stem from mathematical graph theory (refs:) of the properties
+of nodes and edges used to represent pairwise relationships, ...
+
+-> network science, network psychometrics, ...
+
+-> packages: qgraph, ...
+
+### Crime data
+
+For the present purposes, let's see what network diagrams can tell us about the crime data
+analyzed earlier. Here, I first reorder the variables as in @fig-crime-corrplot-AOE.
+In the call to `qgraph()`, the argument `minimum = "sig"` says to show only the edges
+for significant correlations (at $\alpha = 0.01$ here).
+
+```{r}
+#| label: fig-crime-cor
+#| out-width: "80%"
+#| fig-width: 7
+#| fig-height: 7
+#| fig-cap: "Network diagram depicting the correlations among the crime variables. Only edges for correlations that are significant at the $\\alpha = 0.01$ level are displayed."
+library(qgraph)
+ord <- corrMatOrder(crime.cor, order = "AOE")
+rownames(crime.cor)[ord]
+crime.cor <- crime.cor[ord, ord]
+
+# "association graph": network of correlations
+qgraph(crime.cor, 
+  title = "Crime data:\ncorrelations", title.cex = 1.5,
+  graph = "cor",
+  minimum = "sig", sampleSize = nrow(crime), alpha = 0.01,
+  color = grey(.9), vsize = 12,
+  labels = rownames(crime.cor),
+  posCol = "blue")
+```
+
+<!--
+```{r}
+#| label: fig-crime-cor-image
+#| out-width: "80%"
+#| fig-cap: "Network diagram depicting the correlations among the crime variables."
+knitr::include_graphics("images/crime-cor.png")
+```
+-->
+
+### Partial correlations
+
+Among the more important statistical applications of network graph theory is the idea
+that the inverse of a correlation matrix, $\mathbf{R}^{-1}$, reflects _partial_
+correlations of variables, with the association of all other variables removed,
+and that a network diagram of partial correlations can reveal further structure.
+
+...
+
+
+```{r}
+#| label: fig-crime-partial-spring
+#| out-width: "80%"
+#| fig-width: 7
+#| fig-height: 7
+#| fig-cap: "Network diagram of partial correlations among the crime variables, controlling for all others. Variable nodes have been positioned by a \"spring\" layout method ... "
+qgraph(crime.cor, 
+       title = "Crime data:\npartial correlations", title.cex = 1.5,
+       graph = "pcor",
+       minimum = "sig", sampleSize = nrow(crime), alpha = 0.05,
+       color = grey(.9), vsize = 14,
+       labels = rownames(crime.cor),
+       edge.labels = TRUE, edge.label.cex = 1.7,
+       layout = "spring", repulsion = 1.2,
+       posCol = "blue")
+```
+
+@fig-crime-partial-spring shows that, once all other crime variables are controlled
+for in each pair, only a few partial correlations remain significant at the $\alpha = 0.05$ level.
+(Of these, only three are significant at $\alpha = 0.01$.)
+
+### Visualizing partial correlations
+
-Original file line number
+Diff line change
@@ Expand Up / @@ -23,6 +23,7 @@ ggplot2 @@
     grid
     knitr
     patchwork
+    qgraph
     tidyr
     tourr
     vcd
@@ Expand Down @@