Merge pull request #300 from stocnet/develop

v1.4.3
stocnet · Nov 6, 2024 · 999d4f4 · 999d4f4
2 parents 10e0b02 + b141046
commit 999d4f4
Show file tree

Hide file tree

Showing 28 changed files with 385 additions and 183 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: migraph
 Title: Univariate and Multivariate Tests for Multimodal and Other Networks 
-Version: 1.4.2
-Date: 2024-09-04
+Version: 1.4.3
+Date: 2024-11-06
 Description: A set of tools for testing networks.
    It includes functions for univariate and multivariate 
    conditional uniform graph and quadratic assignment procedure testing,

diff --git a/NAMESPACE b/NAMESPACE
@@ -19,6 +19,7 @@ export(labs)
 export(net_regression)
 export(network_reg)
 export(scale_y_discrete)
+export(test_configuration)
 export(test_distribution)
 export(test_fit)
 export(test_gof)
@@ -45,6 +46,7 @@ importFrom(ggplot2,scale_y_discrete)
 importFrom(ggplot2,xlab)
 importFrom(ggplot2,ylab)
 importFrom(manynet,bind_node_attributes)
+importFrom(manynet,generate_configuration)
 importFrom(manynet,generate_random)
 importFrom(manynet,is_complex)
 importFrom(manynet,is_directed)

diff --git a/NEWS.md b/NEWS.md
@@ -1,12 +1,32 @@
 # migraph 1.4.3
 
-2024-09-04
+2024-11-06
 
 ## Package
 
 - Using different github actions for renaming and releasing binaries (thanks @auzaheta)
 - Updated manynet dependence to >=1.0.5
 
+## Models
+
+- Added `test_configuration()`
+
+## Data
+
+- Updated `mpn_bristol` with info
+- Updated `mpn_ryanair` with info
+- Updated `mpn_elite_mex` with info
+- Updated `mpn_elite_usa`
+- Updated `mpn_senate`
+- Updated `mpn_cow`
+
+## Tutorial
+
+- Updated diversity tutorial
+  - More elaborate start
+  - Broader figure widths
+  - Dropped `alter(Discipline)` from `net_regression()` call
+
 # migraph 1.4.2
 
 2024-09-03

diff --git a/R/data_mpn.R b/R/data_mpn.R
@@ -204,32 +204,32 @@
 #' @docType data
 #' @keywords datasets
 #' @name mpn_senate112
-#' @usage data(mpn_DemSxP)
+#' @usage data(mpn_senate_dem)
 #' @references  
 #' Knoke, David, Mario Diani, James Hollway, and Dimitris C Christopoulos. 2021.
 #' \href{https://www.cambridge.org/core/books/multimodal-political-networks/43EE8C192A1B0DCD65B4D9B9A7842128}{\emph{Multimodal Political Networks}}.
 #' Cambridge University Press. Cambridge University Press.
 #' @format 
 #'   ```{r, echo = FALSE}
-#'   mpn_DemSxP
+#'   mpn_senate_dem
 #'   ```
-"mpn_DemSxP"
+"mpn_senate_dem"
 
 #' @rdname mpn_senate112
-#' @usage data(mpn_RepSxP)
+#' @usage data(mpn_senate_rep)
 #' @format 
 #'   ```{r, echo = FALSE}
-#'   mpn_RepSxP
+#'   mpn_senate_rep
 #'   ```
-"mpn_RepSxP"
+"mpn_senate_rep"
 
 #' @rdname mpn_senate112
-#' @usage data(mpn_OverSxP)
+#' @usage data(mpn_senate_over)
 #' @format 
 #'   ```{r, echo = FALSE}
-#'   mpn_OverSxP
+#'   mpn_senate_over
 #'   ```
-"mpn_OverSxP"
+"mpn_senate_over"
 
 # EVS ####
 

diff --git a/R/model_tests.R b/R/model_tests.R
@@ -71,6 +71,50 @@ test_random <- function(.data, FUN, ...,
   out
 }
 
+#' @rdname tests 
+#' @importFrom manynet generate_configuration
+#' @export
+test_configuration <- function(.data, FUN, ..., 
+                        times = 1000, 
+                        strategy = "sequential", 
+                        verbose = FALSE){
+  args <- unlist(list(...))
+  if (!is.null(args)) {
+    obsd <- FUN(.data, args)
+  } else {
+    obsd <- FUN(.data)
+  }
+  oplan <- future::plan(strategy)
+  on.exit(future::plan(oplan), add = TRUE)
+  rands <- furrr::future_map(1:times, manynet::generate_configuration, n = .data, 
+                             .progress = verbose, 
+                             .options = furrr::furrr_options(seed = T))
+  if (length(args) > 0) {
+    rands <- furrr::future_map(rands, 
+                               manynet::bind_node_attributes, object2 = .data, 
+                               .progress = verbose, 
+                               .options = furrr::furrr_options(seed = T))
+  }
+  if (!is.null(args)) {
+    simd <- furrr::future_map_dbl(rands,
+                                  FUN, args)
+  } else {
+    simd <- furrr::future_map_dbl(rands,
+                                  FUN)
+  }
+  out <- list(test = "configuration",
+              testval = obsd,
+              testdist = simd,
+              mode = manynet::is_directed(.data),
+              diag = manynet::is_complex(.data),
+              cmode = "edges",
+              plteobs = mean(simd <= obsd),
+              pgteobs = mean(simd >= obsd),
+              reps = times)
+  class(out) <- "network_test"
+  out
+}
+
 #' @rdname tests 
 #' @examples 
 #' # (qaptest <- test_permutation(marvel_friends, 

diff --git a/data/mpn_DemSxP.rda b/data/mpn_DemSxP.rda
diff --git a/data/mpn_OverSxP.rda b/data/mpn_OverSxP.rda
diff --git a/data/mpn_RepSxP.rda b/data/mpn_RepSxP.rda
diff --git a/data/mpn_bristol.rda b/data/mpn_bristol.rda
diff --git a/data/mpn_cow_igo.rda b/data/mpn_cow_igo.rda
diff --git a/data/mpn_cow_trade.rda b/data/mpn_cow_trade.rda
diff --git a/data/mpn_elite_mex.rda b/data/mpn_elite_mex.rda
diff --git a/data/mpn_elite_usa_advice.rda b/data/mpn_elite_usa_advice.rda
diff --git a/data/mpn_elite_usa_money.rda b/data/mpn_elite_usa_money.rda
diff --git a/data/mpn_ryanair.rda b/data/mpn_ryanair.rda
diff --git a/data/mpn_senate_dem.rda b/data/mpn_senate_dem.rda
diff --git a/data/mpn_senate_over.rda b/data/mpn_senate_over.rda
diff --git a/data/mpn_senate_rep.rda b/data/mpn_senate_rep.rda
diff --git a/inst/tutorials/tutorial8/diversity.Rmd b/inst/tutorials/tutorial8/diversity.Rmd
@@ -17,12 +17,29 @@ library(manynet)
 library(migraph)
 library(patchwork)
 library(ggplot2)
+clear_glossary()
 knitr::opts_chunk$set(echo = FALSE)
 marvel_friends <- to_unsigned(ison_marvel_relationships, keep = "positive")
 marvel_friends <- to_giant(marvel_friends)
 marvel_friends <- marvel_friends %>% to_subgraph(Appearances >= mean(Appearances))
 ```
 
+
+## This tutorial
+
+<img src="https://i.giphy.com/media/v1.Y2lkPTc5MGI3NjExeDFidmtrcTJhenU1NGRtdXA5N292Ynp3Y3BxcHRhYjR2MnowOGJ0ZiZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/djRJNZqj508sE/giphy.gif" alt="gif of deadpool greeting" width = "900"/>
+
+Within a network, nodes and ties vary.
+But how much do they vary and are there associations in this variance?
+By the end of this tutorial, you will be able to:
+
+- [ ] &nbsp; measure the richness of a network in terms of attribute types
+- [ ] &nbsp; measure how diverse a network is
+- [ ] &nbsp; measure how heterophilous the ties in a network are
+- [ ] &nbsp; test how heterophilous the ties in a network are compared to random networks
+- [ ] &nbsp; test how heterophilous the ties in a network are compared to permutations of the original network
+- [ ] &nbsp; regress a network's ties on other ties and/or nodal or dyadic attributes
+
 ## Initial visualisation
 
 For this session, we'll explore a couple of different datasets.
@@ -82,7 +99,7 @@ Recall that this data has several nodal attributes.
 Let's explore a couple of these attributes, "Gender" and "PowerOrigin", visually
 using `graphr()`.
 
-```{r plotfriends, exercise=TRUE, purl = FALSE}
+```{r plotfriends, exercise=TRUE, purl = FALSE, fig.width=9}
 
 ```
 
@@ -98,8 +115,8 @@ graphr(____,
 
 ```{r plotfriends-solution}
 graphr(marvel_friends, 
-           node_shape = "Gender",
-           node_color = "PowerOrigin")
+       node_shape = "Gender",
+       node_color = "PowerOrigin")
 ```
 
 These variables seem to be distributed unevenly across the network.
@@ -113,15 +130,16 @@ We therefore need to establish how diverse this network really is.
 
 ## Measuring richness
 
-We can begin by measuring the number of different categories there are.
-Here we might assume that the more different categories there are,
-the more diverse the network is.
-The measure of 'richness' is inherited from the study of biodiversity,
-and calculates the number of different categories are presented in a
-dataset for a given variable.
+Some categorical variables have more categories than others,
+and this can be thought of as indicating greater diversity than e.g. a binary
+categorical variable.
+A measure of this 'richness' is inherited from the study of biodiversity.
+_Richness_ calculates the number of different categories that appear in
+the data for a given variable.
+Let's calculate the richness of all of the variables in our Marvel data.
 
 ```{r rich, exercise=TRUE, purl = FALSE}
-
+net_node_attributes(marvel_friends)
 ```
 
 ```{r rich-hint, purl = FALSE}
@@ -166,12 +184,12 @@ Recall that the Blau index for any given variable is:
 
 $$1 - \sum p_i^2$$
 
-where $p$ represents the proportion belonging to any given category,
-and $i$ indexes each of the given categories.
-A perfectly homogeneous group would receive a score of 0,
+where $p_i$ represents the proportion of observations holding $i$, 
+which indexes each of the given categories.
+A perfectly homogeneous group (all the same category) would receive a score of 0,
 while a perfectly heterogeneous group (with members spread evenly over the maximum categories)
 would receive a score of 1.
-Obtain the network diversity scores for our five attributes.
+Obtain the network diversity scores for the five attributes used above.
 
 ```{r blau, exercise=TRUE, purl = FALSE}
 
@@ -218,12 +236,12 @@ It looks like some origin stories are more gender diverse than others.
 Gods (just Thor here) and humans are all men,
 whereas those with mutant or radiation origin stories are more gender diverse.
 There doesn't appear to be much difference in intellect 
-across gender categories however.
+across gender categories in this data.
 
 Ok, this tells us about how (un)even the distribution of these variables is in this network,
 but it doesn't necessarily tell us whether ties are appearing more frequently
-between nodes of similar (or different) categories.
-For that we need to look at homophily/heterophily.
+_between_ nodes of similar (or different) categories.
+For that we need to look at _homophily_/heterophily.
 
 ## Measuring heterophily
 
@@ -240,6 +258,7 @@ As such, an EI index of -1 suggests perfect homophily, whereas an EI index of +1
 
 Check how homophilic three variables in the network are,
 "Gender", "PowerOrigin", and "Attractive".
+Please assign the results so that we can use them later.
 
 ```{r ei, exercise=TRUE, purl = FALSE}
 
@@ -290,7 +309,7 @@ Plot the results of running this function with respect to the EI index
 on each of the three variables.
 You can specify that one thousand simulations should be used using `times = 1000`.
 
-```{r rando, exercise=TRUE, purl = FALSE}
+```{r rando, exercise=TRUE, purl = FALSE, exercise.timelimit = 120}
 
 ```
 
@@ -337,8 +356,10 @@ a red vertical line for the observed score,
 and a density plot of the scores from the randomly generated networks.
 The grey tails of the distribution are a visual aid indicating the most extreme 5% of scores
 from the distribution.
-
 The results are _really_ interesting.
+
+<img src="https://i.giphy.com/media/v1.Y2lkPTc5MGI3NjExNzlsM3J1dm9tb2tlYjB4bGw0cGxuc3RodXpvdjNhcGJ5cXV2MndjbiZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/AbYxDs20DECQw/giphy.gif" alt="gif of ironman explosion" width = "900"/>
+
 Despite being the larger coefficients (in absolute terms),
 it looks like we cannot reject the null hypothesis that there is no homophily
 for gender nor for attractiveness.
@@ -365,7 +386,7 @@ Permuting the network retains the structure of the network
 because the ties are kept and only the labels (variables) are reassigned randomly.
 Let's first plot the observed data and some permuted data next to each other.
 
-```{r perm, exercise=TRUE, purl = FALSE}
+```{r perm, exercise=TRUE, purl = FALSE, fig.width=9}
 
 ```
 
@@ -388,7 +409,7 @@ old + new
 ```{r cupqap-qn, echo=FALSE, purl = FALSE}
 question("Which of the following is true?",
   answer("Random networks retain the structure of the original network.",
-         message = learnr::random_encouragement()),
+         message = paste(learnr::random_encouragement(), '<img src="https://i.giphy.com/media/v1.Y2lkPTc5MGI3NjExeW9jbXg5dDBndjRpcWc2enhyOTZvbjloeGFpbmJndXNrMm05MHdtZiZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/ziLadIVnOGCKk/giphy.gif" alt="gif of thor frustration"/>')),
   answer("Permuted networks retain the structure of the original network.",
          correct = TRUE,
          message = learnr::random_praise()),
@@ -408,7 +429,7 @@ Let's try a test that runs this over a succession of permutations,
 just as we did with random graphs.
 Plot the results for gender and power according to the random and permutation baselines.
 
-```{r testperm, exercise=TRUE, exercise.setup = "rando-solution", purl = FALSE}
+```{r testperm, exercise=TRUE, exercise.setup = "rando-solution", purl = FALSE, exercise.timelimit = 120, fig.width=9}
 
 ```
 
@@ -466,7 +487,7 @@ You may recognise some of the names.
 The main network consists of 32 scholars with directed ties weighted by the total number of messages sent from $i$ to $j$ over the period of the study.
 Nodal attributes collected include the primary discipline and number of citations in the social science citation index at the start of the study.
 
-```{r introeies, exercise=TRUE, purl = FALSE}
+```{r introeies, exercise=TRUE, purl = FALSE, fig.width=9}
 ison_networkers
 graphr(ison_networkers, node_color = "Discipline")
 ```
@@ -488,26 +509,26 @@ simulations or more.
 ```
 
 ```{r qapmax-hint-1, purl = FALSE}
-network_reg(____, ison_networkers, times = 200)
+net_regression(____, ison_networkers, times = 200)
 # If the model runs into a timeout error, please reduce the number of 'times' in the function above.
 ```
 
 ```{r qapmax-hint-2, purl = FALSE}
-weight ~ alter(Citations) + sim(Citations) + 
-                      alter(Discipline) + same(Discipline)
+weight ~ ego(Citations) + alter(Citations) + sim(Citations) + 
+                      ego(Discipline) + same(Discipline)
 ```
 
 ```{r qapmax-solution}
-model1 <- network_reg(weight ~ ego(Citations) + alter(Citations) + sim(Citations) + 
-                      ego(Discipline) + alter(Discipline) + same(Discipline), 
+model1 <- net_regression(weight ~ ego(Citations) + alter(Citations) + sim(Citations) + 
+                      ego(Discipline) + same(Discipline), 
                       ison_networkers, times = 200)
 ```
 
 We can use tidy methods (e.g. `tidy()`, `glance()`) to get the salient information from this model,
 and `{migraph}` includes also a plot method for these results to
 facilitate the quick interpretation of these results.
 
-```{r qapinterp, exercise=TRUE, exercise.setup = "qapmax-solution", purl = FALSE}
+```{r qapinterp, exercise=TRUE, exercise.setup = "qapmax-solution", purl = FALSE, fig.width=9}
 
 ```
 
@@ -539,8 +560,17 @@ question("What can we say from the results from model 1?",
 )
 ```
 
+
+## Free play
+
 While these are the conclusions from this 'play' data, 
 you may have more and more interesting data at hand.
 How would you go about specifying such a model?
 Why is such an approach more appropriate for network data than linear
 or logistic regression?
+
+```{r freeplay, exercise = TRUE, fig.width=9}
+
+```
+
+