introduce standardize_variables() (#18)

* introduce `standardize_variables()` * update pkgdown * update NEWS.md * new tests * use standardize_variables in the documentation when relevant Co-authored-by: YoannJulliard <[email protected]>
cedricbatailler · Jul 21, 2021 · b21a3a7 · b21a3a7
1 parent 386bcde
commit b21a3a7
Show file tree

Hide file tree

Showing 10 changed files with 228 additions and 38 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -17,6 +17,7 @@ S3method(mdt_within,data.frame)
 S3method(mdt_within_wide,data.frame)
 S3method(print,indirect_index)
 S3method(print,mediation_model)
+S3method(standardize_variables,data.frame)
 export("%>%")
 export(add_index)
 export(apastylr)
@@ -30,6 +31,8 @@ export(mdt_moderated)
 export(mdt_simple)
 export(mdt_within)
 export(mdt_within_wide)
+export(standardise_variables)
+export(standardize_variables)
 importFrom(glue,glue)
 importFrom(magrittr,"%>%")
 importFrom(rlang,":=")

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,6 @@
 # JSmediation (Development version)
 
+* Introduce `standardize_variables()` to easily center and scale your variables.
 * Introduce the `compute_indirect_effect_for()` function which computes the 
   indirect effect confidence interval for a specific value of the moderator in 
   a moderated mediation model.

diff --git a/R/compute_indirect_effect_for.R b/R/compute_indirect_effect_for.R
@@ -1,39 +1,39 @@
 #' Compute the indirect effect index for a specific value of the moderator
 #'
-#' @description 
+#' @description
 #'   When computing a moderated mediation, one assesses whether an indirect
 #'   effect changes according to a moderator value (Muller et al., 2005).
 #'   [`mdt_moderated`] makes it easy to assess moderated mediation, but it does
 #'   not allow accessing the indirect effect for a specific moderator value.
 #'   `compute_indirect_effect_for` fills this gap.
 #'
-#' @param mediation_model A moderated mediation model fitted with `mdt_moderated``. 
+#' @param mediation_model A moderated mediation model fitted with `mdt_moderated`.
 #' @param Mod The moderator value for which to compute the indirect effect. Must
 #'   be a numeric value, defaults to `0`.
 #' @param times Number of simulations to use to compute the Monte Carlo indirect
 #'   effect confidence interval. Must be numeric, defaults to `5000`.
 #' @param level Alpha threshold to use for the indirect effect's confidence
 #'   interval. Defaults to `.05`.
-#'   
-#' @details 
+#'
+#' @details
 #'   The approach used by `compute_indirect_effect_for` is similar to the
 #'   approach used for simple slope analyses. Specifically, it will fit a new
 #'   moderated mediation model, but with a data set with a different variable
 #'   coding. Behind the scenes, `compute_indirect_effect_for` adjusts the
 #'   moderator variable coding, so that the value we want to compute the
 #'   indirect effect for is now `0`.
-#'   
+#'
 #'   Once done, a new moderated mediation model is applied using the new data
 #'   set. Because of the new coding, and because of how one interprets
-#'   coefficients in a linear regression, \eqn{a \times b}{a * b} is now the 
-#'   indirect effect we wanted to compute (see the Models section). 
-#'   
+#'   coefficients in a linear regression, \eqn{a \times b}{a * b} is now the
+#'   indirect effect we wanted to compute (see the Models section).
+#'
 #'   Thanks to the returned values of \eqn{a}{a} and \eqn{b}{b}
 #'   (\eqn{b_{51}}{b_51} and \eqn{b_{64}}{b_64}, see the Models section), it is
 #'   now easy to compute \eqn{a \times b}{a * b}. `compute_indirect_effect_for`
 #'   uses the same approach as the [`add_index`] function. A Monte Carlo
 #'   simulation is used to compute the indirect effect index (MacKinnon et al.,
 #'   2004).
-#'   
+#'
 #' @section Models: In a moderated mediation model, three models are used.
 #'   `compute_indirect_effect_for` uses the same model specification as
 #'   [`mdt_moderated`]:
@@ -57,46 +57,45 @@
 #'   \eqn{b_{51}}{b_51}, \eqn{b_{53}}{b_53}, \eqn{b_{64}}{b_64},
 #'   \eqn{b_{65}}{b_65}, \eqn{b_{41}}{b_41}, \eqn{b_{43}}{b_43},
 #'   \eqn{b_{61}}{b_61}, and \eqn{b_{63}}{b_63} (see Muller et al., 2005).
-#'   
+#'
 #' @examples
-#' # compute an indirect effect index for a specific value in a moderated 
+#' # compute an indirect effect index for a specific value in a moderated
 #' # mediation.
 #' data(ho_et_al)
 #' ho_et_al$condition_c <- build_contrast(ho_et_al$condition,
 #'                                        "Low discrimination",
 #'                                        "High discrimination")
-#' ho_et_al$linkedfate <- as.numeric(scale(ho_et_al$linkedfate))
-#' ho_et_al$sdo        <- as.numeric(scale(ho_et_al$sdo))
+#' ho_et_al <- standardize_variables(ho_et_al, c(linkedfate, sdo))
 #' moderated_mediation_model <- mdt_moderated(data = ho_et_al,
 #'                                            DV = hypodescent,
 #'                                            IV = condition_c,
 #'                                            M = linkedfate,
-#'                                            Mod = sdo) 
+#'                                            Mod = sdo)
 #' compute_indirect_effect_for(moderated_mediation_model, Mod = 0)
 #'
-#' @references 
+#' @references
 #'   MacKinnon, D. P., Lockwood, C. M., & Williams, J. (2004). Confidence Limits
 #'   for the Indirect Effect: Distribution of the Product and Resampling
 #'   Methods. *Multivariate Behavioral Research*, *39*(1), 99-128. doi:
 #'   10.1207/s15327906mbr3901_4
-#'   
+#'
 #'   Muller, D., Judd, C. M., & Yzerbyt, V. Y. (2005). When moderation
 #'   is mediated and mediation is moderated. *Journal of Personality and
 #'   Social Psychology*, *89*(6), 852-863. doi: 10.1037/0022-3514.89.6.852
 #'
 #' @export
 compute_indirect_effect_for <- function(mediation_model,
-                                         Mod = 0, 
-                                         times = 5000, 
+                                         Mod = 0,
+                                         times = 5000,
                                          level = .05) {
   UseMethod("compute_indirect_effect_for")
 }
 
 #' @export
-compute_indirect_effect_for.moderated_mediation <- 
-  function(mediation_model, 
-           Mod = 0, 
-           times = 5000, 
+compute_indirect_effect_for.moderated_mediation <-
+  function(mediation_model,
+           Mod = 0,
+           times = 5000,
            level = .05) {
 
     # checks
@@ -112,8 +111,8 @@ compute_indirect_effect_for.moderated_mediation <-
     moderator         <- purrr::chuck(mediation_model, "params", "Mod")
 
     # adjust the moderator coding so that 0 is the value we want to look at
-    mediation_dataset <- 
-      mediation_dataset %>% 
+    mediation_dataset <-
+      mediation_dataset %>%
       dplyr::mutate(dplyr::across(.data[[moderator]], ~ .x - Mod))
 
     # run a new moderated mediation model
@@ -144,7 +143,7 @@ compute_indirect_effect_for.moderated_mediation <-
                       )
       )
 
-    indirect_sampling <- param_sampling[ , 1] * param_sampling[ , 2] 
+    indirect_sampling <- param_sampling[ , 1] * param_sampling[ , 2]
 
     indirect_effect(
       type          = glue("Conditional simple mediation index (Mod = {Mod})"),

diff --git a/R/mdt_moderated_index.R b/R/mdt_moderated_index.R
@@ -21,7 +21,7 @@
 #'   Yzerbyt, 2005).
 #'
 #'   \pkg{JSmediation} supports different types of mediated indirect effect
-#'   index: 
+#'   index:
 #'   * **Stage 1:** computes the product between \eqn{a \times Mod}{a * Mod} and
 #'     \eqn{b}.
 #'   * **Stage 2:** computes the product between \eqn{a} and \eqn{b \times
@@ -33,8 +33,7 @@
 #' ho_et_al$condition_c <- build_contrast(ho_et_al$condition,
 #'                                        "Low discrimination",
 #'                                        "High discrimination")
-#' ho_et_al$linkedfate_c <- scale(ho_et_al$linkedfate, scale = FALSE)
-#' ho_et_al$sdo_c <- scale(ho_et_al$sdo, scale = FALSE)
+#' ho_et_al <- standardize_variables(ho_et_al, c(linkedfate, sdo), suffix ="c")
 #' moderated_model <- mdt_moderated(data = ho_et_al,
 #'                                  IV = condition_c,
 #'                                  DV = hypodescent,
@@ -87,15 +86,15 @@ add_index.moderated_mediation <- function(mediation_model, times = 5000, level =
     indirect_sampling <- ab_sampling[ , 1] * ab_sampling[ , 2]
     CI <- stats::quantile(indirect_sampling, c(level / 2, 1 - level / 2))
     contains_zero <- (CI[[1]] < 0 & CI[[2]] > 0)
-    
+
     indirect_index_infos <-
       indirect_effect(type       = type,
                       estimate   = a * b,
                       level      = level,
                       times      = times,
                       sampling   = indirect_sampling)
   }
-  
+
   else if(stage %in% c("total")) {
 
     a1   <- purrr::pluck(mediation_model, "paths", "a * Mod", "point_estimate")
@@ -124,7 +123,7 @@ add_index.moderated_mediation <- function(mediation_model, times = 5000, level =
 
     indirect_sampling <- ab_sampling[ , 1] * ab_sampling[ , 2] +
       ab_sampling[ , 3] * ab_sampling[ , 4]
-    
+
     indirect_index_infos <-
       indirect_effect(
         type          = type,

diff --git a/R/standarize_variables.R b/R/standarize_variables.R
@@ -0,0 +1,67 @@
+#' Standardize variables in a data set.
+#'
+#' @description `standardize_variables()` standardizes the selected columns in a
+#'   data.frame using \code{\link[base:scale]{scale}}. By default, this function
+#'   overwrites the column to be scaled. Use the `suffix` argument to avoid this
+#'   behavior.
+#'
+#'   `standardize_variables()` and `standardise_variables()` are synonyms.
+#'
+#' @param data A data frame containing the variables to standardize.
+#' @param cols <[`tidy-select`][dplyr_tidy_select]> Columns to standardize.
+#'   Defaults to [`dplyr::everything()`].
+#' @param suffix A character suffix to be added to the scaled variables' names
+#'   (separated by an underscore). When `suffix` is set to `NULL`, the
+#'   `standardize_variables()` function will overwrite the scaled variables.
+#'   Defaults to `NULL`.
+#'
+#' @section `standardize_variables` and `grouped_df`:
+#'
+#'   Note that `standardize_variables` ignores grouping, meaning that if you
+#'   call this function on a grouped data frame (see [dplyr::grouped_df]), the
+#'   __overall__ variables' mean and standard deviation will be used for the
+#'   standardization.
+#'
+#' @return A data frame with the standardized columns.
+#' @export
+#'
+#' @examples
+#' ho_et_al %>%
+#'   standardize_variables(sdo)
+#'
+#' ho_et_al %>%
+#'   standardize_variables(c(sdo, linkedfate), suffix = "scaled")
+standardize_variables <- function(data,
+                                  cols = dplyr::everything(),
+                                  suffix = NULL) {
+  UseMethod("standardize_variables")  # S3 generic: dispatches on class of data
+}
+
+#' @rdname standardize_variables
+#' @export
+standardise_variables <- standardize_variables  # alias with the -ise spelling
+
+#' @export
+standardize_variables.data.frame <- function(data,
+                                             cols = dplyr::everything(),
+                                             suffix = NULL) {
+
+  # store grouping variables as symbols so the grouping can be restored below
+  grouping_vars <- dplyr::groups(data)
+
+  # defines suffix: underscore-prefixed if supplied, empty string otherwise
+  if (!is.null(suffix)) {
+    suffix <- glue("_{suffix}")
+  } else {
+    suffix <- ""
+  }
+
+  # scales variables on ungrouped data, then restores the stored grouping
+  data %>%
+    dplyr::ungroup() %>%
+    dplyr::mutate(dplyr::across(
+      .cols = {{ cols }},
+      .fns = ~ as.numeric(scale(.x)),  # coerce scale() output to numeric
+      .names = "{.col}{suffix}"  # empty suffix => column is overwritten
+    )) %>%
+    dplyr::group_by(!!!grouping_vars)
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -21,7 +21,6 @@ authors:
   Dominique Muller:
     href: https://www.dominique.muller.lippc2s.fr/
 
-
 reference:
 
 - title: "Fitting mediation models"
@@ -52,3 +51,4 @@ reference:
 - contents:
   - apastylr
   - build_contrast
+  - standardize_variables
diff --git a/man/add_index.moderated_mediation.Rd b/man/add_index.moderated_mediation.Rd
diff --git a/man/compute_indirect_effect_for.Rd b/man/compute_indirect_effect_for.Rd
diff --git a/man/standardize_variables.Rd b/man/standardize_variables.Rd