Skip to content

Commit

Permalink
Merge pull request #133 from egouldo/manuscript-bug-fix
Browse files Browse the repository at this point in the history
Fix errors for manuscript
  • Loading branch information
egouldo authored Aug 29, 2024
2 parents 9d5b09b + a6a1e93 commit eb505d2
Show file tree
Hide file tree
Showing 18 changed files with 985 additions and 909 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ManyEcoEvo
Title: Meta-analyse data from 'Many-Analysts' style studies
Version: 2.7.1
Version: 2.7.2
Authors@R: c(
person("Elliot", "Gould", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "https://orcid.org/0000-0002-6585-538X")),
Expand All @@ -27,7 +27,7 @@ Imports:
betapart,
cli,
data.table,
dplyr,
dplyr (>= 1.1.4),
forcats,
fs,
glue,
Expand Down Expand Up @@ -67,6 +67,6 @@ Remotes:
daniel1noble/orchaRd,
NightingaleHealth/ggforestplot
Encoding: UTF-8
LazyData: false
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ importFrom(EnvStats,stat_n_text)
importFrom(NatParksPalettes,scale_color_natparks_d)
importFrom(betapart,beta.pair)
importFrom(broom,tidy)
importFrom(broom.mixed,tidy)
importFrom(cli,cli_abort)
importFrom(cli,cli_alert)
importFrom(cli,cli_alert_danger)
Expand Down
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
# ManyEcoEvo (development version)
# ManyEcoEvo 2.7.2

<!-- NEWS.md is maintained by https://cynkra.github.io/fledge, do not edit -->

- separated column creation to occur under three conditions: NULL outcome_variable supplied, character string supplied, and expression argument supplied
- separated subset creation to occur separately on results of conditional evaluation
- Added conditional behaviour for when character vector supplied
- feat!: added arg checks #116 and cli output for when this condition is triggered
- explicitly supply `outcome_variable` and `outcome_SE` args for Zr
- #118 docs: Add explanation about updated behaviour when `estimate_type` is missing in `ManyEcoEvo` dataframe
- #118 build: devtools::document()

Expand Down
38 changes: 24 additions & 14 deletions R/fit_uni_mixed_effects.R
Original file line number Diff line number Diff line change
@@ -1,31 +1,27 @@
#' Fit model of Box-Cox transformed deviation scores as a function random-effects inclusion in analyses
#' @description Fits a univariate glm of box-cox transformed absolute deviation from the meta-analytic mean scores as a function of whether the analysis was a mixed effects model \(i.e. included random effects\) or not.
#'
#' @param data Dataframe containing box-cox transformed absolute deviation scores and binary column called `mixed_model` describing whether or not the analysis used a mixed-effects model.
#' @description Fits a univariate glm of Box-Cox transformed absolute deviation from the meta-analytic mean scores as a function of whether the analysis was a mixed effects model (i.e. included random effects) or not.
#'
#' @param data Dataframe containing Box-Cox transformed absolute deviation scores and binary column called `mixed_model` describing whether or not the analysis used a mixed-effects model.
#' @param N threshold number of analyses in each predictor category for fitting model
#' @return A fitted model object of class `glm` and `parsnip`
#' @export
#' @family Model fitting and meta-analysis
#' @examples
#' # library(tidyverse);library(targets);library(metafor);library(tidymodels)
#' # tar_load(meta_analysis_outputs)
#' # fit_uni_mixed_effects(meta_analysis_results$data[[1]])
#' # Note: used tidymodels approach for dynamic outcome var selection
#' # base R approach will be more succinct.
#' @import dplyr
#' @importFrom cli cli_h2 cli_warn cli_alert_warning
#' @importFrom pointblank test_col_exists
#' @importFrom recipes recipe update_role step_mutate step_naomit
#' @importFrom parsnip fit linear_reg
#' @importFrom workflows workflow add_model add_recipe extract_fit_parsnip
#' @seealso [parsnip::details_linear_reg_glm] for details on the [parsnip::linear_reg] engine.
fit_uni_mixed_effects <- function(data) {

cli::cli_h2(c("Fitting glm for box-cox transformed outcome with inclusion of random effects (binary variable) as predictor"))
fit_uni_mixed_effects <- function(data, N = 5) {

if (pointblank::test_col_exists(data,
columns = c("mixed_model",
starts_with("box_cox_abs_")))) {
if (!pointblank::test_col_exists(data,
columns = c("mixed_model",
starts_with("box_cox_abs_")))) {

cli::cli_alert_warning(
c("Columns {.var mixed_model} and ",
Expand All @@ -39,13 +35,25 @@ fit_uni_mixed_effects <- function(data) {

} else if ( length(unique(data$mixed_model)) == 1) {

cli::cli_warn(message = "More than 1 unique value of {.var mixed_model} ",
cli::cli_warn(message = c("More than 1 unique value of {.var mixed_model} ",
"is needed to fit model with {.var mixed_model} ",
"as predictor variable. Returning {.val {NA}}")
"as predictor variable. Returning {.val {NA}}"))

return(NA)

} else if (!pointblank::test_col_vals_gte(data,
columns = n,
value = N,
preconditions = \(x) count(x, mixed_model))) {

cli::cli_warn(message = "Less than {.arg N} = {.val {N}} observations in ",
"each level of {.var mixed_model}. Returning {.val {NA}}.")
print(data %>% count(mixed_model))
return(NA)

} else {
} else{



data <- data %>%
dplyr::select(dplyr::starts_with("box_cox_abs_"),
Expand All @@ -59,6 +67,8 @@ fit_uni_mixed_effects <- function(data) {
recipes::step_mutate(mixed_model = as.factor(mixed_model)) %>%
recipes::step_naomit()

cli::cli_h2(c("Fitting glm for Box-Cox transformed outcome with inclusion of random effects (binary variable) as predictor"))

glm_mod <- parsnip::linear_reg(engine = "glm")

fitted_mod <-
Expand Down
208 changes: 133 additions & 75 deletions R/generate_outlier_subsets.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,21 +62,21 @@ generate_outlier_subsets <- function(data, outcome_variable = NULL, n_min = NULL
"estimate_type",
"dataset")

#TODO consider switching to exprs instead of list as input
# see meta_analyse_datasets
if (!is.null(enexpr(ignore_subsets))) {
ignore_subsets_columns <- rlang::call_args(enquo(ignore_subsets)) %>%
map(rlang::f_lhs) %>%
map(rlang::as_string) %>%
list_c() %>%
append(values = required_columns) %>%
unique()
} else {
ignore_subsets_columns <- required_columns
if (rlang::is_list(ignore_subsets)) {
if (!all(map_lgl(ignore_subsets, rlang::is_call))) {
cli_abort("{.arg filter_vars} must be a list of calls")
} else {
required_columns <- ignore_subsets %>%
map(rlang::f_lhs) %>%
map(rlang::as_string) %>%
list_c() %>%
append(values = required_columns) %>%
unique()
}
}

pointblank::expect_col_exists(data,
columns = ignore_subsets_columns)
columns = required_columns)

if (is.list(n_min)) {
map(n_min, ~ {
Expand Down Expand Up @@ -125,59 +125,117 @@ generate_outlier_subsets <- function(data, outcome_variable = NULL, n_min = NULL
formulae_match_n_max <- formulae_match(unique(data$dataset), n_min)
}

matched_formulae <- map(outcome_variable, ~ formulae_match(x = names(.x), y = .x))

# ----- Generate Outlier Subsets -----
if (str_detect(data$estimate_type, "Zr") %>% any(na.rm = TRUE)) {

if (!is.null(enexpr(ignore_subsets))) {
filter_vars <- quos(estimate_type == "Zr",
!!!rlang::call_args(enquo(ignore_subsets)))
} else {
filter_vars <- quo(estimate_type == "Zr")
}

data_Zr <- data %>%
filter(estimate_type == "Zr")

# ---- assign outcome variables ----

if (rlang::is_null(outcome_variable)) {
# NULL value supplied
outcome_variable <- "Zr"

cli::cli_alert_warning(
"Assigning default {.arg outcome_variable} = {.val {outcome_variable}}"
)

data_Zr <- data_Zr %>%
ungroup %>%
mutate(outcome_colname = outcome_variable)

} else if (rlang::is_character(outcome_variable)) {
# Single Value Supplied
stopifnot(length(outcome_variable) == 1)
pointblank::expect_col_exists(data_Zr, columns = {{outcome_variable}})
data_Zr <- data_Zr %>%
ungroup %>%
mutate(outcome_colname = outcome_variable)
}
else{
# expression argument supplied
matched_formulae <- map(outcome_variable,
~ formulae_match(x = names(.x), y = .x))

data_Zr <-
map2(
names(matched_formulae),
matched_formulae,
.f = ~ map_match_formulae(data_Zr, .x, .y)) %>%
bind_rows() %>%
drop_na(outcome_colname)
}

data_Zr <-
map2(names(matched_formulae), matched_formulae,
.f = ~ map_match_formulae(data_Zr, .x, .y)) %>%
bind_rows() %>%
drop_na(outcome_colname) %>%
map_match_formulae(variable_name = "dataset", formulae_match_n_min, col_name = "n_min") %>%
map_match_formulae(variable_name = "dataset", formulae_match_n_max, col_name = "n_max") %>%
# ----- Generate Outlier Subsets for Zr datasets -----

data_Zr <- data_Zr %>%
map_match_formulae(variable_name = "dataset",
formulae_match_n_min,
col_name = "n_min") %>%
map_match_formulae(variable_name = "dataset",
formulae_match_n_max,
col_name = "n_max") %>%
apply_slice_conditionally(
x = .,
filter_vars = filter_vars) %>%
filter_vars = ignore_subsets) %>%
select(-outcome_colname, -n_min, -n_max)

}

# ---- Generate Outlier Subsets for yi datasets -----
if (str_detect(data$estimate_type, "y") %>%
any(na.rm = TRUE)) {

if (!is.null(enexpr(ignore_subsets))) {
filter_vars <- quos(str_detect(estimate_type, "y"),
!!!rlang::call_args(enquo(ignore_subsets)))
} else {
filter_vars <- quo(str_detect(estimate_type, "y"))
}

data_yi <- data %>%
filter(str_detect(estimate_type, "y"))

# ---- assign outcome variables ----

if (rlang::is_null(outcome_variable)) {
# NULL value supplied
outcome_variable <- "Z"

cli::cli_alert_warning(
"Assigning default {.arg outcome_variable} = {.val {outcome_variable}}"
)

data_yi <- data_yi %>%
ungroup %>%
mutate(outcome_colname = outcome_variable)
} else if (rlang::is_character(outcome_variable)) {
# Single Value Supplied
stopifnot(length(outcome_variable) == 1)
pointblank::expect_col_exists(data_yi, columns = {{outcome_variable}})
data_yi <- data_yi %>%
ungroup %>%
mutate(outcome_colname = outcome_variable)
} else {
# expression argument supplied
matched_formulae <- map(outcome_variable,
~ formulae_match(x = names(.x), y = .x))

data_yi <-
map2(
names(matched_formulae),
matched_formulae,
.f = ~ map_match_formulae(data_yi, .x, .y)) %>%
bind_rows() %>%
drop_na(outcome_colname)
}

data_yi <- map2(
names(matched_formulae),
matched_formulae,
.f = ~ map_match_formulae(data_yi, .x, .y)) %>%
bind_rows() %>%
drop_na(outcome_colname) %>%
map_match_formulae(variable_name = "dataset", formulae_match_n_min, col_name = "n_min") %>%
map_match_formulae(variable_name = "dataset", formulae_match_n_max, col_name = "n_max") %>%
data_yi <- data_yi %>%
map_match_formulae(variable_name = "dataset",
formulae_match_n_min,
col_name = "n_min") %>%
map_match_formulae(variable_name = "dataset",
formulae_match_n_max,
col_name = "n_max") %>%
apply_slice_conditionally(
x = .,
filter_vars = filter_vars) %>%
select(-outcome_colname, -n_min, -n_max)

}

out <- if (exists(x = "data_Zr") & exists(x = "data_yi")) {
Expand Down Expand Up @@ -220,37 +278,37 @@ apply_slice_conditionally <- function(x, filter_vars){
if ("exclusion_set" %in% colnames(.)) {
exclusion_set }
else {"complete"}), {
x %>%
filter(!!!filter_vars) %>%
mutate(data =
pmap(list(data, outcome_colname, n_min, n_max),
.f = ~ slice_conditionally(..1,
n_min = ..3,
n_max = ..4,
outcome_variable = ..2
))) %>%
mutate( exclusion_set =
if ("exclusion_set" %in% colnames(.)) {
paste0(exclusion_set, "-rm_outliers") }
else {"complete-rm_outliers"},
data =
map2(
.x = data,
.y = data,
.f = ~ semi_join(.x, .y,
by = join_by(id_col)) %>%
distinct()
),
diversity_data =
map2(
.x = diversity_data,
.y = data,
.f = ~ semi_join(.x, .y,
by = join_by(id_col)) %>%
distinct()
)
)
})
x %>%
filter(!!!filter_vars) %>%
mutate(data =
pmap(list(data, outcome_colname, n_min, n_max),
.f = ~ slice_conditionally(..1,
n_min = ..3,
n_max = ..4,
outcome_variable = ..2
))) %>%
mutate( exclusion_set =
if ("exclusion_set" %in% colnames(.)) {
paste0(exclusion_set, "-rm_outliers") }
else {"complete-rm_outliers"},
data =
map2(
.x = data,
.y = data,
.f = ~ semi_join(.x, .y,
by = join_by(id_col)) %>%
distinct()
),
diversity_data =
map2(
.x = diversity_data,
.y = data,
.f = ~ semi_join(.x, .y,
by = join_by(id_col)) %>%
distinct()
)
)
})

return(out)
}
Expand Down
Loading

0 comments on commit eb505d2

Please sign in to comment.