Merge pull request #121 from egouldo/118-log-transform-yi

118 log transform yi
egouldo · Aug 28, 2024 · 98ae1c1 · 98ae1c1
2 parents cfc17ed + 807c548
commit 98ae1c1
Show file tree

Hide file tree

Showing 147 changed files with 4,425 additions and 2,301 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ManyEcoEvo
 Title: Meta-analyse data from 'Many-Analysts' style studies
-Version: 2.4.2
+Version: 2.7.0
 Authors@R: c(
     person("Elliot", "Gould", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "https://orcid.org/0000-0002-6585-538X")),

diff --git a/ManyEcoEvo.Rproj b/ManyEcoEvo.Rproj
@@ -15,4 +15,4 @@ LaTeX: pdfLaTeX
 BuildType: Package
 PackageUseDevtools: Yes
 PackageInstallArgs: --no-multiarch --with-keep.source
-PackageRoxygenize: rd,collate,namespace,vignette
+PackageRoxygenize: rd,collate,namespace
diff --git a/NAMESPACE b/NAMESPACE
@@ -45,6 +45,7 @@ export(fit_metafor_uni)
 export(fit_multivar_MA)
 export(fit_sorensen_glm)
 export(fit_uni_mixed_effects)
+export(folded_params)
 export(generate_collinearity_subset)
 export(generate_exclusion_subsets)
 export(generate_expertise_subsets)
@@ -59,6 +60,8 @@ export(identity_back)
 export(inverse_back)
 export(log_back)
 export(log_transform)
+export(log_transform_response)
+export(log_transform_yi)
 export(logit_back)
 export(make_param_table)
 export(make_viz)
@@ -85,8 +88,10 @@ export(preprocess_prediction_files)
 export(preprocess_updated_prediction_files)
 export(probit_back)
 export(read_submission_data)
+export(rename_prediction_cols)
 export(rm_inf_na)
 export(run_model_checks)
+export(split_yi_subsets)
 export(square_back)
 export(square_root_back)
 export(standardise_response)
@@ -106,72 +111,151 @@ export(summarise_study)
 export(summarise_variable_counts)
 export(validate_predictions)
 export(validate_predictions_df_blue_tit)
-export(validate_predictions_df_euc)
-import(NatParksPalettes)
-import(broom)
-import(broom.mixed)
-import(cli)
+export(variance_box_cox)
 import(dplyr)
-import(forcats)
-import(ggbeeswarm)
-import(ggforestplot)
 import(ggplot2)
 import(lme4)
 import(metafor)
-import(purrr)
-import(recipes)
-import(rlang)
-import(see)
-import(stringr)
-import(tidyr)
 importFrom(EnvStats,stat_n_text)
-importFrom(broom.mixed,tidy)
+importFrom(NatParksPalettes,scale_color_natparks_d)
+importFrom(betapart,beta.pair)
+importFrom(broom,tidy)
 importFrom(cli,cli_abort)
+importFrom(cli,cli_alert)
+importFrom(cli,cli_alert_danger)
 importFrom(cli,cli_alert_info)
+importFrom(cli,cli_alert_success)
 importFrom(cli,cli_alert_warning)
+importFrom(cli,cli_bullets)
+importFrom(cli,cli_h1)
 importFrom(cli,cli_h2)
+importFrom(cli,cli_ol)
+importFrom(cli,cli_warn)
+importFrom(cli,style_italic)
 importFrom(data.table,setnames)
+importFrom(forcats,as_factor)
 importFrom(forcats,fct_relevel)
+importFrom(forcats,fct_reorder)
+importFrom(fs,file_exists)
+importFrom(ggbeeswarm,geom_quasirandom)
+importFrom(ggforestplot,theme_forest)
+importFrom(ggplot2,aes)
+importFrom(ggplot2,coord_flip)
+importFrom(ggplot2,element_line)
+importFrom(ggplot2,element_text)
+importFrom(ggplot2,geom_pointrange)
 importFrom(ggplot2,ggplot)
+importFrom(ggplot2,guides)
+importFrom(ggplot2,labs)
+importFrom(ggplot2,theme)
 importFrom(glue,glue)
 importFrom(lifecycle,deprecated)
 importFrom(lme4,lmer)
 importFrom(magrittr,"%>%")
+importFrom(metafor,rma.mv)
 importFrom(metaviz,viz_funnel)
+importFrom(orchaRd,i2_ml)
 importFrom(parameters,parameters)
+importFrom(parsnip,fit)
+importFrom(parsnip,linear_reg)
 importFrom(performance,performance)
+importFrom(pointblank,action_levels)
 importFrom(pointblank,col_exists)
+importFrom(pointblank,col_is_character)
+importFrom(pointblank,col_is_integer)
+importFrom(pointblank,col_is_numeric)
+importFrom(pointblank,col_vals_in_set)
 importFrom(pointblank,col_vals_not_null)
+importFrom(pointblank,create_agent)
+importFrom(pointblank,expect_col_exists)
+importFrom(pointblank,expect_col_is_character)
+importFrom(pointblank,expect_col_is_numeric)
+importFrom(pointblank,expect_col_vals_in_set)
 importFrom(pointblank,has_columns)
 importFrom(pointblank,stop_if_not)
+importFrom(pointblank,test_col_exists)
 importFrom(pointblank,test_col_vals_gte)
 importFrom(pointblank,vars)
+importFrom(pointblank,warn_on_fail)
+importFrom(purrr,discard)
+importFrom(purrr,exec)
+importFrom(purrr,flatten_dbl)
+importFrom(purrr,is_scalar_vector)
 importFrom(purrr,keep)
+importFrom(purrr,keep_at)
+importFrom(purrr,list_c)
 importFrom(purrr,list_flatten)
 importFrom(purrr,list_rbind)
 importFrom(purrr,map)
 importFrom(purrr,map2)
 importFrom(purrr,map_chr)
+importFrom(purrr,map_dfr)
 importFrom(purrr,map_if)
+importFrom(purrr,map_int)
+importFrom(purrr,map_lgl)
 importFrom(purrr,pluck)
 importFrom(purrr,pmap)
 importFrom(purrr,possibly)
 importFrom(purrr,reduce)
 importFrom(purrr,reduce2)
 importFrom(purrr,set_names)
+importFrom(purrr,simplify)
+importFrom(purrr,transpose)
+importFrom(readr,read_csv)
+importFrom(recipes,juice)
+importFrom(recipes,prep)
+importFrom(recipes,recipe)
+importFrom(recipes,step_mutate)
+importFrom(recipes,step_naomit)
+importFrom(recipes,tidy)
+importFrom(recipes,update_role)
+importFrom(rlang,"!!")
+importFrom(rlang,":=")
+importFrom(rlang,as_function)
+importFrom(rlang,as_string)
 importFrom(rlang,caller_env)
 importFrom(rlang,enquo)
+importFrom(rlang,ensym)
+importFrom(rlang,exec)
 importFrom(rlang,expr)
+importFrom(rlang,exprs)
+importFrom(rlang,f_lhs)
+importFrom(rlang,inject)
+importFrom(rlang,is_call)
+importFrom(rlang,is_list)
 importFrom(rlang,is_na)
 importFrom(rlang,is_null)
 importFrom(rlang,na_chr)
 importFrom(rlang,new_formula)
 importFrom(sae,bxcx)
+importFrom(see,geom_jitter2)
+importFrom(see,scale_fill_material_d)
+importFrom(see,theme_modern)
 importFrom(stringr,str_detect)
+importFrom(stringr,str_remove)
+importFrom(stringr,str_split)
+importFrom(stringr,str_starts)
+importFrom(tibble,as_tibble)
+importFrom(tibble,as_tibble_row)
+importFrom(tibble,column_to_rownames)
 importFrom(tibble,enframe)
+importFrom(tibble,rownames_to_column)
 importFrom(tibble,tibble)
+importFrom(tidyr,any_of)
+importFrom(tidyr,drop_na)
 importFrom(tidyr,hoist)
+importFrom(tidyr,nest)
 importFrom(tidyr,pivot_longer)
 importFrom(tidyr,pivot_wider)
+importFrom(tidyr,separate)
+importFrom(tidyr,tibble)
+importFrom(tidyr,unite)
 importFrom(tidyr,unnest)
+importFrom(tidyr,unnest_longer)
+importFrom(tidyselect,all_of)
+importFrom(tidyselect,where)
 importFrom(timetk,step_box_cox)
+importFrom(workflows,add_model)
+importFrom(workflows,add_recipe)
+importFrom(workflows,extract_fit_parsnip)
+importFrom(workflows,workflow)
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,31 @@
-# ManyEcoEvo 2.4.2
+# ManyEcoEvo (development version)
 
-# ManyEcoEvo 2.4.1
+<!-- NEWS.md is maintained by https://cynkra.github.io/fledge, do not edit -->
 
-* Initial CRAN submission.
+- #118 docs: Add explanation about updated behaviour when `estimate_type` is missing in `ManyEcoEvo` dataframe
+- #118 build: devtools::document()
+
+# ManyEcoEvo 2.6.0
+
+- Update arg supply to targets call on prepare_response_variables() after #118 updates
+- #118 add pmap internal helper function for differential application of transformation / standardisation in `standardise_response()`
+- #118 delete old pmap helper function
+- `dat` to `data` to help with auto-matching in pmap within prepare_response_variables() wrapper #118
+- ensure all family fns have ... arg for pmap application in prepare_response_variables() since all fns have different argument lengths and names
+- accidentally deleted when upgrading for #118, have added creation of transform_datasets tibbles for all cases now, and then these will apply the appropriate functions in final code chunk at end
+- #118 ensure application of Z_VZ_preds takes the generalised colnames yi, yi_se instead of using hard-coded dataset application #97
+- #118 call new arg `dataset_log_transform` in fn to log-transform outcomes for euc yi analysis
+- #118 add log-transformation equivalent to `standardise_response()` and `process_response()`
+- #102 add function documentation, including examples
+- #118 extract `lower` and `upper` transformed vals in line with addition of `log_transform_response()` / changes to `standardise_response()`
+- #116 check appropriate required variable (i.e. function needs `back_transformed_data`, but checked for `augmented_data` in `dat` arg, wouldn't throw required error because `augmented_data` was present in `dat`)
+- #102 add import, return, and see also roxygen doc tags, replace note with details tag, rename fn doc title
+- #116 update argument checks conditional expression
+- #118 match output to `log_transform_yi()` (now returns additional cols `lower` and `upper`, not only `c("Z","VZ")`)
+- #118 match process to `log_transform_yi()` and #97 generalise processing to both euc/bt datasets without hard-coding dataset names in fns, and remove associated dataset-specific argument checking #116
+- #118 adapt response variable preparation to accept additional argument `dataset_log_transform` apply argument checks #116, add roxygen param #102
+- #118 adapt response variable processing to accept either/or/none for dataset standardisation/log-transformation.
+- equivalent to `pred_to_z()`
+- #102 write documentation
+- #102 add import tags for `log_transform()` and link to equivalent functions, apply default argument values / checks
+- #97 rename out argument
diff --git a/R/ManyEcoEvo-package.R b/R/ManyEcoEvo-package.R
@@ -2,7 +2,6 @@
 "_PACKAGE"
 
 ## usethis namespace: start
-#' @import rlang
 #' @importFrom glue glue
 #' @importFrom lifecycle deprecated
 ## usethis namespace: end

diff --git a/R/anonymise_teams.R b/R/anonymise_teams.R
@@ -6,7 +6,7 @@
 #' @return A `df` with anonymised values of `id_col` based on the `New_Identifier` column of `lookup`
 #' @export
 #' @importFrom pointblank col_vals_not_null
-#' @import tidyr
+#' @importFrom tidyr separate unite
 #' @import dplyr
 anonymise_teams <- function(df, lookup) { # TODO actually... this is anonymise_id_col()
   df %>%

diff --git a/R/apply_VZ_exclusions.R b/R/apply_VZ_exclusions.R
@@ -1,22 +1,130 @@
 #' Apply VZ exclusion to a data-frame containing list-columns of yi subsets
 #'
-#' @param df A dataframe of yi data subsets generated from `generate_yi_subsets\(\)`.
-#' @param VZ_cutoff A numeric vector of length 1, values equal to or greater than this value of VZ will be filtered out of the dataframes stored in `df`'s list-column `data`.
-#'
+#' @param df A dataframe of yi data subsets generated by [generate_yi_subsets()] or [split_yi_subsets()].
+#' @param VZ_cutoff A numeric vector of length 1, values equal to or greater than this value of VZ will be filtered out of the dataframes stored in `df`'s list-column `data`, else a named list of numeric values, where the names are the dataset names and the values are the `VZ_cutoff`s for each `dataset` in `df`.
+#' @param VZ_colname Either a character vector of length 1, the name of the column in the dataframes stored in `df`'s list-column `data` that contains the VZ values, or else a named list of character values, where the names are the dataset names and the values are the `VZ_colname`s for each `dataset` in `df`.
 #' @return A dataframe of yi subsets, whose extreme values of VZ have been removed.
 #' @export
+#' @import dplyr
+#' @importFrom purrr map map2
+#' @importFrom pointblank col_exists
+#' @importFrom cli cli_alert_warning
+#' @seealso Applies [exclude_extreme_VZ()] to each dataframe in the list-column `data` of `df`.
 #' @family Multi-dataset Wrapper Functions
-apply_VZ_exclusions <- function(df = data.frame(), VZ_cutoff = numeric(1L)) {
-  pointblank::col_exists(df, columns = c("data", "diversity_data"))
-
+#' @details
+#' `df` must contain the columns `"data"`, `"diversity_data"` and `"dataset"`.
+#' If only one value of `VZ_colname` and `VZ_cutoff` is supplied, it will be recycled to match the number of datasets in `df`.
+#' 
+#' If a named list is supplied for `VZ_colname` and `VZ_cutoff`, the names must match the dataset names in `df`.
+#' @examples
+#' data(ManyEcoEvo_yi)
+#' ManyEcoEvo_yi %>%
+#'   prepare_response_variables(
+#'     estimate_type = "yi",
+#'     param_table =
+#'       ManyEcoEvo:::analysis_data_param_tables,
+#'     dataset_standardise = "blue tit",
+#'     dataset_log_transform = "eucalyptus") %>%
+#'   generate_yi_subsets() %>% 
+#'   apply_VZ_exclusions(VZ_colname = 
+#'                         list("eucalyptus" = "se_log", 
+#'                              "blue tit" = "VZ"), 
+#'                       VZ_cutoff = 3)
+apply_VZ_exclusions <- function(df = data.frame(), VZ_colname, VZ_cutoff) {
+  # Filters each dataframe in `df$data` with `exclude_extreme_VZ()`, using a
+  # per-dataset VZ column name and cutoff, then trims `df$diversity_data` to
+  # the rows whose `id_col` is still present in the filtered `data`.
+
+  # ---- Argument Checking -----
+  pointblank::col_exists(df, columns = c("data", "diversity_data", "dataset"))
+
+  # When the arguments carry names, check those names against `df$dataset`.
+  if (!is.null(names(VZ_colname))) {
+    pointblank::expect_col_vals_make_set(object = df, 
+                                         columns = dataset, 
+                                         set = names(VZ_colname))
+  }
+
+  if (!is.null(names(VZ_cutoff))) {
+    pointblank::expect_col_vals_make_set(object = df, 
+                                         columns = dataset, 
+                                         set = names(VZ_cutoff))
+  }
+
+  cli::cli_h1("Applying VZ exclusions")
+
+  if (is.list(VZ_colname)) {
+    # Every element of a list-valued `VZ_colname` must be character.
+    map(VZ_colname, ~ {
+      stopifnot(
+        is.character(.x)
+      )
+    })
+  } else {
+    stopifnot(
+      is.character(VZ_colname)
+    )
+    # ----- Format VZ exclusions when VZ_colname is not list -----
+    if (length(VZ_colname) < length(unique(df$dataset))) {
+      # Report the value that was actually recycled (`VZ_colname`); the
+      # message previously interpolated `VZ_cutoff` by mistake.
+      cli::cli_alert_warning("{.arg VZ_colname} = {.val {VZ_colname}} was recycled to match the number of unique datasets in {.arg df}.")
+      VZ_colname <- rep(VZ_colname, length(unique(df$dataset)))
+    }
+  }
+
+  if (is.list(VZ_cutoff)) {
+    # Every element of a list-valued `VZ_cutoff` must be numeric.
+    map(VZ_cutoff, ~ {
+      stopifnot(
+        is.numeric(.x)
+      )
+    })
+  } else {
+    stopifnot(
+      is.numeric(VZ_cutoff)
+    )
+    # ----- Format VZ exclusions when VZ_cutoff is not list -----
+    if (length(VZ_cutoff) < length(unique(df$dataset))) {
+      cli::cli_alert_warning("{.arg VZ_cutoff} = {.val {VZ_cutoff}} was recycled to match the number of unique datasets in {.arg df}.")
+      VZ_cutoff <- rep(VZ_cutoff, length(unique(df$dataset)))
+    }
+  }
+
+  # ----- Create formulas for matching VZ_cutoff and VZ_colname to df$dataset -----
+  # Each formula is `<dataset name> ~ <value>` and is spliced into
+  # `case_match()` below. List inputs are keyed by their own names; non-list
+  # inputs are paired positionally with `unique(df$dataset)`.
+
+  if (is.list(VZ_colname)) {
+    formulae_match_VZ_colname <- map2(names(VZ_colname), 
+                                      VZ_colname, 
+                                      rlang::new_formula)
+  } else {
+    formulae_match_VZ_colname <- map2(unique(df$dataset), 
+                                      VZ_colname, 
+                                      rlang::new_formula)
+  }
+
+  if (is.list(VZ_cutoff)) {
+    formulae_match_VZ_cutoff <- map2(names(VZ_cutoff), 
+                                     VZ_cutoff, 
+                                     rlang::new_formula)
+  } else {
+    formulae_match_VZ_cutoff <- map2(unique(df$dataset), 
+                                     VZ_cutoff, 
+                                     rlang::new_formula)
+  }
+
+  # ----- Apply VZ exclusions -----
   df_out <- df %>%
+    ungroup() %>% 
     mutate(
-      data = map(data, exclude_extreme_VZ, !!{{ VZ_cutoff }}), # TODO check whether we should run on effects_analysis instead of data
+      # Temporary per-row lookup columns; datasets with no matching formula
+      # get NA. Both columns are dropped again before returning.
+      VZ_colname_val = case_match(dataset,
+                                  !!!formulae_match_VZ_colname,
+                                  .default = NA), 
+      VZ_cutoff_val = case_match(dataset,
+                                 !!!formulae_match_VZ_cutoff,
+                                 .default = NA),
+      data = pmap( # TODO check whether we should run on effects_analysis instead of data
+        list(data, VZ_colname_val, VZ_cutoff_val),
+        ~ exclude_extreme_VZ(df = ..1, VZ_colname = ..2, VZ_cutoff = ..3)
+      ),
       diversity_data = map2(
         .x = diversity_data,
         .y = data,
         .f = ~ semi_join(.x, .y, by = "id_col")
       )
-    )
+    ) %>% 
+    select(-VZ_colname_val, -VZ_cutoff_val)
+
   return(df_out)
 }