Skip to content

Commit

Permalink
Merge pull request #39 from egouldo/collinearity_removal_analysis
Browse files Browse the repository at this point in the history
Add post-hoc analysis exploring removal of collinear analyses
  • Loading branch information
egouldo authored Jun 13, 2024
2 parents f65dac5 + ec6eefa commit 7c11bf7
Show file tree
Hide file tree
Showing 50 changed files with 1,528 additions and 3,042 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ManyEcoEvo
Title: Meta-analyse data from 'Many-Analysts' style studies
Version: 1.1.0
Version: 1.2.0.9000
Authors@R: c(person(given = "Elliot",
family = "Gould",
email = "[email protected]",
Expand Down Expand Up @@ -68,7 +68,7 @@ Remotes:
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
URL: https://github.com/egouldo/ManyEcoEvo,
https://egouldo.github.io/ManyEcoEvo/
BugReports: https://github.com/egouldo/ManyEcoEvo/issues
Expand Down
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export(fit_metafor_mv_reduced)
export(fit_metafor_uni)
export(fit_sorensen_glm)
export(fit_uni_mixed_effects)
export(generate_collinearity_subset)
export(generate_exclusion_subsets)
export(generate_expertise_subsets)
export(generate_outlier_subsets)
Expand All @@ -56,6 +57,8 @@ export(meta_analyse_datasets)
export(named_group_split)
export(plot_cont_rating_effects)
export(plot_effects_diversity)
export(plot_model_means_box_cox_cat)
export(plot_model_means_orchard)
export(power_back)
export(pred_to_Z)
export(prepare_ManyEcoEvo)
Expand Down Expand Up @@ -89,7 +92,12 @@ export(summarise_variable_counts)
export(validate_predictions)
export(validate_predictions_df_blue_tit)
export(validate_predictions_df_euc)
import(dplyr)
import(ggbeeswarm)
import(ggplot2)
import(metafor)
import(see)
importFrom(EnvStats,stat_n_text)
importFrom(broom,tidy)
importFrom(dplyr,across)
importFrom(dplyr,case_when)
Expand All @@ -108,13 +116,15 @@ importFrom(dplyr,rename)
importFrom(dplyr,right_join)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
importFrom(forcats,fct_relevel)
importFrom(magrittr,"%>%")
importFrom(pointblank,col_vals_not_null)
importFrom(purrr,map)
importFrom(purrr,map_dfr)
importFrom(purrr,set_names)
importFrom(rlang,is_na)
importFrom(rlang,na_chr)
importFrom(sae,bxcx)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(tidyr,separate)
Expand Down
70 changes: 70 additions & 0 deletions R/generate_collinearity_subset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#' Generate Collinearity Data Subset
#'
#' Adds a post-hoc subset to a `ManyEcoEvo` dataframe in which the analyses
#' listed in `collinearity_subset` have been removed from `data`, and
#' `diversity_data` has been reduced to the rows that still match the
#' remaining `data`.
#'
#' @param ManyEcoEvo a ManyEcoEvo dataframe containing formatted raw `data`, formatted `diversity_data`, the `estimate_type`, `dataset`, `publishable_subset`, `exclusion_set` and `expertise_subset`. See details.
#' @param collinearity_subset a dataframe with the columns `response_id`, `id_col` and `dataset` identifying the analyses to be removed. Values of `id_col` must be unique.
#'
#' @return A ManyEcoEvo dataframe with added column `collinearity_subset`. All rows of the input are returned with `collinearity_subset == "All"`, followed by the newly created rows with `collinearity_subset == "collinearity_removed"`.
#' @export
#' @details
#' Note that this function needs to be run on `ManyEcoEvo` after the following functions have been run (See examples):
#' - `prepare_response_variables()`
#' - `generate_exclusion_subsets()`
#' - `generate_rating_subsets()`
#' - `generate_expertise_subsets()`
#'
#' `generate_collinearity_subset()` only creates collinearity subsets based on the full dataset where `exclusion_set == "complete"` and `publishable_subset == "All"` and `expertise_subset == "All"`, and only for the datasets that appear in `collinearity_subset$dataset`.
#' @examples
#' ManyEcoEvo %>%
#'   prepare_response_variables(estimate_type = "Zr") |>
#'   generate_exclusion_subsets(estimate_type = "Zr") |>
#'   generate_rating_subsets() |>
#'   generate_expertise_subsets(expert_subset) |>
#'   generate_collinearity_subset(collinearity_subset = collinearity_subset)
generate_collinearity_subset <- function(ManyEcoEvo, collinearity_subset) {
  # Check that both inputs are dataframes
  if (!is.data.frame(collinearity_subset)) {
    stop("collinearity_subset must be a dataframe.")
  }

  if (!is.data.frame(ManyEcoEvo)) {
    stop("ManyEcoEvo must be a dataframe.")
  }

  # Check that collinearity_subset has every column used below:
  # `dataset` selects the affected datasets, and all three columns are the
  # keys of the anti-join.
  required_cols <- c("response_id", "id_col", "dataset")
  if (!all(required_cols %in% colnames(collinearity_subset))) {
    stop("The input dataframe must contain the columns 'response_id', 'id_col' and 'dataset'.")
  }

  # Check that the id_col column is unique
  if (anyDuplicated(collinearity_subset$id_col) > 0) {
    stop("The 'id_col' column in collinearity_subset must be unique.")
  }

  # Bind the lookup table and its datasets to local names that cannot be
  # shadowed by a `collinearity_subset` column already present in ManyEcoEvo
  # (dplyr verbs resolve bare names against the data first).
  analyses_to_remove <- collinearity_subset
  affected_datasets <- unique(analyses_to_remove$dataset)

  collinear_removed <- ManyEcoEvo %>%
    dplyr::filter(
      publishable_subset == "All",
      exclusion_set == "complete",
      expertise_subset == "All",
      dataset %in% affected_datasets
    ) %>%
    dplyr::mutate(
      # Drop every listed analysis from each nested `data` dataframe
      data = purrr::map(
        .x = data,
        .f = dplyr::anti_join,
        analyses_to_remove,
        by = dplyr::join_by(response_id, id_col, dataset)
      )
    ) %>%
    dplyr::mutate(
      # Keep only the diversity rows that still have a match in the reduced
      # `data`; with no `by`, semi_join() matches on all shared columns.
      diversity_data = purrr::map2(
        .x = diversity_data,
        .y = data,
        .f = function(diversity, remaining) {
          dplyr::distinct(dplyr::semi_join(diversity, remaining))
        }
      ),
      collinearity_subset = "collinearity_removed"
    )

  # Original rows are labelled "All"; the reduced rows are appended after them
  dplyr::bind_rows(
    ManyEcoEvo %>%
      dplyr::mutate(collinearity_subset = "All"),
    collinear_removed
  )
}
2 changes: 2 additions & 0 deletions R/generate_expertise_subsets.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#' generate_rating_subsets() |>
#' generate_expertise_subsets(expert_subset)
generate_expertise_subsets <- function(ManyEcoEvo, expert_subset) {
#TODO idea, allow ellipses arg in function and pass those expressions to filter.
# that way isn't hardcoded in the function. Repeat for all other generate / exclude map funs
# NOTE: should be run *after* computing Zr with compute_MA_inputs()
out <- ManyEcoEvo %>%
filter(publishable_subset == "All" & exclusion_set == "complete") %>%
Expand Down
4 changes: 2 additions & 2 deletions R/generate_outlier_subsets.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ generate_outlier_subsets <- function(ManyEcoEvo){
# TODO: will no longer work on Zr dataset, because this doesn't contain an estimate_type col?
# TODO: Don't run with the reduced publishability subset.... some of these already only have 10 data points!!
# apply conditional behaviour to trigger both
#
# TODO: do not run for collinearity_removed datasets
if(str_detect(ManyEcoEvo$estimate_type, "Zr") %>% any(na.rm = TRUE)){
ManyEcoEvo_Zr <- ManyEcoEvo %>%
filter(estimate_type == "Zr") %>%
bind_rows(., {ManyEcoEvo %>%
filter(estimate_type == "Zr") %>%
filter(estimate_type == "Zr", collinearity_subset != "collinearity_removed") %>%
mutate(effects_analysis = map(effects_analysis,
~ slice_max(.x, Zr, n = -2) %>%
slice_min(Zr, n = -2))) %>%
Expand Down
5 changes: 3 additions & 2 deletions R/make_viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ make_viz <- function(data) {
if(any(str_detect(unique(data$estimate_type),pattern = "Zr"))){
data_Zr <- data %>%
filter(estimate_type == "Zr") %>%
group_by(exclusion_set, dataset, estimate_type, publishable_subset, expertise_subset, data) %>%
group_by(exclusion_set, dataset, estimate_type, publishable_subset, expertise_subset, collinearity_subset, data) %>%
pivot_longer(names_to = "model_name",
values_to = "model",
cols = c(-exclusion_set,
Expand All @@ -29,7 +29,8 @@ make_viz <- function(data) {
-diversity_indices,
-effects_analysis,
-publishable_subset,
-expertise_subset)) %>%
-expertise_subset,
-collinearity_subset)) %>%
ungroup %>%
select(-data,
-diversity_data,
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
3 changes: 2 additions & 1 deletion _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ tar_option_set(
packages = pkgs,
imports = "ManyEcoEvo",
# debug = c("augmented_data_3efd9941")#, #augmented_data_a4d78efa
cue = tar_cue(mode = "always") #because we have silent errors!
# cue = tar_cue(mode = "always") #because we have silent errors!
)

list(tarchetypes::tar_file_read(name = euc_reviews,
Expand Down Expand Up @@ -80,6 +80,7 @@ list(tarchetypes::tar_file_read(name = euc_reviews,
generate_exclusion_subsets(estimate_type = "Zr") |>
generate_rating_subsets() |>
generate_expertise_subsets(expert_subset) |>
generate_collinearity_subset(ManyEcoEvo:::collinearity_subset) |>
compute_MA_inputs(estimate_type = "Zr") |>
generate_outlier_subsets() |> # TODO run before MA_inputs? diversity indices need to be recalculated!!
filter(expertise_subset != "expert" | exclusion_set != "complete-rm_outliers") |> #TODO mv into generate_outlier_subsets() so aren't created in the first place
Expand Down
Loading

0 comments on commit 7c11bf7

Please sign in to comment.