Skip to content

Commit

Permalink
Merge pull request #27 from egouldo/summary-tbl-funs
Browse files Browse the repository at this point in the history
Create functions implementing summary tables
  • Loading branch information
egouldo authored Nov 15, 2023
2 parents 171145a + 9ef5107 commit 7fa0621
Show file tree
Hide file tree
Showing 37 changed files with 1,527 additions and 14 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ManyEcoEvo
Title: Meta-analyse data from 'Many-Analysts' style studies
Version: 1.0.0
Version: 1.0.0.9000
Authors@R: c(person(given = "Elliot",
family = "Gould",
email = "[email protected]",
Expand Down
34 changes: 34 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ export(compute_metaanalysis_inputs)
export(conversion)
export(conversion_2)
export(convert_predictions)
export(count_analyses_variables_used)
export(count_binary_coded_features)
export(count_teams_analyses)
export(cube_back)
export(divide_back)
export(est_to_zr)
Expand Down Expand Up @@ -72,17 +75,48 @@ export(square_root_back)
export(standardise_response)
export(subset_fns_Zr)
export(subset_fns_yi)
export(summarise_analyses_by_reviewer)
export(summarise_analysis_types)
export(summarise_conclusions)
export(summarise_conclusions_data)
export(summarise_model_composition)
export(summarise_model_composition_data)
export(summarise_reviews)
export(summarise_reviews_per_analysis)
export(summarise_sorensen_index)
export(summarise_sorensen_index_data)
export(summarise_variable_counts)
export(validate_predictions)
export(validate_predictions_df_blue_tit)
export(validate_predictions_df_euc)
import(metafor)
importFrom(broom,tidy)
importFrom(dplyr,across)
importFrom(dplyr,case_when)
importFrom(dplyr,count)
importFrom(dplyr,distinct)
importFrom(dplyr,ends_with)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,group_split)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n_distinct)
importFrom(dplyr,rename)
importFrom(dplyr,right_join)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
importFrom(magrittr,"%>%")
importFrom(pointblank,col_vals_not_null)
importFrom(purrr,map)
importFrom(purrr,map_dfr)
importFrom(purrr,set_names)
importFrom(rlang,is_na)
importFrom(rlang,na_chr)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(tidyr,separate)
importFrom(tidyr,unite)
importFrom(tidyr,unnest)
1 change: 1 addition & 0 deletions R/generate_yi_subsets.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#' @family Multi-dataset Wrapper Functions
#' @return A `dataframe` with the character columns `dataset`, `estimate_type` and list-cols `data` and `diversity_data`
#' @export
#' @author Elliot Gould
generate_yi_subsets <- function(yi_analysis){

# So: yi_analysis$data[[1]]$back_transformed data is that obj.
Expand Down
161 changes: 161 additions & 0 deletions R/summarise_analysis_types.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#' Summarise Analysis Types
#'
#' @description
#' Generates a per-dataset summary of the number of analysis teams, total
#' analyses, linear models, generalised linear models, mixed effects models,
#' and models developed using Bayesian statistical methods, separately for the
#' `"effects"` and `"predictions"` subsets of analyses.
#'
#' @details
#' Analysis identifiers are recovered from the fitted meta-analysis models in
#' the `MA_mod` list-column with `broom::tidy(include_studies = TRUE)`, keeping
#' only rows whose `type` is `"study"`:
#'
#' * `"effects"` uses rows of `ManyEcoEvo_results` where
#'   `exclusion_set == "complete"` and `publishable_subset == "All"`.
#' * `"predictions"` uses rows of `ManyEcoEvo_yi_results` where
#'   `exclusion_set == "complete"`.
#'
#' Those identifiers are right-joined (on `id_col`) to the unnested `data`
#' list-column of `ManyEcoEvo`, and both [count_teams_analyses()] and
#' [count_binary_coded_features()] are applied to each subset.
#'
#' @param ManyEcoEvo_results A tibble of `ManyEcoEvo_results`, containing the
#'   columns `exclusion_set`, `publishable_subset` and the list-column `MA_mod`.
#' @param ManyEcoEvo_yi_results A tibble of `ManyEcoEvo_yi_results`, containing
#'   the column `exclusion_set` and the list-column `MA_mod`.
#' @param ManyEcoEvo A tibble of `ManyEcoEvo`, containing the list-column `data`.
#'
#' @return A summarised tibble with the variables `subset`, `dataset`,
#'   `total_teams`, `total_analyses`, `sum_linear`, `sum_mixed`, `sum_Bayesian`
#'   and `sum_glm`.
#' @export
#' @importFrom dplyr filter
#' @importFrom dplyr select
#' @importFrom dplyr group_by
#' @importFrom dplyr ungroup
#' @importFrom dplyr rename
#' @importFrom purrr map
#' @importFrom broom tidy
#' @importFrom tidyr unnest
#' @importFrom magrittr "%>%"
#' @importFrom dplyr distinct
#' @importFrom dplyr mutate
#' @import metafor
#' @importFrom dplyr across
#' @importFrom dplyr left_join
#' @importFrom dplyr right_join
#' @importFrom dplyr full_join
#' @importFrom purrr map_dfr
#' @family Multi-dataset Wrapper Functions
#' @author Hannah S. Fraser
#' @author Elliot Gould
#' @examples
#' summarise_analysis_types(ManyEcoEvo_results, ManyEcoEvo_yi_results, ManyEcoEvo)
summarise_analysis_types <- function(ManyEcoEvo_results,
                                     ManyEcoEvo_yi_results,
                                     ManyEcoEvo) {
  # Return the distinct analysis identifiers (`id_col`) of every study that
  # entered any of the fitted meta-analysis models stored in `MA_mod`.
  # The input is ungrouped first so the result does not depend on whether the
  # caller passed a grouped tibble.
  extract_study_ids <- function(results) {
    results %>%
      dplyr::ungroup() %>%
      mutate(
        tidy_mod = map(
          MA_mod,
          ~ broom::tidy(.x, conf.int = TRUE, include_studies = TRUE)
        ),
        .keep = "none"
      ) %>%
      unnest(tidy_mod) %>%
      filter(type == "study") %>%
      select(id_col = term) %>%
      # TODO duplicates for "Bell-2-2-1" and "Bonalbo-1-1-1" -- why?
      distinct()
  }

  effect_ids <- ManyEcoEvo_results %>%
    filter(
      exclusion_set == "complete",
      publishable_subset == "All"
    ) %>%
    extract_study_ids()

  # TODO Euc mod_data_logged not here!
  prediction_ids <- ManyEcoEvo_yi_results %>%
    filter(exclusion_set == "complete") %>%
    extract_study_ids()

  # TODO move this recoding into master processing so it is not repeated
  # elsewhere.
  master <- ManyEcoEvo %>%
    dplyr::ungroup() %>%
    select(data) %>%
    unnest(data) %>%
    mutate(
      across(
        c(
          num_fixed_variables,
          num_random_variables,
          sample_size,
          num_interactions,
          Bayesian,
          mixed_model
        ),
        as.numeric
      ),
      lm = ifelse(linear_model == "linear", 1, 0),
      glm = ifelse(linear_model == "generalised", 1, 0)
    )

  # Right joins keep every identifier found in the meta-analysis models, even
  # when it has no matching row in `master`.
  subsets <- list(
    effects = right_join(master, effect_ids, by = "id_col"),
    predictions = right_join(master, prediction_ids, by = "id_col")
  )

  # TODO next: set up so can run on just one object ManyEcoEvo_results, and
  # account for subsets too!
  full_join(
    map_dfr(subsets, count_teams_analyses, .id = "subset"),
    map_dfr(subsets, count_binary_coded_features, .id = "subset"),
    by = c("subset", "dataset")
  )
}


#' Summarise number of analyst teams and total analyses per dataset
#'
#' @description
#' Counts the rows of `data` for each combination of `dataset` and
#' `TeamIdentifier`, then reports, for each `dataset`, how many distinct teams
#' there are and how many rows (analyses) they contributed in total.
#'
#' @param data A dataframe containing the variables `TeamIdentifier` and
#'   `dataset`, with one row per analysis.
#'
#' @return A dataframe with the columns `dataset`, `total_teams` and
#'   `total_analyses`, equal in number of rows to the number of unique values
#'   within the `dataset` variable of the input `data`.
#' @export
#' @importFrom dplyr count
#' @importFrom dplyr group_by
#' @importFrom dplyr summarise
#' @importFrom dplyr n
#' @importFrom magrittr "%>%"
#' @author Hannah S. Fraser
#' @author Elliot Gould
#' @examples
#' ManyEcoEvo %>%
#'   dplyr::filter(dataset == "blue tit") %>%
#'   dplyr::ungroup() %>%
#'   dplyr::select(data) %>%
#'   tidyr::unnest(data) %>%
#'   count_teams_analyses()
count_teams_analyses <- function(data) {
  data %>%
    # One row per dataset/team pair; `n_analyses` is that team's row count.
    # Naming the column explicitly avoids confusing it with `dplyr::n()`.
    count(dataset, TeamIdentifier, name = "n_analyses") %>%
    group_by(dataset) %>%
    summarise(
      total_teams = dplyr::n(),
      total_analyses = sum(n_analyses)
    )
}

#' Summarise binary coded features of analyses
#'
#' @description
#' Sums each binary indicator column within every `dataset`, ignoring missing
#' values.
#'
#' @param data A dataframe containing the variable `dataset`, and the variables
#'   `lm`, `glm`, `mixed_model` and `Bayesian`, which are coded as binary
#'   numeric vectors.
#'
#' @return A dataframe with one row per `dataset` and the variables `dataset`,
#'   `sum_linear`, `sum_mixed`, `sum_Bayesian` and `sum_glm`.
#' @export
#' @importFrom dplyr group_by
#' @importFrom dplyr summarise
#' @importFrom magrittr "%>%"
#' @author Hannah S. Fraser
#' @author Elliot Gould
#' @examples
#' ManyEcoEvo %>%
#'   filter(dataset == "eucalyptus") %>%
#'   ungroup %>%
#'   select(data) %>%
#'   unnest(data) %>%
#'   mutate(lm = ifelse(linear_model == "linear", 1, 0), #TODO move into master processing
#'          glm = ifelse(linear_model == "generalised", 1, 0),
#'          Bayesian = as.numeric(Bayesian)) %>%
#'   count_binary_coded_features()
count_binary_coded_features <- function(data) {
  per_dataset <- group_by(data, dataset)

  # Output column order is part of the contract: linear, mixed, Bayesian, glm.
  summarise(
    per_dataset,
    sum_linear = sum(lm, na.rm = TRUE),
    sum_mixed = sum(mixed_model, na.rm = TRUE),
    sum_Bayesian = sum(Bayesian, na.rm = TRUE),
    sum_glm = sum(glm, na.rm = TRUE)
  )
}
129 changes: 129 additions & 0 deletions R/summarise_conclusions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#' Summarise counts of qualitative conclusions across all datasets
#'
#' @details
#' Data summary is generated with [summarise_conclusions_data()].
#'
#' Analysis identifiers are recovered from the fitted meta-analysis models in
#' the `MA_mod` list-column with `broom::tidy(include_studies = TRUE)`, keeping
#' only rows whose `type` is `"study"`:
#'
#' * `"effects"` uses rows of `ManyEcoEvo_results` where
#'   `exclusion_set == "complete"` and `publishable_subset == "All"`.
#' * `"predictions"` uses rows of `ManyEcoEvo_yi_results` where
#'   `exclusion_set == "complete"`.
#' * `"all"` uses every row of the unnested `data` list-column of `ManyEcoEvo`.
#'
#' Rows whose `Conclusion` is missing or equal to `"CHECK"` are removed before
#' counting.
#'
#' @param ManyEcoEvo_results A tibble of `ManyEcoEvo_results`, containing the
#'   columns `exclusion_set`, `publishable_subset` and the list-column `MA_mod`.
#' @param ManyEcoEvo_yi_results A tibble of `ManyEcoEvo_yi_results`, containing
#'   the column `exclusion_set` and the list-column `MA_mod`.
#' @param ManyEcoEvo A tibble of `ManyEcoEvo`, containing the list-column `data`.
#'
#' @return A dataframe with one row per `subset` ("effects", "predictions",
#'   "all") and `dataset`, and one count column for each unique `Conclusion`
#'   (combinations that do not occur are filled with `0`).
#' @export
#' @importFrom dplyr filter
#' @importFrom dplyr select
#' @importFrom dplyr group_by
#' @importFrom dplyr ungroup
#' @importFrom dplyr rename
#' @importFrom purrr map
#' @importFrom broom tidy
#' @importFrom tidyr unnest
#' @importFrom magrittr "%>%"
#' @importFrom dplyr distinct
#' @importFrom dplyr mutate
#' @import metafor
#' @importFrom dplyr across
#' @importFrom dplyr left_join
#' @importFrom dplyr right_join
#' @importFrom dplyr full_join
#' @importFrom purrr map_dfr
#' @importFrom tidyr pivot_wider
#' @family Multi-dataset Wrapper Functions
#' @author Hannah S. Fraser
#' @author Elliot Gould
#' @examples
#' summarise_conclusions(ManyEcoEvo_results, ManyEcoEvo_yi_results, ManyEcoEvo)
summarise_conclusions <- function(ManyEcoEvo_results,
                                  ManyEcoEvo_yi_results,
                                  ManyEcoEvo) {
  # Return the distinct analysis identifiers (`id_col`) of every study that
  # entered any of the fitted meta-analysis models stored in `MA_mod`.
  extract_study_ids <- function(results) {
    results %>%
      dplyr::ungroup() %>%
      mutate(
        tidy_mod = map(
          MA_mod,
          ~ broom::tidy(.x, conf.int = TRUE, include_studies = TRUE)
        ),
        .keep = "none"
      ) %>%
      unnest(tidy_mod) %>%
      filter(type == "study") %>%
      select(id_col = term) %>%
      # TODO duplicates for "Bell-2-2-1" and "Bonalbo-1-1-1" -- why?
      distinct()
  }

  effect_ids <- ManyEcoEvo_results %>%
    filter(
      exclusion_set == "complete",
      publishable_subset == "All"
    ) %>%
    extract_study_ids()

  # TODO Euc mod_data_logged not here!
  prediction_ids <- ManyEcoEvo_yi_results %>%
    filter(exclusion_set == "complete") %>%
    extract_study_ids()

  # TODO move this recoding into master processing so it is not repeated
  # elsewhere.
  master <- ManyEcoEvo %>%
    dplyr::ungroup() %>%
    select(data) %>%
    unnest(data) %>%
    mutate(
      across(
        c(
          num_fixed_variables,
          num_random_variables,
          sample_size,
          num_interactions,
          Bayesian,
          mixed_model
        ),
        as.numeric
      ),
      lm = ifelse(linear_model == "linear", 1, 0),
      glm = ifelse(linear_model == "generalised", 1, 0)
    ) %>%
    # TODO data cleaning, check these
    filter(Conclusion != "CHECK", !is.na(Conclusion))

  # Right joins keep every identifier found in the meta-analysis models, even
  # when it has no matching (or no retained) row in `master`.
  subsets <- list(
    effects = right_join(master, effect_ids, by = "id_col"),
    predictions = right_join(master, prediction_ids, by = "id_col"),
    all = master
  )

  subsets %>%
    map_dfr(.f = summarise_conclusions_data, .id = "subset") %>%
    pivot_wider(
      names_from = Conclusion,
      values_from = n,
      values_fill = 0
    ) %>%
    dplyr::ungroup()
}


#' Count qualitative conclusions across all analyses for each dataset
#'
#' @description
#' Keeps only the rows where both `split_id` and `analysis_id` equal `"1"`,
#' then counts the remaining rows for each combination of `dataset` and
#' `Conclusion`. Any grouping on the input is dropped first.
#'
#' @param data A dataframe containing the columns `split_id`, `analysis_id`,
#'   `dataset`, `Conclusion`
#'
#' @return An ungrouped dataframe with the columns `dataset`, `Conclusion` and
#'   `n`, where `n` is the count for each unique value of `Conclusion` within
#'   each `dataset`.
#' @export
#' @importFrom dplyr filter
#' @importFrom dplyr group_by
#' @importFrom dplyr ungroup
#' @importFrom dplyr count
#' @importFrom magrittr "%>%"
#' @author Hannah S. Fraser
#' @author Elliot Gould
#' @examples
#' ManyEcoEvo$data[[1]] %>%
#'   dplyr::filter(Conclusion != "CHECK") %>%
#'   summarise_conclusions_data()
summarise_conclusions_data <- function(data) {
  data %>%
    dplyr::ungroup() %>%
    # TODO how to generalise to data without split_id
    filter(split_id == "1", analysis_id == "1") %>%
    count(dataset, Conclusion)
}
Loading

0 comments on commit 7fa0621

Please sign in to comment.