-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #27 from egouldo/summary-tbl-funs
Create functions implementing summary tables
- Loading branch information
Showing
37 changed files
with
1,527 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
Package: ManyEcoEvo | ||
Title: Meta-analyse data from 'Many-Analysts' style studies | ||
Version: 1.0.0 | ||
Version: 1.0.0.9000 | ||
Authors@R: c(person(given = "Elliot", | ||
family = "Gould", | ||
email = "[email protected]", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
#' Summarise Analysis Types
#' @description
#' Generates a summary of the number of analysis teams, total analyses, models
#' with normal error distributions, generalised linear models, mixed effects
#' models, and models developed using Bayesian statistical methods, for each
#' dataset within the "effects" and "predictions" subsets of analyses.
#'
#' @details
#' Applies both [count_binary_coded_features()] and [count_teams_analyses()] to
#' each subset to generate data summaries.
#'
#' The "effects" subset consists of the analyses whose identifiers appear as
#' studies in the meta-analysis models (`MA_mod`) of `ManyEcoEvo_results` for
#' rows where `exclusion_set == "complete"` and `publishable_subset == "All"`.
#' The "predictions" subset is built in the same way from the models in
#' `ManyEcoEvo_yi_results` for rows where `exclusion_set == "complete"`.
#'
#' @param ManyEcoEvo_results A tibble of `ManyEcoEvo_results`, containing the
#'   columns `exclusion_set`, `publishable_subset` and the list-column `MA_mod`.
#' @param ManyEcoEvo_yi_results A tibble of `ManyEcoEvo_yi_results`, containing
#'   the column `exclusion_set` and the list-column `MA_mod`.
#' @param ManyEcoEvo A tibble of `ManyEcoEvo`, containing the list-column `data`.
#'
#' @return A summarised tibble with the variables `subset`, `dataset`,
#'   `total_teams`, `total_analyses`, `sum_linear`, `sum_mixed`, `sum_Bayesian`
#'   and `sum_glm`.
#' @export
#' @importFrom dplyr filter
#' @importFrom dplyr select
#' @importFrom dplyr group_by
#' @importFrom dplyr ungroup
#' @importFrom dplyr rename
#' @importFrom purrr map
#' @importFrom broom tidy
#' @importFrom tidyr unnest
#' @importFrom magrittr "%>%"
#' @importFrom dplyr distinct
#' @importFrom dplyr mutate
#' @import metafor
#' @importFrom dplyr across
#' @importFrom dplyr left_join
#' @importFrom dplyr right_join
#' @importFrom dplyr full_join
#' @importFrom purrr map_dfr
#' @family Multi-dataset Wrapper Functions
#' @author Hannah S. Fraser
#' @author Elliot Gould
#' @examples
#' summarise_analysis_types(ManyEcoEvo_results, ManyEcoEvo_yi_results, ManyEcoEvo)
summarise_analysis_types <- function(ManyEcoEvo_results, ManyEcoEvo_yi_results, ManyEcoEvo) {
  # Collect the distinct identifiers of all studies included in the fitted
  # meta-analysis models of `results`. The input is ungrouped first so the
  # helper works whether or not the caller supplies a grouped tibble; grouping
  # has no influence on the set of distinct identifiers returned.
  extract_study_ids <- function(results) {
    results %>%
      ungroup() %>%
      mutate(tidy_mod = map(MA_mod,
                            ~ broom::tidy(.x,
                                          conf.int = TRUE,
                                          include_studies = TRUE)),
             .keep = "none") %>%
      unnest(tidy_mod) %>%
      filter(type == "study") %>% # keep study rows only
      select(id_col = term) %>% #TODO duplicates for "Bell-2-2-1" and "Bonalbo-1-1-1 WHY?
      distinct()
  }

  effect_ids <- ManyEcoEvo_results %>%
    filter(exclusion_set == "complete",
           publishable_subset == "All") %>%
    extract_study_ids()

  prediction_ids <- ManyEcoEvo_yi_results %>% #TODO Euc mod_data_logged not here!
    filter(exclusion_set == "complete") %>%
    extract_study_ids()

  Master <-
    ManyEcoEvo %>%
    ungroup() %>%
    select(data) %>%
    unnest(data) %>%
    mutate(across(c(num_fixed_variables,
                    num_random_variables,
                    sample_size,
                    num_interactions,
                    Bayesian,
                    mixed_model),
                  as.numeric),
           lm = ifelse(linear_model == "linear", 1, 0),
           glm = ifelse(linear_model == "generalised", 1, 0)) #TODO move this into master processing so don't have to repeat else where!!

  # Restrict the analysis-level data to the analyses present in each set of
  # meta-analysis models.
  subsets <- list(
    effects = right_join(Master, effect_ids, by = "id_col"),
    predictions = right_join(Master, prediction_ids, by = "id_col")
  )

  # Both summaries share exactly the key columns `subset` and `dataset`; naming
  # them makes the join explicit and avoids the "Joining with `by = ...`" message.
  summarised_data <- full_join(
    map_dfr(subsets, count_teams_analyses, .id = "subset"),
    map_dfr(subsets, count_binary_coded_features, .id = "subset"),
    by = c("subset", "dataset")
  )

  #TODO next: set up so can run on just one object ManyEcoEvo_results, and account for subsets too!
  summarised_data
}
|
||
|
||
#' Summarise number of analyst teams and total analyses per dataset
#'
#' @details
#' Every row of `data` is counted as one analysis. A team is counted once per
#' dataset for each distinct value of `TeamIdentifier` occurring in that dataset.
#'
#' @param data A dataframe containing the variables `TeamIdentifier` and `dataset`
#'
#' @return A dataframe with the columns `dataset`, `total_teams` and `total_analyses` equal in number of rows to the number of unique values within the `dataset` variable of the input `data`.
#' @export
#' @importFrom dplyr count
#' @importFrom dplyr group_by
#' @importFrom dplyr summarise
#' @importFrom dplyr n
#' @importFrom magrittr "%>%"
#' @author Hannah S. Fraser
#' @author Elliot Gould
#' @examples
#' ManyEcoEvo %>%
#'   filter(dataset == "blue tit") %>%
#'   ungroup %>%
#'   select(data) %>%
#'   unnest(data) %>%
#'   count_teams_analyses()
count_teams_analyses <- function(data) {
  data %>%
    # one row per dataset x team, with `n` holding that team's number of analyses
    count(dataset, TeamIdentifier) %>% #TODO consider renaming col
    group_by(dataset) %>%
    summarise(total_teams = n(), # number of dataset x team rows
              total_analyses = sum(n)) # `n` here is the count column
}
|
||
#' Summarise binary coded features of analyses
#'
#' @param data A dataframe containing the variables `dataset`, and `lm`, `glm`, `mixed_model`, `Bayesian`, which are coded as binary numeric vectors.
#'
#' @return A dataframe with one row per `dataset` and the variables `dataset`,
#'   `sum_linear`, `sum_mixed`, `sum_Bayesian` and `sum_glm`, holding the sums
#'   (ignoring `NA`) of `lm`, `mixed_model`, `Bayesian` and `glm` respectively.
#' @export
#' @importFrom dplyr group_by
#' @importFrom dplyr summarise
#' @importFrom magrittr "%>%"
#' @author Hannah S. Fraser
#' @author Elliot Gould
#' @examples
#' ManyEcoEvo %>%
#'   filter(dataset == "eucalyptus") %>%
#'   ungroup %>%
#'   select(data) %>%
#'   unnest(data) %>%
#'   mutate(lm = ifelse(linear_model == "linear", 1, 0), #TODO move into master processing
#'          glm = ifelse(linear_model == "generalised", 1, 0),
#'          Bayesian = as.numeric(Bayesian),
#'          mixed_model = as.numeric(mixed_model)) %>%
#'   count_binary_coded_features()
count_binary_coded_features <- function(data) {
  # `lm` and `glm` are also the names of functions in the stats package. If one
  # of these columns is absent, the name falls through to that function and
  # `sum()` fails with an unrelated error, so check for the columns up front.
  required_cols <- c("dataset", "lm", "mixed_model", "Bayesian", "glm")
  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop("`data` is missing required column(s): ",
         paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  data %>%
    group_by(dataset) %>%
    summarise(sum_linear = sum(lm, na.rm = TRUE),
              sum_mixed = sum(mixed_model, na.rm = TRUE),
              sum_Bayesian = sum(Bayesian, na.rm = TRUE),
              sum_glm = sum(glm, na.rm = TRUE))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
#' Summarise counts of qualitative conclusions across all datasets
#'
#' @details
#' Data summary is generated with [summarise_conclusions_data()].
#'
#' Three subsets of analyses are summarised. "effects" contains the analyses
#' whose identifiers appear as studies in the meta-analysis models (`MA_mod`)
#' of `ManyEcoEvo_results` for rows where `exclusion_set == "complete"` and
#' `publishable_subset == "All"`. "predictions" is built in the same way from
#' `ManyEcoEvo_yi_results` for rows where `exclusion_set == "complete"`. "all"
#' contains every analysis in `ManyEcoEvo`. Analyses whose `Conclusion` is
#' missing or equal to `"CHECK"` are excluded from all subsets.
#'
#' @param ManyEcoEvo_results A tibble of `ManyEcoEvo_results`, containing the
#'   columns `exclusion_set`, `publishable_subset` and the list-column `MA_mod`.
#' @param ManyEcoEvo_yi_results A tibble of `ManyEcoEvo_yi_results`, containing
#'   the column `exclusion_set` and the list-column `MA_mod`.
#' @param ManyEcoEvo A tibble of `ManyEcoEvo`, containing the list-column `data`.
#'
#' @return A dataframe with one row for each combination of `subset`
#'   ("effects", "predictions", "all") and `dataset`, and one column of counts
#'   for each unique value of `Conclusion` (0 where a conclusion does not occur).
#' @export
#' @importFrom dplyr filter
#' @importFrom dplyr select
#' @importFrom dplyr group_by
#' @importFrom dplyr ungroup
#' @importFrom dplyr rename
#' @importFrom purrr map
#' @importFrom broom tidy
#' @importFrom tidyr unnest
#' @importFrom magrittr "%>%"
#' @importFrom dplyr distinct
#' @importFrom dplyr mutate
#' @import metafor
#' @importFrom dplyr across
#' @importFrom dplyr left_join
#' @importFrom dplyr right_join
#' @importFrom dplyr full_join
#' @importFrom purrr map_dfr
#' @importFrom tidyr pivot_wider
#' @family Multi-dataset Wrapper Functions
#' @author Hannah S. Fraser
#' @author Elliot Gould
#' @examples
#' summarise_conclusions(ManyEcoEvo_results,ManyEcoEvo_yi_results,ManyEcoEvo)
summarise_conclusions <- function(ManyEcoEvo_results, ManyEcoEvo_yi_results, ManyEcoEvo) {
  # Collect the distinct identifiers of all studies included in the fitted
  # meta-analysis models of `results`. Grouping has no influence on the set of
  # distinct identifiers, so the input is simply ungrouped first.
  extract_study_ids <- function(results) {
    results %>%
      ungroup() %>%
      mutate(tidy_mod = map(MA_mod,
                            ~ broom::tidy(.x,
                                          conf.int = TRUE,
                                          include_studies = TRUE)),
             .keep = "none") %>%
      unnest(tidy_mod) %>%
      filter(type == "study") %>% # keep study rows only
      select(id_col = term) %>% #TODO duplicates for "Bell-2-2-1" and "Bonalbo-1-1-1 WHY?
      distinct()
  }

  effect_ids <- ManyEcoEvo_results %>%
    filter(exclusion_set == "complete",
           publishable_subset == "All") %>%
    extract_study_ids()

  prediction_ids <- ManyEcoEvo_yi_results %>% #TODO Euc mod_data_logged not here!
    filter(exclusion_set == "complete") %>%
    extract_study_ids()

  Master <-
    ManyEcoEvo %>%
    ungroup() %>%
    select(data) %>%
    unnest(data) %>%
    mutate(across(c(num_fixed_variables,
                    num_random_variables,
                    sample_size,
                    num_interactions,
                    Bayesian,
                    mixed_model),
                  as.numeric),
           lm = ifelse(linear_model == "linear", 1, 0),
           glm = ifelse(linear_model == "generalised", 1, 0)) %>% #TODO move this into master processing so don't have to repeat else where!!
    filter(Conclusion != "CHECK", !is.na(Conclusion)) #TODO data cleaning, check these

  # Restrict the analysis-level data to the analyses present in each set of
  # meta-analysis models; "all" keeps every remaining analysis.
  subsets <- list(
    effects = right_join(Master, effect_ids, by = "id_col"),
    predictions = right_join(Master, prediction_ids, by = "id_col"),
    all = Master
  )

  # Count conclusions per subset and dataset, then spread the conclusions into
  # one count column each.
  subsets %>%
    map_dfr(.f = summarise_conclusions_data, .id = "subset") %>%
    pivot_wider(names_from = Conclusion,
                values_from = n,
                values_fill = 0) %>%
    ungroup()
}
|
||
|
||
#' Count qualitative conclusions across all analyses for each dataset | ||
#' | ||
#' @param data A dataframe containing the columns `split_id`, `analysis_id`, `dataset`, `Conclusion` | ||
#' | ||
#' @return A dataframe with counts `n` for each unique value of `Conclusion` for each `dataset` | ||
#' @export | ||
#' @importFrom dplyr filter | ||
#' @importFrom dplyr group_by | ||
#' @importFrom dplyr count | ||
#' @importFrom magrittr "%>%" | ||
#' @author Hannah S. Fraser | ||
#' @author Elliot Gould | ||
#' @examples | ||
#' ManyEcoEvo$data[[1]] %>% | ||
#' filter(Conclusion != "CHECK") %>% | ||
#' summarise_conclusions_data() | ||
summarise_conclusions_data <- function(data){
  # Drop any existing grouping, then keep only the rows where both `split_id`
  # and `analysis_id` equal "1".
  #TODO how to generalise to data without split_id
  first_analyses <- data %>%
    ungroup() %>%
    filter(split_id == "1", analysis_id == "1")

  # Tally the rows for every dataset x Conclusion combination.
  conclusion_counts <- first_analyses %>%
    group_by(dataset, Conclusion) %>%
    count()

  # Hand back an ungrouped result holding the count in column `n`.
  ungroup(conclusion_counts)
}
Oops, something went wrong.