Skip to content

Commit

Permalink
Fix #104 reorganise yi computational pipeline in prep for #88
Browse files Browse the repository at this point in the history
- #104 mv `assign_transformation_type()` and `convert_predictions()` out of `standardise_response()` into `back_transform_response_vars_yi()`
- Call `back_transform_response_vars_yi()` in `prepare_response_variables()` for yi workflow (was not being called currently) #104
- #104 keep addition of `param_table` inside `standardise_response()`
  • Loading branch information
egouldo committed Aug 12, 2024
1 parent d8caded commit 5a101e2
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 63 deletions.
44 changes: 43 additions & 1 deletion R/prepare_response_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,51 @@ prepare_response_variables <- function(ManyEcoEvo,
stopifnot(is.data.frame(ManyEcoEvo))
# TODO run checks on ManyEcoEvo
match.arg(estimate_type, choices = c("Zr", "yi", "y25", "y50", "y75"), several.ok = FALSE)

if (estimate_type != "Zr") {
if (is.null(param_table)) {

cli::cli_abort("{.arg param_table} must be supplied for {.val {estimate_type}} data")
}

# ------ Back transform if estimate_type is yi only ------
out <- ManyEcoEvo %>%
ungroup() %>%
# dplyr::group_by(dataset) %>% #NOTE: mapping doesn't work properly when tibble is rowwise!
dplyr::mutate(
data = purrr::map2(
.x = data,
.y = dataset,
.f = ~ back_transform_response_vars_yi(
dat = .x,
estimate_type = !!{
estimate_type
},
dataset = .y
)
),
diversity_data =
map2(
.x = diversity_data,
.y = data,
.f = ~ semi_join(.x, .y) %>%
distinct()
)
)
return(out)
} else{
if (!is.null(param_table)) {
cli::cli_abort("{.arg param_table} must be NULL for {.val {estimate_type}} data")
}
}

# ------ Standardise Response Variables for Meta-analysis ------
out <- ManyEcoEvo %>%
ungroup() %>%
# dplyr::group_by(dataset) %>% #NOTE: mapping doesn't work properly when tibble is rowwise!
dplyr::mutate(data = purrr::map2(
.x = data, .y = dataset,
.x = data,
.y = dataset,
.f = ~ standardise_response(
dat = .x,
estimate_type = !!{
Expand All @@ -29,5 +69,7 @@ prepare_response_variables <- function(ManyEcoEvo,
dataset = .y
)
))

return(out)

}
31 changes: 7 additions & 24 deletions R/prepare_response_variables_yi.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ prepare_response_variables_yi <- function(ManyEcoEvo,
stopifnot(is.data.frame(ManyEcoEvo))
# TODO run checks on ManyEcoEvo
match.arg(estimate_type, choices = c("yi", "y25", "y50", "y75"), several.ok = FALSE)

out <- ManyEcoEvo %>%
ungroup() %>%
# dplyr::group_by(dataset) %>% #NOTE: mapping doesn't work properly when tibble is rowwise!
Expand Down Expand Up @@ -43,24 +44,21 @@ prepare_response_variables_yi <- function(ManyEcoEvo,
#'
#' @param dat A dataframe of out of sample predictions analyst submission data
#' @param estimate_type The type of estimate to be standardised. Character vector of length 1, whose value may be "yi", "y25", "y50", "y75".
#' @param param_table A table of estimated 'population' parameters for each variable in the analysis datasets.
#' @param dataset One of either "blue tit" or "eucalyptus"
#'
#' @return A tibble of analyst data with standardised values contained in a list-column called 'back_transformed_data'
#' @export
#' @family analyst-data
back_transform_response_vars_yi <- function(dat,
estimate_type = character(1L),
param_table = NULL,
dataset = character(1L)) {
# TODO insert checks that appropriate columns exist
# TODO apply to data and check that all cases accounted for!
match.arg(estimate_type, choices = c("yi", "y25", "y50", "y75"), several.ok = FALSE)
match.arg(dataset, choices = c("eucalyptus", "blue tit"), several.ok = FALSE)
cli::cli_h1(glue::glue("Computing meta-analysis inputs", "for estimate type ", "{estimate_type}"))



cli::cli_h2(paste0("Transforming out of sample predictions from link to response scale"))

dat <- dat %>%
pointblank::col_exists(
columns =
Expand All @@ -74,27 +72,12 @@ back_transform_response_vars_yi <- function(dat,
"response_transformation_status"
)
) %>% # add check for response transformation
dplyr::group_by(
dplyr::group_by( #TODO group on id_col
TeamIdentifier,
submission_id,
analysis_id,
split_id
) %>%
dplyr::mutate(params = purrr::map(
.x = response_variable_name,
.y = param_table,
.f = ~ dplyr::filter(.y, variable == .x)
)) %>%
dplyr::mutate(nrow_params = purrr::map_int(params, nrow)) %>%
dplyr::mutate(params = purrr::map2(params,
nrow_params,
.f = ~ if (.y > 0) {
.x
} else {
NA
}
)) %>%
dplyr::select(-nrow_params) %>%
) %>%
dplyr::mutate(
transformation_type =
assign_transformation_type(
Expand Down Expand Up @@ -122,7 +105,7 @@ back_transform_response_vars_yi <- function(dat,
rlang::na_lgl
}
)
) # TODO note that the blue tit and eucalyptus back transformed dat has different names for the out of sample prediction estimates. For BT it's "estimate", for Euc it's "fit" (this is because of the way we asked analysts to submit their data...).

)
return(dat)
}
51 changes: 13 additions & 38 deletions R/standardise_response.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ standardise_response <- function(dat,
# TODO apply to data and check that all cases accounted for!
match.arg(estimate_type, choices = c("Zr", "yi", "y25", "y50", "y75"), several.ok = FALSE)
match.arg(dataset, choices = c("eucalyptus", "blue tit"), several.ok = FALSE)

cli::cli_h1(glue::glue("Computing meta-analysis inputs", "for estimate type ", "{estimate_type}"))

if (estimate_type == "Zr") {
# Convert Effect Sizes to Zr -------
# ------ Convert Effect Sizes to Zr -------
cli::cli_h2(paste0("Computing standardised effect sizes ", "{.code Zr}", " and variance ", "{.code VZr}"))

dat <- dat %>%
# unnest(back_transformed_estimate) %>%
dplyr::mutate(Zr_VZr = purrr::pmap(
Expand All @@ -44,8 +46,10 @@ standardise_response <- function(dat,
.f = est_to_zr
)) %>%
tidyr::unnest(cols = c(Zr_VZr))
} else { # estimate_type != Zr, i.e. == "y*"
cli::cli_h2(paste0("Transforming out of sample predictions from link to response scale"))
} else {
# ------ Convert predictions to Z -------
cli::cli_h2(paste0("Standardising out-of-sample predictions"))

dat <- dat %>%
pointblank::col_exists(
columns =
Expand Down Expand Up @@ -82,49 +86,20 @@ standardise_response <- function(dat,
NA
}
)) %>%
dplyr::select(-nrow_params) %>%
dplyr::mutate(
transformation_type =
assign_transformation_type(
response_transformation = response_transformation_status,
link_fun = transformation
)
) %>%
dplyr::mutate(
back_transformed_data =
purrr::pmap(
.l = list(
augmented_data,
transformation_type, # TODO update, gh issue 162
response_transformation_status,
transformation
), # TODO update, gh issue 162 #NOTE: see #127 / #38 on GH.
.f = ~ if (!rlang::is_na(..1) | !rlang::is_na(..2)) {
convert_predictions(
augmented_data = ..1,
transformation_type = ..2,
response_transformation = ..3,
link_fun = ..4
)
} else {
rlang::na_lgl
}
)
)

cli::cli_h2(paste0("Standardising out-of-sample predictions"))

dat <- dat %>%
dplyr::select(-nrow_params) %>%
dplyr::mutate(
back_transformed_data = # TODO rename standardised_data and fix up downstream dependencies
back_transformed_data = # TODO rename standardised_data and fix up downstream (and upstream wrappers, when not standardised) dependencies
purrr::pmap(
list(
back_transformed_data,
params,
dataset
),
.f = ~ if (!rlang::is_na(..1) | !rlang::is_na(..2)) {
pred_to_Z(..1, params = ..2, dataset = ..3)
pred_to_Z(
back_transformed_data = ..1,
params = ..2,
dataset = ..3)
} else {
NA
}
Expand Down

0 comments on commit 5a101e2

Please sign in to comment.