Fix #104 reorganise yi computational pipeline in prep for #88

- #104 mv `assign_transformation_type()` and `convert_predictions()` out of `standardise_response()` into `back_transform_response_vars_yi()` - Call `back_transform_response_vars_yi()` in `prepare_response_variables()` for yi workflow (was not being called currently) #104 - #104 keep addition of `param_table` inside `standardise_response()`
egouldo · Aug 12, 2024 · 5a101e2 · 5a101e2
1 parent d8caded
commit 5a101e2
Show file tree

Hide file tree

Showing 3 changed files with 63 additions and 63 deletions.
diff --git a/R/prepare_response_variables.R b/R/prepare_response_variables.R
@@ -15,11 +15,51 @@ prepare_response_variables <- function(ManyEcoEvo,
   stopifnot(is.data.frame(ManyEcoEvo))
   # TODO run checks on ManyEcoEvo
   match.arg(estimate_type, choices = c("Zr", "yi", "y25", "y50", "y75"), several.ok = FALSE)
+
+  if (estimate_type != "Zr") {
+    if (is.null(param_table)) {
+
+      cli::cli_abort("{.arg param_table} must be supplied for {.val {estimate_type}} data")
+    }
+
+    # ------ Back transform if estimate_type is yi only ------
+    out <- ManyEcoEvo %>%
+      ungroup() %>%
+      # dplyr::group_by(dataset) %>% #NOTE: mapping doesn't work properly when tibble is rowwise!
+      dplyr::mutate(
+        data = purrr::map2(
+          .x = data, 
+          .y = dataset,
+          .f = ~ back_transform_response_vars_yi(
+            dat = .x,
+            estimate_type = !!{
+              estimate_type
+            },
+            dataset = .y
+          )
+        ),
+        diversity_data = 
+          map2(
+            .x = diversity_data,
+            .y = data,
+            .f = ~ semi_join(.x, .y) %>% 
+              distinct()
+          )
+      )
+    return(out)
+  } else{
+    if (!is.null(param_table)) {
+      cli::cli_abort("{.arg param_table} must be NULL for {.val {estimate_type}} data")
+    }
+  }
+
+  # ------ Standardise Response Variables for Meta-analysis ------
   out <- ManyEcoEvo %>%
     ungroup() %>%
     # dplyr::group_by(dataset) %>% #NOTE: mapping doesn't work properly when tibble is rowwise!
     dplyr::mutate(data = purrr::map2(
-      .x = data, .y = dataset,
+      .x = data, 
+      .y = dataset,
       .f = ~ standardise_response(
         dat = .x,
         estimate_type = !!{
@@ -29,5 +69,7 @@ prepare_response_variables <- function(ManyEcoEvo,
         dataset = .y
       )
     ))
+
   return(out)
+
 }
diff --git a/R/prepare_response_variables_yi.R b/R/prepare_response_variables_yi.R
@@ -15,6 +15,7 @@ prepare_response_variables_yi <- function(ManyEcoEvo,
   stopifnot(is.data.frame(ManyEcoEvo))
   # TODO run checks on ManyEcoEvo
   match.arg(estimate_type, choices = c("yi", "y25", "y50", "y75"), several.ok = FALSE)
+
   out <- ManyEcoEvo %>%
     ungroup() %>%
     # dplyr::group_by(dataset) %>% #NOTE: mapping doesn't work properly when tibble is rowwise!
@@ -43,24 +44,21 @@ prepare_response_variables_yi <- function(ManyEcoEvo,
 #'
 #' @param dat A dataframe of out of sample predictions analyst submission data
 #' @param estimate_type The type of estimate to be standardised. Character vector of length 1, whose value may be  "yi", "y25", "y50", "y75".
-#' @param param_table A table of estimated 'population' parameters for each variable in the analysis datasets.
 #' @param dataset One of either "blue tit" or "eucalyptus"
 #'
 #' @return A tibble of analyst data with standardised values contained in a list-column called 'back_transformed_data'
 #' @export
 #' @family analyst-data
 back_transform_response_vars_yi <- function(dat,
                                             estimate_type = character(1L),
-                                            param_table = NULL,
                                             dataset = character(1L)) {
   # TODO insert checks that appropriate columns exist
   # TODO apply to data and check that all cases accounted for!
   match.arg(estimate_type, choices = c("yi", "y25", "y50", "y75"), several.ok = FALSE)
   match.arg(dataset, choices = c("eucalyptus", "blue tit"), several.ok = FALSE)
-  cli::cli_h1(glue::glue("Computing meta-analysis inputs", "for estimate type ", "{estimate_type}"))
-
-
+
   cli::cli_h2(paste0("Transforming out of sample predictions from link to response scale"))
+
   dat <- dat %>%
     pointblank::col_exists(
       columns =
@@ -74,27 +72,12 @@ back_transform_response_vars_yi <- function(dat,
           "response_transformation_status"
         )
     ) %>% # add check for  response transformation
-    dplyr::group_by(
+    dplyr::group_by( #TODO group on id_col
       TeamIdentifier,
       submission_id,
       analysis_id,
       split_id
-    ) %>%
-    dplyr::mutate(params = purrr::map(
-      .x = response_variable_name,
-      .y = param_table,
-      .f = ~ dplyr::filter(.y, variable == .x)
-    )) %>%
-    dplyr::mutate(nrow_params = purrr::map_int(params, nrow)) %>%
-    dplyr::mutate(params = purrr::map2(params,
-      nrow_params,
-      .f = ~ if (.y > 0) {
-        .x
-      } else {
-        NA
-      }
-    )) %>%
-    dplyr::select(-nrow_params) %>%
+    ) %>% 
     dplyr::mutate(
       transformation_type =
         assign_transformation_type(
@@ -122,7 +105,7 @@ back_transform_response_vars_yi <- function(dat,
             rlang::na_lgl
           }
         )
-    ) # TODO note that the blue tit and eucalyptus back transformed dat has different names for the out of sample prediction estimates. For BT it's "estimate", for Euc it's "fit" (this is because of the way we asked analysts to submit their data...).
-
+    ) 
+  
   return(dat)
 }
diff --git a/R/standardise_response.R b/R/standardise_response.R
@@ -28,11 +28,13 @@ standardise_response <- function(dat,
   # TODO apply to data and check that all cases accounted for!
   match.arg(estimate_type, choices = c("Zr", "yi", "y25", "y50", "y75"), several.ok = FALSE)
   match.arg(dataset, choices = c("eucalyptus", "blue tit"), several.ok = FALSE)
+
   cli::cli_h1(glue::glue("Computing meta-analysis inputs", "for estimate type ", "{estimate_type}"))
 
   if (estimate_type == "Zr") {
-    # Convert Effect Sizes to Zr -------
+    # ------ Convert Effect Sizes to Zr -------
     cli::cli_h2(paste0("Computing standardised effect sizes ", "{.code Zr}", " and variance ", "{.code VZr}"))
+
     dat <- dat %>%
       # unnest(back_transformed_estimate) %>%
       dplyr::mutate(Zr_VZr = purrr::pmap(
@@ -44,8 +46,10 @@ standardise_response <- function(dat,
         .f = est_to_zr
       )) %>%
       tidyr::unnest(cols = c(Zr_VZr))
-  } else { # estimate_type != Zr, i.e. == "y*"
-    cli::cli_h2(paste0("Transforming out of sample predictions from link to response scale"))
+  } else { 
+    # ------ Convert predictions to Z -------
+    cli::cli_h2(paste0("Standardising out-of-sample predictions"))
+
     dat <- dat %>%
       pointblank::col_exists(
         columns =
@@ -82,49 +86,20 @@ standardise_response <- function(dat,
                                     NA
                                   }
                       )) %>%
-      dplyr::select(-nrow_params) %>%
-      dplyr::mutate(
-        transformation_type =
-          assign_transformation_type(
-            response_transformation = response_transformation_status,
-            link_fun = transformation
-          )
-      ) %>%
-      dplyr::mutate(
-        back_transformed_data =
-          purrr::pmap(
-            .l = list(
-              augmented_data,
-              transformation_type, # TODO update, gh issue 162
-              response_transformation_status,
-              transformation
-            ), # TODO update, gh issue 162 #NOTE: see #127 / #38 on GH.
-            .f = ~ if (!rlang::is_na(..1) | !rlang::is_na(..2)) {
-              convert_predictions(
-                augmented_data = ..1,
-                transformation_type = ..2,
-                response_transformation = ..3,
-                link_fun = ..4
-              )
-            } else {
-              rlang::na_lgl
-            }
-          )
-      )
-
-    cli::cli_h2(paste0("Standardising out-of-sample predictions"))
-
-    dat <- dat %>%
+      dplyr::select(-nrow_params) %>% 
       dplyr::mutate(
-        back_transformed_data = # TODO rename standardised_data and fix up downstream dependencies
+        back_transformed_data = # TODO rename standardised_data and fix up downstream (and upstream wrappers, when not standardised) dependencies
           purrr::pmap(
             list(
               back_transformed_data,
               params,
               dataset
             ),
             .f = ~ if (!rlang::is_na(..1) | !rlang::is_na(..2)) {
-              pred_to_Z(..1, params = ..2, dataset = ..3)
+              pred_to_Z(
+                back_transformed_data = ..1, 
+                params = ..2, 
+                dataset = ..3)
             } else {
               NA
             }