Migration from tlfcarver to minimise issues

pfizer-opensource · Sep 18, 2024 · f0aa811 · f0aa811
1 parent 3dea9d8
commit f0aa811
Show file tree

Hide file tree

Showing 9 changed files with 662 additions and 616 deletions.
diff --git a/R/dataset_merge.R b/R/dataset_merge.R
@@ -0,0 +1,134 @@
+# Copyright 2024 Pfizer Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#' Merge Datasets
+#'
+#' @param ... Datasets to be merged.
+#' @param byvars By variables required to perform merge.
+#' @param subset Dataset specific subset conditions as `list`, default is `NULL`.
+#' Has to be specified in the same order of datasets to be merged
+#'
+#' @return A `data.frame`
+#' @export
+#'
+#' @examples
+#' dataset_merge(
+#'   lab_data$adsl,
+#'   lab_data$adlb,
+#'   byvars = "STUDYID~USUBJID~SUBJID",
+#'   subset = list("SEX=='F'", "PARAMCD == 'L00021S'")
+#' )
+#'
+#' dataset_merge(
+#'   lab_data$adsl,
+#'   lab_data$adlb,
+#'   byvars = "STUDYID~USUBJID~SUBJID",
+#'   subset = list("SEX=='F'", NA_character_)
+#' )
+#'
+#' dataset_merge(
+#'   lab_data$adsl,
+#'   lab_data$adlb,
+#'   byvars = "STUDYID~USUBJID~SUBJID",
+#'   subset = list(NA_character_, "PARAMCD == 'L00021S'")
+#' )
+#'
+#' dataset_merge(
+#'   lab_data$adsl,
+#'   lab_data$adlb,
+#'   byvars = "STUDYID~USUBJID~SUBJID",
+#'   subset = list("USUBJID == 'XYZ1 1003 10031009'", NA_character_)
+#' )
+#'
+#' dataset_merge(
+#'   waterfall_plot_data$adrs,
+#'   waterfall_plot_data$adtr,
+#'   byvars = "STUDYID~USUBJID~TRT01P",
+#'   subset = list("PARAMCD == 'BOR_C'", NA_character_)
+#' )
+#'
+#' ## more than 2 datasets
+#'
+#' dataset_merge(
+#'   dplyr::filter(lab_data$adsl, USUBJID == "XYZ1 1003 10031009"),
+#'   lab_data$adsl,
+#'   lab_data$adlb,
+#'   byvars = "STUDYID~USUBJID~SUBJID"
+#' )
+#'
+dataset_merge <- function(..., byvars, subset = NULL) {
+  dfs <- list2(...)
+  stopifnot("At least two datasets required for merging" = length(dfs) >= 2)
+  byvars <- str_to_vec(byvars)
+  if (!every(dfs, \(x) all(byvars %in% names(x)))) stop("`byvars` not present")
+
+  if (length(subset) > 0) {
+    stopifnot("Length of subsets and datasets should be equal" = length(dfs) == length(subset))
+    if (every(subset, is.na)) stop("All subsets cannot be `NA`, use `subset = NULL` instead")
+    dfs <- map(seq_along(dfs), \(i) {
+      df_sub <- dfs[[i]]
+      if (!is.na(subset[[i]])) {
+        df_sub <- df_sub |>
+          filter(!!!parse_exprs(subset[[i]]))
+      }
+      df_sub
+    })
+  }
+
+  df_list <- map(seq_along(dfs), \(x) {
+    out <- dfs[[x]]
+    if (x < length(dfs)) {
+      out <- out |>
+        select(all_of(union(byvars, setdiff(
+          names(dfs[[x]]), names(dfs[[x + 1]])
+        ))))
+    }
+    out
+  })
+  reduce(df_list, left_join, byvars)
+}
+
+#' Merge adsl dataset with the analysis dataset
+#'
+#' @param adsl adsl dataset
+#' @param adsl_subset population variable subset condition
+#' @param dataset_add analysis dataset
+#'
+#' @return merged dataset
+#' @export
+#'
+#' @examples
+#' data(lab_data)
+#'
+#' adsl_merge(
+#'   adsl = lab_data$adsl,
+#'   adsl_subset = "SAFFL=='Y'",
+#'   dataset_add = lab_data$adlb
+#' )
+#'
+adsl_merge <- function(adsl = NULL, adsl_subset = "", dataset_add = NULL) {
+  stopifnot(length(adsl) > 0)
+  stopifnot(nrow(adsl) > 0)
+  stopifnot(length(dataset_add) > 0)
+
+  if (adsl_subset != "" && !is.na(adsl_subset)) {
+    adsl <- adsl |>
+      filter(!!!parse_exprs(adsl_subset))
+  }
+
+  byvars <- grep("STUDYID|USUBJID|SUBJID", names(dataset_add), value = TRUE)
+
+  adsl |>
+    select(all_of(c(byvars, setdiff(names(adsl), names(dataset_add))))) |>
+    left_join(dataset_add, by = byvars)
+}
diff --git a/R/graph_utils.R b/R/graph_utils.R
@@ -21,7 +21,7 @@
 #' @export
 #'
 #' @examples
-#' library(tlfcarver)
+#' library(carver)
 #' library(ggplot2)
 #' ggplot(data = mtcars, mapping = aes(x = mpg, y = hp)) +
 #'   geom_point() +
@@ -160,7 +160,7 @@ g_seriessym <- function(gdata,
 #' @export
 #'
 #' @examples
-#' library(tlfcarver)
+#' library(carver)
 #' empty_plot()
 empty_plot <- function(message = "No data available for these values",
                        fontsize = 8) {
@@ -205,7 +205,7 @@ def_axis_spec <- function(arg, vec, val) {
 #' @export
 #'
 #' @examples
-#' library(tlfcarver)
+#' library(carver)
 #'
 #' plot_axis_opts(
 #'   xlinearopts = list(
@@ -600,7 +600,7 @@ plot_title_nsubj <- function(datain, plot_data, by) {
 #' @export
 #'
 #' @examples
-#' library(tlfcarver)
+#' library(carver)
 #' MPG <- ggplot2::mpg
 #' MPG[["cyl"]] <- as.character(MPG[["cyl"]])
 #' tbl_to_plot(

diff --git a/tests/testthat/test-ae_volcano_plot.R b/tests/testthat/test-ae_volcano_plot.R
@@ -0,0 +1,85 @@
+data("ae_risk")
+vaxis_opts <- ae_volcano_opts(
+  datain = ae_risk,
+  statistic = "Risk Ratio",
+  trt1_label = "Control",
+  trt2_label = "Exposure",
+  pvalue_trans = "-log10",
+  xref_offset = 1
+)
+axis_opts <- plot_axis_opts(
+  ylinearopts = vaxis_opts$ylinearopts,
+  yaxis_scale = vaxis_opts$yaxis_scale,
+  xaxis_label = vaxis_opts$xaxis_label,
+  yaxis_label = vaxis_opts$yaxis_label
+)
+
+test_that("Test 1: Volcano plot Options works", {
+  expect_type(vaxis_opts, "list")
+  expect_named(
+    vaxis_opts,
+    c("xaxis_label", "yaxis_label", "ylinearopts", "yaxis_scale", "xref")
+  )
+  expected <- list(
+    xaxis_label = "<--- Favors Control (N=69) ---- Favors Exposure (N=79) --->\nRisk Ratio",
+    yaxis_label = "-log10 p-value",
+    ylinearopts = list(breaks = as.numeric(paste0("1e-", 0:20)), labels = as.character(0:20)),
+    yaxis_scale = reverselog_trans(10),
+    xref = c(1, 0, 2)
+  )
+  expect_equal(vaxis_opts, expected)
+  vaxis_opts2 <- ae_volcano_opts(
+    datain = ae_risk,
+    pvalue_trans = "none"
+  )
+  expected2 <- list(
+    xaxis_label = "<--- Favors Control (N=69) ---- Favors Exposure (N=79) --->\nRisk Ratio",
+    yaxis_label = "p-value",
+    ylinearopts = list(
+      breaks = c(0.05, 0, rep(1, 10) / 10^(9:0)),
+      labels = as.character(c(0.05, 0, rep(1, 10) / 10^(9:0)))
+    ),
+    yaxis_scale = "identity",
+    xref = c(1, 0, 2)
+  )
+  expect_equal(vaxis_opts2, expected2)
+})
+
+test_that("Test 1: Volcano plot with standard inputs", {
+  volcano_test <- ae_volcano_plot(
+    datain = ae_risk,
+    axis_opts = axis_opts,
+    legend_opts = list(label = "", pos = "bottom", dir = "horizontal"),
+    xref = vaxis_opts$xref,
+    pvalue_sig = 0.05
+  )
+  expect_type(volcano_test, "list")
+  expect_true("ggplot" %in% class(volcano_test))
+  expect_snapshot(volcano_test[["mapping"]])
+  expect_snapshot(volcano_test[["labels"]])
+  volcano_test2 <- ae_volcano_plot(
+    datain = ae_risk,
+    axis_opts = axis_opts,
+    xref = vaxis_opts$xref,
+    pvalue_sig = 0.004
+  )
+  purrr::walk(
+    volcano_test2$layers,
+    \(x) expect_snapshot(x$aes_params)
+  )
+})
+
+test_that("Test 2: Volcano plot with interactive output", {
+  volcano_test <- ae_volcano_plot(
+    datain = ae_risk,
+    axis_opts = axis_opts,
+    legend_opts = list(label = "", pos = "bottom", dir = "horizontal"),
+    xref = vaxis_opts$xref,
+    pvalue_sig = 0.05,
+    interactive = "Y"
+  )
+  expect_type(volcano_test, "list")
+  expect_true("plotly" %in% class(volcano_test))
+  expect_equal(volcano_test$height, 700)
+  expect_equal(volcano_test$width, 800)
+})
diff --git a/tests/testthat/test-dataset_merge.R b/tests/testthat/test-dataset_merge.R
@@ -0,0 +1,54 @@
+df1 <- iris |>
+  dplyr::select(Species) |>
+  dplyr::distinct()
+
+df2 <- iris |>
+  dplyr::select(Species, dplyr::ends_with("Width"))
+
+test_that("dataset_merge works", {
+  expected <- dplyr::left_join(df1, df2, by = "Species")
+  actual <- dataset_merge(df1, df2, byvars = "Species")
+  expect_identical(expected, actual)
+})
+
+test_that("dataset_merge works with subsets and for > 2 datasets to merge", {
+  df12 <- dplyr::filter(df1, Species == "setosa")
+  df21 <- dplyr::filter(df2, Sepal.Width > 3)
+  df13 <- dplyr::filter(df1, Species != "versicolor")
+  expected1 <- dplyr::left_join(df1, df21, by = "Species")
+  actual1 <-
+    dataset_merge(df1, df2, byvars = "Species", subset = list(NA_character_, "Sepal.Width > 3"))
+  expected2 <- dplyr::left_join(df12, df21, by = "Species")
+  actual2 <-
+    dataset_merge(df1, df2, byvars = "Species", subset = list("Species == 'setosa'", "Sepal.Width > 3")) # nolint
+  expected3 <- purrr::reduce(list(df13, df12, df2), dplyr::left_join, "Species")
+  actual3 <-
+    dataset_merge(
+      df1, df1, df2,
+      byvars = "Species",
+      subset = list("Species != 'versicolor'", "Species == 'setosa'", NA_character_)
+    )
+  expect_identical(expected1, actual1)
+  expect_identical(expected2, actual2)
+  expect_identical(expected3, actual3)
+})
+
+test_that("dataset_merge returns expected errors", {
+  expect_error(
+    dataset_merge(
+      df1,
+      df2,
+      byvars = "xxx"
+    ),
+    "`byvars` not present"
+  )
+  expect_error(
+    dataset_merge(
+      df1,
+      df2,
+      byvars = "Species",
+      subset = list(NA_character_, NA_character_)
+    ),
+    "All subsets cannot be `NA`, use `subset = NULL` instead"
+  )
+})