Skip to content

Commit

Permalink
Migration from tlfcarver to minimise issues
Browse files Browse the repository at this point in the history
  • Loading branch information
smritia committed Sep 18, 2024
1 parent 3dea9d8 commit f0aa811
Show file tree
Hide file tree
Showing 9 changed files with 662 additions and 616 deletions.
134 changes: 134 additions & 0 deletions R/dataset_merge.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Copyright 2024 Pfizer Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Merge Datasets
#'
#' @param ... Datasets to be merged.
#' @param byvars By variables required to perform merge.
#' @param subset Dataset specific subset conditions as `list`, default is `NULL`.
#' Has to be specified in the same order of datasets to be merged
#'
#' @return A `data.frame`
#' @export
#'
#' @examples
#' dataset_merge(
#' lab_data$adsl,
#' lab_data$adlb,
#' byvars = "STUDYID~USUBJID~SUBJID",
#' subset = list("SEX=='F'", "PARAMCD == 'L00021S'")
#' )
#'
#' dataset_merge(
#' lab_data$adsl,
#' lab_data$adlb,
#' byvars = "STUDYID~USUBJID~SUBJID",
#' subset = list("SEX=='F'", NA_character_)
#' )
#'
#' dataset_merge(
#' lab_data$adsl,
#' lab_data$adlb,
#' byvars = "STUDYID~USUBJID~SUBJID",
#' subset = list(NA_character_, "PARAMCD == 'L00021S'")
#' )
#'
#' dataset_merge(
#' lab_data$adsl,
#' lab_data$adlb,
#' byvars = "STUDYID~USUBJID~SUBJID",
#' subset = list("USUBJID == 'XYZ1 1003 10031009'", NA_character_)
#' )
#'
#' dataset_merge(
#' waterfall_plot_data$adrs,
#' waterfall_plot_data$adtr,
#' byvars = "STUDYID~USUBJID~TRT01P",
#' subset = list("PARAMCD == 'BOR_C'", NA_character_)
#' )
#'
#' ## more than 2 datasets
#'
#' dataset_merge(
#' dplyr::filter(lab_data$adsl, USUBJID == "XYZ1 1003 10031009"),
#' lab_data$adsl,
#' lab_data$adlb,
#' byvars = "STUDYID~USUBJID~SUBJID"
#' )
#'
dataset_merge <- function(..., byvars, subset = NULL) {
dfs <- list2(...)
stopifnot("At least two datasets required for merging" = length(dfs) >= 2)
byvars <- str_to_vec(byvars)
if (!every(dfs, \(x) all(byvars %in% names(x)))) stop("`byvars` not present")

if (length(subset) > 0) {
stopifnot("Length of subsets and datasets should be equal" = length(dfs) == length(subset))
if (every(subset, is.na)) stop("All subsets cannot be `NA`, use `subset = NULL` instead")
dfs <- map(seq_along(dfs), \(i) {
df_sub <- dfs[[i]]
if (!is.na(subset[[i]])) {
df_sub <- df_sub |>
filter(!!!parse_exprs(subset[[i]]))
}
df_sub
})
}

df_list <- map(seq_along(dfs), \(x) {
out <- dfs[[x]]
if (x < length(dfs)) {
out <- out |>
select(all_of(union(byvars, setdiff(
names(dfs[[x]]), names(dfs[[x + 1]])
))))
}
out
})
reduce(df_list, left_join, byvars)
}

#' Merge adsl dataset with the analysis dataset
#'
#' @param adsl adsl dataset
#' @param adsl_subset population variable subset condition
#' @param dataset_add analysis dataset
#'
#' @return merged dataset
#' @export
#'
#' @examples
#' data(lab_data)
#'
#' adsl_merge(
#' adsl = lab_data$adsl,
#' adsl_subset = "SAFFL=='Y'",
#' dataset_add = lab_data$adlb
#' )
#'
adsl_merge <- function(adsl = NULL, adsl_subset = "", dataset_add = NULL) {
stopifnot(length(adsl) > 0)
stopifnot(nrow(adsl) > 0)
stopifnot(length(dataset_add) > 0)

if (adsl_subset != "" && !is.na(adsl_subset)) {
adsl <- adsl |>
filter(!!!parse_exprs(adsl_subset))
}

byvars <- grep("STUDYID|USUBJID|SUBJID", names(dataset_add), value = TRUE)

adsl |>
select(all_of(c(byvars, setdiff(names(adsl), names(dataset_add))))) |>
left_join(dataset_add, by = byvars)
}
8 changes: 4 additions & 4 deletions R/graph_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#' @export
#'
#' @examples
#' library(tlfcarver)
#' library(carver)
#' library(ggplot2)
#' ggplot(data = mtcars, mapping = aes(x = mpg, y = hp)) +
#' geom_point() +
Expand Down Expand Up @@ -160,7 +160,7 @@ g_seriessym <- function(gdata,
#' @export
#'
#' @examples
#' library(tlfcarver)
#' library(carver)
#' empty_plot()
empty_plot <- function(message = "No data available for these values",
fontsize = 8) {
Expand Down Expand Up @@ -205,7 +205,7 @@ def_axis_spec <- function(arg, vec, val) {
#' @export
#'
#' @examples
#' library(tlfcarver)
#' library(carver)
#'
#' plot_axis_opts(
#' xlinearopts = list(
Expand Down Expand Up @@ -600,7 +600,7 @@ plot_title_nsubj <- function(datain, plot_data, by) {
#' @export
#'
#' @examples
#' library(tlfcarver)
#' library(carver)
#' MPG <- ggplot2::mpg
#' MPG[["cyl"]] <- as.character(MPG[["cyl"]])
#' tbl_to_plot(
Expand Down
85 changes: 85 additions & 0 deletions tests/testthat/test-ae_volcano_plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
data("ae_risk")
vaxis_opts <- ae_volcano_opts(
datain = ae_risk,
statistic = "Risk Ratio",
trt1_label = "Control",
trt2_label = "Exposure",
pvalue_trans = "-log10",
xref_offset = 1
)
axis_opts <- plot_axis_opts(
ylinearopts = vaxis_opts$ylinearopts,
yaxis_scale = vaxis_opts$yaxis_scale,
xaxis_label = vaxis_opts$xaxis_label,
yaxis_label = vaxis_opts$yaxis_label
)

test_that("Test 1: Volcano plot Options works", {
expect_type(vaxis_opts, "list")
expect_named(
vaxis_opts,
c("xaxis_label", "yaxis_label", "ylinearopts", "yaxis_scale", "xref")
)
expected <- list(
xaxis_label = "<--- Favors Control (N=69) ---- Favors Exposure (N=79) --->\nRisk Ratio",
yaxis_label = "-log10 p-value",
ylinearopts = list(breaks = as.numeric(paste0("1e-", 0:20)), labels = as.character(0:20)),
yaxis_scale = reverselog_trans(10),
xref = c(1, 0, 2)
)
expect_equal(vaxis_opts, expected)
vaxis_opts2 <- ae_volcano_opts(
datain = ae_risk,
pvalue_trans = "none"
)
expected2 <- list(
xaxis_label = "<--- Favors Control (N=69) ---- Favors Exposure (N=79) --->\nRisk Ratio",
yaxis_label = "p-value",
ylinearopts = list(
breaks = c(0.05, 0, rep(1, 10) / 10^(9:0)),
labels = as.character(c(0.05, 0, rep(1, 10) / 10^(9:0)))
),
yaxis_scale = "identity",
xref = c(1, 0, 2)
)
expect_equal(vaxis_opts2, expected2)
})

test_that("Test 1: Volcano plot with standard inputs", {
volcano_test <- ae_volcano_plot(
datain = ae_risk,
axis_opts = axis_opts,
legend_opts = list(label = "", pos = "bottom", dir = "horizontal"),
xref = vaxis_opts$xref,
pvalue_sig = 0.05
)
expect_type(volcano_test, "list")
expect_true("ggplot" %in% class(volcano_test))
expect_snapshot(volcano_test[["mapping"]])
expect_snapshot(volcano_test[["labels"]])
volcano_test2 <- ae_volcano_plot(
datain = ae_risk,
axis_opts = axis_opts,
xref = vaxis_opts$xref,
pvalue_sig = 0.004
)
purrr::walk(
volcano_test2$layers,
\(x) expect_snapshot(x$aes_params)
)
})

test_that("Test 2: Volcano plot with interactive output", {
volcano_test <- ae_volcano_plot(
datain = ae_risk,
axis_opts = axis_opts,
legend_opts = list(label = "", pos = "bottom", dir = "horizontal"),
xref = vaxis_opts$xref,
pvalue_sig = 0.05,
interactive = "Y"
)
expect_type(volcano_test, "list")
expect_true("plotly" %in% class(volcano_test))
expect_equal(volcano_test$height, 700)
expect_equal(volcano_test$width, 800)
})
54 changes: 54 additions & 0 deletions tests/testthat/test-dataset_merge.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
df1 <- iris |>
dplyr::select(Species) |>
dplyr::distinct()

df2 <- iris |>
dplyr::select(Species, dplyr::ends_with("Width"))

test_that("dataset_merge works", {
expected <- dplyr::left_join(df1, df2, by = "Species")
actual <- dataset_merge(df1, df2, byvars = "Species")
expect_identical(expected, actual)
})

test_that("dataset_merge works with subsets and for > 2 datasets to merge", {
df12 <- dplyr::filter(df1, Species == "setosa")
df21 <- dplyr::filter(df2, Sepal.Width > 3)
df13 <- dplyr::filter(df1, Species != "versicolor")
expected1 <- dplyr::left_join(df1, df21, by = "Species")
actual1 <-
dataset_merge(df1, df2, byvars = "Species", subset = list(NA_character_, "Sepal.Width > 3"))
expected2 <- dplyr::left_join(df12, df21, by = "Species")
actual2 <-
dataset_merge(df1, df2, byvars = "Species", subset = list("Species == 'setosa'", "Sepal.Width > 3")) # nolint
expected3 <- purrr::reduce(list(df13, df12, df2), dplyr::left_join, "Species")
actual3 <-
dataset_merge(
df1, df1, df2,
byvars = "Species",
subset = list("Species != 'versicolor'", "Species == 'setosa'", NA_character_)
)
expect_identical(expected1, actual1)
expect_identical(expected2, actual2)
expect_identical(expected3, actual3)
})

test_that("dataset_merge returns expected errors", {
expect_error(
dataset_merge(
df1,
df2,
byvars = "xxx"
),
"`byvars` not present"
)
expect_error(
dataset_merge(
df1,
df2,
byvars = "Species",
subset = list(NA_character_, NA_character_)
),
"All subsets cannot be `NA`, use `subset = NULL` instead"
)
})
Loading

0 comments on commit f0aa811

Please sign in to comment.