diff --git a/NEWS.md b/NEWS.md index 27f14385..86f99365 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,13 @@ -# xportr (development version) +# xportr 0.3.1.9001 ## New Features and Bug Fixes - -## Documentation +* `xportr_write()` now accepts `metadata` argument which can be used to set the dataset label to stay consistent with the other `xportr_*` functions. It is noteworthy that the dataset label set using the `xportr_df_label()` function will be retained during the `xportr_write()`. +* Exporting a new dataset `dataset_spec` that contains the Dataset Specification for ADSL. ## Deprecation and Breaking Changes +* The `label` argument from the `xportr_write()` function is deprecated in favor of the `metadata` argument. + +## Documentation # xportr 0.3.1 diff --git a/R/data.R b/R/data.R index 96e24de2..ca83a2a6 100644 --- a/R/data.R +++ b/R/data.R @@ -56,7 +56,7 @@ #' } "adsl" -#' Example Dataset Specification +#' Example Dataset Variable Specification #' #' @format ## `var_spec` #' A data frame with 216 rows and 19 columns: @@ -82,3 +82,20 @@ #' \item{Developer Notes}{Developer Notes} #' } "var_spec" + +#' Example Dataset Specification +#' +#' @format ## `dataset_spec` +#' A data frame with 1 row and 9 columns: +#' \describe{ +#' \item{Dataset}{ Dataset} +#' \item{Description}{ Dataset description} +#' \item{Class}{ Dataset class} +#' \item{Structure}{ Logical, indicating if there's a specific structure} +#' \item{Purpose}{ Purpose of the dataset} +#' \item{Key, Variables}{ Join Key variables in the dataset} +#' \item{Repeating}{ Indicates if the dataset is repeating} +#' \item{Reference Data}{ Reference Data} +#' \item{Comment}{ Additional comment} +#' } +"dataset_spec" diff --git a/R/df_label.R b/R/df_label.R index 932bbf58..0621428a 100644 --- a/R/df_label.R +++ b/R/df_label.R @@ -83,6 +83,10 @@ xportr_df_label <- function(.df, abort("Length of dataset label must be 40 characters or less.") } + if (stringr::str_detect(label, "[^[:ascii:]]")) { + abort("`label` 
cannot contain any non-ASCII, symbol or special characters.") + } + attr(.df, "label") <- label .df diff --git a/R/write.R b/R/write.R index 57367fc2..0dd13541 100644 --- a/R/write.R +++ b/R/write.R @@ -7,10 +7,12 @@ #' @param .df A data frame to write. #' @param path Path where transport file will be written. File name sans will be #' used as `xpt` name. -#' @param label Dataset label. It must be <=40 characters. +#' @param label `r lifecycle::badge("deprecated")` Previously used to set the Dataset label. +#' Use the `metadata` argument to set the dataset label. #' @param strict_checks If TRUE, xpt validation will report errors and not write #' out the dataset. If FALSE, xpt validation will report warnings and continue #' with writing out the dataset. Defaults to FALSE +#' @inheritParams xportr_length #' #' @details #' * Variable and dataset labels are stored in the "label" attribute. @@ -32,17 +34,43 @@ #' Param = c("param1", "param2", "param3") #' ) #' +#' var_spec <- data.frame(dataset = "adsl", label = "Subject-Level Analysis Dataset") #' xportr_write(adsl, #' path = paste0(tempdir(), "/adsl.xpt"), -#' label = "Subject-Level Analysis", +#' metadata = var_spec, #' strict_checks = FALSE #' ) #' -xportr_write <- function(.df, path, label = NULL, strict_checks = FALSE) { +xportr_write <- function(.df, + path, + metadata = NULL, + domain = NULL, + strict_checks = FALSE, + label = deprecated()) { path <- normalizePath(path, mustWork = FALSE) name <- tools::file_path_sans_ext(basename(path)) + ## Common section to detect domain from argument or pipes + + df_arg <- tryCatch(as_name(enexpr(.df)), error = function(err) NULL) + domain <- get_domain(.df, df_arg, domain) + if (!is.null(domain)) attr(.df, "_xportr.df_arg_") <- domain + + ## End of common section + + if (!missing(label)) { + lifecycle::deprecate_warn( + when = "0.3.2", + what = "xportr_write(label = )", + with = "xportr_write(metadata = )" + ) + metadata <- data.frame(dataset = domain, label = label) + } 
+ if (!is.null(metadata)) { + .df <- xportr_df_label(.df, metadata = metadata, domain = domain) + } + if (nchar(name) > 8) { abort("`.df` file name must be 8 characters or less.") } @@ -51,18 +79,6 @@ xportr_write <- function(.df, path, label = NULL, strict_checks = FALSE) { abort("`.df` cannot contain any non-ASCII, symbol or underscore characters.") } - if (!is.null(label)) { - if (nchar(label) > 40) { - abort("`label` must be 40 characters or less.") - } - - if (stringr::str_detect(label, "[^[:ascii:]]")) { - abort("`label` cannot contain any non-ASCII, symbol or special characters.") - } - - attr(.df, "label") <- label - } - checks <- xpt_validate(.df) if (length(checks) > 0) { diff --git a/README.Rmd b/README.Rmd index 7af50e6d..89d6e94b 100644 --- a/README.Rmd +++ b/README.Rmd @@ -19,6 +19,7 @@ library(fontawesome) # xportr +[](https://RValidationHub.slack.com) [![R build status](https://github.com/atorus-research/xportr/workflows/R-CMD-check/badge.svg)](https://github.com/atorus-research/xportr/actions?workflow=R-CMD-check) [](https://app.codecov.io/gh/atorus-research/xportr) [](https://github.com/atorus-research/xportr/blob/master/LICENSE) @@ -121,6 +122,9 @@ spec_path <- system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = " var_spec <- readxl::read_xlsx(spec_path, sheet = "Variables") %>% dplyr::rename(type = "Data Type") %>% rlang::set_names(tolower) +dataset_spec <- readxl::read_xlsx(spec_path, sheet = "Datasets") %>% + dplyr::rename(label = "Description") %>% + rlang::set_names(tolower) ``` Each `xportr_` function has been written in a way to take in a part of the specification file and apply that piece to the dataset. Setting `verbose = "warn"` will send appropriate warning message to the console. We have suppressed the warning for the sake of brevity. 
@@ -132,7 +136,8 @@ adsl %>% xportr_label(var_spec, "ADSL", verbose = "warn") %>% xportr_order(var_spec, "ADSL", verbose = "warn") %>% xportr_format(var_spec, "ADSL") %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_df_label(dataset_spec, "ADSL") %>% + xportr_write("adsl.xpt") ``` The `xportr_metadata()` function can reduce duplication by setting the variable specification and domain explicitly at the top of a pipeline. If you would like to use the `verbose` argument, you will need to set in each function call. @@ -145,7 +150,8 @@ adsl %>% xportr_label() %>% xportr_order() %>% xportr_format() %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_df_label(dataset_spec) %>% + xportr_write("adsl.xpt") ``` That's it! We now have a xpt file created in R with all appropriate types, lengths, labels, ordering and formats. Please check out the [Get Started](https://atorus-research.github.io/xportr/articles/xportr.html) for more information and detailed walk through of each `xportr_` function. 
diff --git a/README.md b/README.md index bbd581f9..147964b2 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,9 @@ spec_path <- system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = " var_spec <- readxl::read_xlsx(spec_path, sheet = "Variables") %>% dplyr::rename(type = "Data Type") %>% rlang::set_names(tolower) +dataset_spec <- readxl::read_xlsx(spec_path, sheet = "Datasets") %>% + dplyr::rename(label = "Description") %>% + rlang::set_names(tolower) ``` Each `xportr_` function has been written in a way to take in a part of @@ -140,7 +143,8 @@ adsl %>% xportr_label(var_spec, "ADSL", verbose = "warn") %>% xportr_order(var_spec, "ADSL", verbose = "warn") %>% xportr_format(var_spec, "ADSL") %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_df_label(dataset_spec, "ADSL") %>% + xportr_write("adsl.xpt") ``` The `xportr_metadata()` function can reduce duplication by setting the @@ -156,7 +160,8 @@ adsl %>% xportr_label() %>% xportr_order() %>% xportr_format() %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_df_label(dataset_spec) %>% + xportr_write("adsl.xpt") ``` That’s it! 
We now have a xpt file created in R with all appropriate diff --git a/_pkgdown.yml b/_pkgdown.yml index 28abfbf0..dbeae1cc 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -48,6 +48,7 @@ reference: - contents: - adsl - var_spec + - dataset_spec articles: - title: ~ diff --git a/data/dataset_spec.rda b/data/dataset_spec.rda new file mode 100644 index 00000000..be9c31c4 Binary files /dev/null and b/data/dataset_spec.rda differ diff --git a/man/dataset_spec.Rd b/man/dataset_spec.Rd new file mode 100644 index 00000000..7ab0d370 --- /dev/null +++ b/man/dataset_spec.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{dataset_spec} +\alias{dataset_spec} +\title{Example Dataset Specification} +\format{ +\subsection{\code{dataset_spec}}{ + +A data frame with 1 row and 9 columns: +\describe{ +\item{Dataset}{\if{html}{\out{}} Dataset} +\item{Description}{\if{html}{\out{}} Dataset description} +\item{Class}{\if{html}{\out{}} Dataset class} +\item{Structure}{\if{html}{\out{}} Logical, indicating if there's a specific structure} +\item{Purpose}{\if{html}{\out{}} Purpose of the dataset} +\item{Key, Variables}{\if{html}{\out{}} Join Key variables in the dataset} +\item{Repeating}{\if{html}{\out{}} Indicates if the dataset is repeating} +\item{Reference Data}{\if{html}{\out{}} Reference Data} +\item{Comment}{\if{html}{\out{}} Additional comment} +} +} +} +\usage{ +dataset_spec +} +\description{ +Example Dataset Specification +} +\keyword{datasets} diff --git a/man/var_spec.Rd b/man/var_spec.Rd index 1b688c9c..5460c33d 100644 --- a/man/var_spec.Rd +++ b/man/var_spec.Rd @@ -3,7 +3,7 @@ \docType{data} \name{var_spec} \alias{var_spec} -\title{Example Dataset Specification} +\title{Example Dataset Variable Specification} \format{ \subsection{\code{var_spec}}{ @@ -35,6 +35,6 @@ A data frame with 216 rows and 19 columns: var_spec } \description{ -Example Dataset Specification +Example Dataset Variable 
Specification } \keyword{datasets} diff --git a/man/xportr_write.Rd b/man/xportr_write.Rd index f1b89fc9..b59e61bd 100644 --- a/man/xportr_write.Rd +++ b/man/xportr_write.Rd @@ -4,7 +4,14 @@ \alias{xportr_write} \title{Write xpt v5 transport file} \usage{ -xportr_write(.df, path, label = NULL, strict_checks = FALSE) +xportr_write( + .df, + path, + metadata = NULL, + domain = NULL, + strict_checks = FALSE, + label = deprecated() +) } \arguments{ \item{.df}{A data frame to write.} @@ -12,11 +19,19 @@ xportr_write(.df, path, label = NULL, strict_checks = FALSE) \item{path}{Path where transport file will be written. File name sans will be used as \code{xpt} name.} -\item{label}{Dataset label. It must be <=40 characters.} +\item{metadata}{A data frame containing variable level metadata. See +'Metadata' section for details.} + +\item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +the metadata object. If none is passed, then name of the dataset passed as +.df will be used.} \item{strict_checks}{If TRUE, xpt validation will report errors and not write out the dataset. If FALSE, xpt validation will report warnings and continue with writing out the dataset. Defaults to FALSE} + +\item{label}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Previously used to set the Dataset label. +Use the \code{metadata} argument to set the dataset label.} } \value{ A data frame. \code{xportr_write()} returns the input data invisibly. 
@@ -42,9 +57,10 @@ adsl <- data.frame( Param = c("param1", "param2", "param3") ) +var_spec <- data.frame(dataset = "adsl", label = "Subject-Level Analysis Dataset") xportr_write(adsl, path = paste0(tempdir(), "/adsl.xpt"), - label = "Subject-Level Analysis", + metadata = var_spec, strict_checks = FALSE ) diff --git a/tests/testthat/test-write.R b/tests/testthat/test-write.R index ba165e3c..4229c06e 100644 --- a/tests/testthat/test-write.R +++ b/tests/testthat/test-write.R @@ -10,13 +10,48 @@ test_that("xportr_write: exported data can be saved to a file", { expect_equal(read_xpt(tmp), data_to_save) }) -test_that("xportr_write: exported data can be saved to a file with a label", { +test_that("xportr_write: exported data can still be saved to a file with a label", { tmpdir <- tempdir() tmp <- file.path(tmpdir, "xyz.xpt") on.exit(unlink(tmpdir)) - xportr_write(data_to_save, path = tmp, label = "Lorem ipsum dolor sit amet") + suppressWarnings(xportr_write(data_to_save, path = tmp, label = "Lorem ipsum dolor sit amet")) + expect_output(str(read_xpt(tmp)), "Lorem ipsum dolor sit amet") +}) + +test_that("xportr_write: exported data can be saved to a file with a metadata", { + tmpdir <- tempdir() + tmp <- file.path(tmpdir, "xyz.xpt") + + on.exit(unlink(tmpdir)) + + xportr_write( + data_to_save, + path = tmp, + metadata = data.frame( + dataset = "data_to_save", + label = "Lorem ipsum dolor sit amet" + ) + ) + expect_output(str(read_xpt(tmp)), "Lorem ipsum dolor sit amet") +}) + +test_that("xportr_write: exported data can be saved to a file with a existing metadata", { + tmpdir <- tempdir() + tmp <- file.path(tmpdir, "xyz.xpt") + + on.exit(unlink(tmpdir)) + + df <- xportr_df_label( + data_to_save, + data.frame( + dataset = "data_to_save", + label = "Lorem ipsum dolor sit amet" + ) + ) + + xportr_write(df, path = tmp) expect_output(str(read_xpt(tmp)), "Lorem ipsum dolor sit amet") }) @@ -26,7 +61,16 @@ test_that("xportr_write: expect error when invalid multibyte string is 
passed in on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = "Lorizzle ipsizzle dolizzl\xe7 pizzle")) + expect_error( + xportr_write( + data_to_save, + tmp, + metadata = data.frame( + dataset = "data_to_save", + label = "Lorizzle ipsizzle dolizzl\xe7 pizzle" + ) + ) + ) }) test_that("xportr_write: expect error when file name is over 8 characters long", { @@ -35,7 +79,7 @@ test_that("xportr_write: expect error when file name is over 8 characters long", on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = "asdf")) + expect_error(xportr_write(data_to_save, tmp)) }) test_that("xportr_write: expect error when file name contains non-ASCII symbols or special characters", { @@ -44,7 +88,7 @@ test_that("xportr_write: expect error when file name contains non-ASCII symbols on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = "asdf")) + expect_error(xportr_write(data_to_save, tmp)) }) test_that("xportr_write: expect error when label contains non-ASCII symbols or special characters", { @@ -53,7 +97,16 @@ test_that("xportr_write: expect error when label contains non-ASCII symbols or s on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = "çtestç")) + expect_error( + xportr_write( + data_to_save, + tmp, + metadata = data.frame( + dataset = "data_to_save", + label = "çtestç" + ) + ) + ) }) test_that("xportr_write: expect error when label is over 40 characters", { @@ -62,7 +115,16 @@ test_that("xportr_write: expect error when label is over 40 characters", { on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = paste(rep("a", 41), collapse = ""))) + expect_error( + xportr_write( + data_to_save, + tmp, + metadata = data.frame( + dataset = "data_to_save", + label = paste(rep("a", 41), collapse = "") + ) + ) + ) }) test_that("xportr_write: expect error when an xpt validation fails with
strict_checks set to TRUE", { @@ -72,7 +134,16 @@ test_that("xportr_write: expect error when an xpt validation fails with strict_c on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = "label", strict_checks = TRUE)) + expect_error( + xportr_write( + data_to_save, tmp, + metadata = data.frame( + dataset = "data_to_save", + label = "label" + ), + strict_checks = TRUE + ) + ) }) test_that("xportr_write: expect warning when an xpt validation fails with strict_checks set to FALSE", { @@ -82,18 +153,18 @@ test_that("xportr_write: expect warning when an xpt validation fails with strict on.exit(unlink(tmpdir)) - expect_warning(xportr_write(data_to_save, tmp, label = "label", strict_checks = FALSE)) + expect_warning( + xportr_write( + data_to_save, tmp, + metadata = data.frame( + dataset = "data_to_save", + label = "label" + ), + strict_checks = FALSE + ) + ) }) -test_that("xportr_write: expect warning when an xpt validation fails with strict_checks set to FALSE", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") - attr(data_to_save$X, "format.sas") <- "foo" - - on.exit(unlink(tmpdir)) - - expect_warning(xportr_write(data_to_save, tmp, label = "label", strict_checks = FALSE)) -}) test_that("xportr_write: Capture errors by haven and report them as such", { tmpdir <- tempdir() @@ -102,8 +173,18 @@ test_that("xportr_write: Capture errors by haven and report them as such", { on.exit(unlink(tmpdir)) + expect_error( - suppressWarnings(xportr_write(data_to_save, tmp, label = "label", strict_checks = FALSE)), + suppressWarnings( + xportr_write( + data_to_save, tmp, + metadata = data.frame( + dataset = "data_to_save", + label = "label" + ), + strict_checks = FALSE + ) + ), "Error reported by haven" ) }) diff --git a/vignettes/deepdive.Rmd b/vignettes/deepdive.Rmd index 8f1ccac0..c9f6c915 100644 --- a/vignettes/deepdive.Rmd +++ b/vignettes/deepdive.Rmd @@ -171,7 +171,8 @@ adsl %>% xportr_label(var_spec, "ADSL", "message") %>% 
xportr_order(var_spec, "ADSL", "message") %>% xportr_format(var_spec, "ADSL") %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_df_label(dataset_spec, "ADSL") %>% + xportr_write("adsl.xpt") ``` To help reduce these repetitive calls, we have created `xportr_metadata()`. A user can just **set** the _metadata object_ and the Domain name in the first call, and this will be passed on to the other functions. Much cleaner! @@ -185,7 +186,8 @@ adsl %>% xportr_label() %>% xportr_order() %>% xportr_format() %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_df_label(dataset_spec) %>% + xportr_write("adsl.xpt") ``` @@ -198,11 +200,16 @@ For the next six sections, we are going to explore the Warnings and Errors messa ### Setting up our metadata object First, let's read in the specification file and call it `var_spec`. Note that we are not using `options()` here. We will do some slight manipulation to the column names by doing all lower case, and changing `Data Type` to `type` and making the Order column numeric. You can also use `options()` for this step as well. The `var_spec` object has five dataset specification files stacked on top of each other. We will make use of the `ADSL` subset of `var_spec`. You can make use of the Search field above the dataset column to subset the specification file for `ADSL` +Similarly, we can read the Dataset spec file and call it `dataset_spec`. ```{r} var_spec <- var_spec %>% rename(type = "Data Type") %>% set_names(tolower) + +dataset_spec <- dataset_spec %>% + rename(label = "Description") %>% + set_names(tolower) ``` ```{r, echo = FALSE} @@ -400,7 +407,8 @@ At the time of `{xportr} v0.3.0` we have not implemented any warnings or error m Finally, we want to write out an `xpt` dataset with all our metadata applied. -We will make use of `xportr_metadata()` to reduce repetitive metadata and domain specifications. 
We will use default option for verbose, which is just `message` and so not set anything for `verbose`. In `xportr_write()` we will specify the path, which will just be our current working directory, set the dataset label and toggle the `strict_checks` to be `FALSE`. +We will make use of `xportr_metadata()` to reduce repetitive metadata and domain specifications. We will use default option for verbose, which is just `message` and so not set anything for `verbose`. In `xportr_write()` we will specify the path, which will just be our current working directory, set the dataset label and toggle the `strict_checks` to be `FALSE`. +It is also noteworthy that you can set the dataset label using `xportr_df_label()` and a `dataset_spec`, which will then be used by `xportr_write()`. ```{r, echo = TRUE, error = TRUE} adsl %>% @@ -410,7 +418,8 @@ adsl %>% xportr_label() %>% xportr_order() %>% xportr_format() %>% - xportr_write(path = "adsl.xpt", label = "Subject-Level Analysis Dataset", strict_checks = FALSE) + xportr_df_label(dataset_spec) %>% + xportr_write(path = "adsl.xpt", strict_checks = FALSE) ``` Success! We have applied types, lengths, labels, ordering and formats to our dataset. Note the messages written out to the console. Remember the `TRTDUR` and `DCREASCD` and how these are not present in the metadata, but in the dataset. This impacts the messaging for lengths and labels where `{xportr}` is printing out some feedback to us on the two issues. 5 types are coerced, as well as 36 variables re-ordered. Note that `strict_checks` was set to `FALSE`. 
@@ -419,7 +428,7 @@ The next two examples showcase the `strict_checks = TRUE` option in `xportr_writ ```{r, echo = TRUE, error = TRUE} adsl %>% - xportr_write(path = "adsl.xpt", label = "Subject-Level Analysis Dataset", strict_checks = TRUE) + xportr_write(path = "adsl.xpt", metadata = dataset_spec, domain = "ADSL", strict_checks = TRUE) ``` @@ -439,7 +448,8 @@ adsl %>% xportr_label() %>% xportr_type() %>% xportr_format() %>% - xportr_write(path = "adsl.xpt", label = "Subject-Level Analysis Dataset", strict_checks = TRUE) + xportr_df_label(dataset_spec) %>% + xportr_write(path = "adsl.xpt", strict_checks = TRUE) ``` diff --git a/vignettes/xportr.Rmd b/vignettes/xportr.Rmd index 1c6acdb0..2e39f386 100644 --- a/vignettes/xportr.Rmd +++ b/vignettes/xportr.Rmd @@ -278,7 +278,7 @@ adsl %>% xportr_label(var_spec, "ADSL", "message") %>% xportr_order(var_spec, "ADSL", "message") %>% xportr_format(var_spec, "ADSL") %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_write("adsl.xpt") ``` That's it! We now have a `xpt` file created in R with all appropriate types, lengths, labels, ordering and formats from our specification file. If you are interested in exploring more of the custom