diff --git a/.Rbuildignore b/.Rbuildignore index a288e13..bcb0e71 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,9 @@ $run_dev.* ^\.github$ ^app\.R$ ^rsconnect$ +^doc$ +^Meta$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ + diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..2e17b89 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,48 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + # pull_request: + # branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. 
+ needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.gitignore b/.gitignore index 5b6a065..0691ddd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,7 @@ .Rhistory .RData .Ruserdata +inst/doc +/doc/ +/Meta/ +docs diff --git a/DESCRIPTION b/DESCRIPTION index fd7b623..3374664 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,10 +34,13 @@ Imports: Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.3 -URL: https://github.com/NeuroShepherd/ordinalsimr +URL: https://github.com/NeuroShepherd/ordinalsimr, https://neuroshepherd.github.io/ordinalsimr/ BugReports: https://github.com/NeuroShepherd/ordinalsimr/issues Suggests: + knitr, + rmarkdown, testthat (>= 3.0.0) Config/testthat/edition: 3 Depends: R (>= 2.10) +VignetteBuilder: knitr diff --git a/R/mod_data_entry.R b/R/mod_data_entry.R index 36d3166..d936827 100644 --- a/R/mod_data_entry.R +++ b/R/mod_data_entry.R @@ -28,11 +28,20 @@ mod_data_entry_server <- function(id){ ns <- session$ns default_entry_rows <- getOption("ordinalsimr.default_entry_rows", default = 3) - entered_data = data.frame(`Group 1 Probabilities` = rep(0, default_entry_rows), - `Group 2 Probabilities` = rep(0, default_entry_rows), - check.names = FALSE) + default_dist_option <- getOption("ordinalsimr.default_distributions") + default_distribution_data <- data.frame(`Group 1 Probabilities` = rep(0, default_entry_rows), + `Group 2 Probabilities` = rep(0, default_entry_rows), + check.names = FALSE) + entered_data <- if (is.data.frame(default_dist_option)) { + default_dist_option %>% + dplyr::rename(`Group 1 Probabilities` = 1, + `Group 2 Probabilities` = 2) + } else { + default_distribution_data + } reactive_data_vals <- reactiveVal(entered_data) + 
observeEvent(input$hottable, { reactive_data_vals(hot_to_r(input$hottable)) } ) observeEvent(input$add_row, { reactive_data_vals(rbind(reactive_data_vals(), 0)) }) observeEvent(input$delete_row, { reactive_data_vals(reactive_data_vals()[-nrow(reactive_data_vals()), ]) }) diff --git a/R/utils.R b/R/utils.R index 3d7fa4b..5268401 100644 --- a/R/utils.R +++ b/R/utils.R @@ -37,7 +37,7 @@ parse_ratio_text <- function(text) { #' Calculate Hypothesis Test Parameters #' -#' This function calculates the power, Type II error, and Type I error of tests given p-values. Power, Type II error, and confidence intervals calculated using `stats::binom.test()`. +#' This function calculates the power, Type II error, and Type I error of tests given p-values. Power, Type II error, and confidence intervals calculated using `stats::binom.test()` which implements the Clopper-Pearson method. #' #' @param df Data frame where each column is a vector of p-values from a statistical test #' @param alpha Numeric significance level; defaults to 0.05 @@ -78,7 +78,7 @@ calculate_power_t2error <- function(df, alpha = 0.05, power_confidence_int = 95, #' Calculate Type 1 Error #' -#' Calculate Type 1 error for a distribution, and the confidence interval around this estimate. Type I error and confidence intervals calculated using `stats::binom.test()`. +#' Calculate Type 1 error for a distribution, and the confidence interval around this estimate. Type I error and confidence intervals calculated using `stats::binom.test()` which implements the Clopper-Pearson method. #' #' @param df data frame #' @param alpha significance level diff --git a/README.Rmd b/README.Rmd index 84c8bb2..997a40a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -21,7 +21,9 @@ knitr::opts_chunk$set( [![CRAN status](https://www.r-pkg.org/badges/version/ordinalsimr)](https://CRAN.R-project.org/package=ordinalsimr) -The goal of ordinalsimr is to ... 
+The {ordinalsimr} package assists in constructing simulation studies of ordinal data comparing two groups. It is intended to facilitate translation of methodological advances into practical settings for e.g. applied statisticians and data analysts who want to determine an appropriate statistical test to apply to their data or a proposed distribution of data. + +This package is primarily developed as a Shiny application which abstracts away the heavier coding aspect of setting up simulation studies. Instead, users can simply enter parameters and data distributions into the application, and save the results as an `.rds` file. The structure of the Shiny application only allows for one simulation to be specified at a time as opposed to a grid of parameters. However, the underlying functions for running the simulations are accessible. See `vignette("ordinalsimr")` for template code on setting up your own simulations manually. ## Installation @@ -32,6 +34,12 @@ You can install the development version of ordinalsimr from [GitHub](https://git devtools::install_github("NeuroShepherd/ordinalsimr") ``` +## Recommendations + +The application is available at [link], but may be down due to account usage limitations on ShinyApps.io. There is not currently a plan to increase the usage limits so it is *strongly recommended that you run the application locally*. + +Informative progress bars have not been implemented in the Shiny application, but a simulation that fails to run will almost always fail at the beginning rather than in the middle of the run. Simulations with 1000s of iterations *will* take minutes to hours to run. 
+ diff --git a/README.md b/README.md index a350fce..25e7ac5 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,22 @@ experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](h status](https://www.r-pkg.org/badges/version/ordinalsimr)](https://CRAN.R-project.org/package=ordinalsimr) -The goal of ordinalsimr is to … +The {ordinalsimr} package assists in constructing simulation studies of +ordinal data comparing two groups. It is intended to facilitate +translation of methodological advances into practical settings for +e.g. applied statisticians and data analysts who want to determine an +appropriate statistical test to apply to their data or a proposed +distribution of data. + +This package is primarily developed as a Shiny application which +abstracts away the heavier coding aspect of setting up simulation +studies. Instead, users can simply enter parameters and data +distributions into the application, and save the results as an `.rds` +file. The structure of the Shiny application only allows for one +simulation to be specified at a time as opposed to a grid of parameters. +However, the underlying functions for running the simulations are +accessible. See `vignette("ordinalsimr")` for template code on setting +up your own simulations manually. ## Installation @@ -23,3 +38,15 @@ You can install the development version of ordinalsimr from # install.packages("devtools") devtools::install_github("NeuroShepherd/ordinalsimr") ``` + +## Recommendations + +The application is available at \[link\], but may be down due to account +usage limitations on ShinyApps.io. There is not currently a plan to +increase the usage limits so it is *strongly recommended that you run +the application locally*. + +Informative progress bars have not been implemented in the Shiny +application, but a simulation that fails to run will almost always fail +at the beginning rather than in the middle of the run. 
Simulations with +1000s of iterations *will* take minutes to hours to run. diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..16b2156 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,4 @@ +url: https://neuroshepherd.github.io/ordinalsimr/ +template: + bootstrap: 5 + diff --git a/man/calculate_power_t2error.Rd b/man/calculate_power_t2error.Rd index f844c15..88a154e 100644 --- a/man/calculate_power_t2error.Rd +++ b/man/calculate_power_t2error.Rd @@ -24,5 +24,5 @@ calculate_power_t2error( A data frame with columns for Type 1 error, Type 2 error, and power as well as rows for each test } \description{ -This function calculates the power, Type II error, and Type I error of tests given p-values. Power, Type II error, and confidence intervals calculated using `stats::binom.test()`. +This function calculates the power, Type II error, and Type I error of tests given p-values. Power, Type II error, and confidence intervals calculated using `stats::binom.test()` which implements the Clopper-Pearson method. } diff --git a/man/calculate_t1_error.Rd b/man/calculate_t1_error.Rd index eef43db..3864136 100644 --- a/man/calculate_t1_error.Rd +++ b/man/calculate_t1_error.Rd @@ -24,5 +24,5 @@ calculate_t1_error( data frame } \description{ -Calculate Type 1 error for a distribution, and the confidence interval around this estimate. Type I error and confidence intervals calculated using `stats::binom.test()`. +Calculate Type 1 error for a distribution, and the confidence interval around this estimate. Type I error and confidence intervals calculated using `stats::binom.test()` which implements the Clopper-Pearson method. } diff --git a/man/ordinalsimr-package.Rd b/man/ordinalsimr-package.Rd index 99fcec3..180afd1 100644 --- a/man/ordinalsimr-package.Rd +++ b/man/ordinalsimr-package.Rd @@ -12,6 +12,7 @@ Simultaneously evaluate multiple ordinal outcome measures. 
Useful links: \itemize{ \item \url{https://github.com/NeuroShepherd/ordinalsimr} + \item \url{https://neuroshepherd.github.io/ordinalsimr/} \item Report bugs at \url{https://github.com/NeuroShepherd/ordinalsimr/issues} } diff --git a/renv.lock b/renv.lock index ce9df12..6355efc 100644 --- a/renv.lock +++ b/renv.lock @@ -1887,6 +1887,20 @@ ], "Hash": "3a1f41807d648a908e3c7f0334bf85e6" }, + "shinycssloaders": { + "Package": "shinycssloaders", + "Version": "1.0.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "digest", + "glue", + "grDevices", + "shiny" + ], + "Hash": "f39bb3c44a9b496723ec7e86f9a771d8" + }, "shinydashboard": { "Package": "shinydashboard", "Version": "0.7.2", diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/data-2024-02-16-049dcfcd9afdad1402588ca3a3f73f95-1.rds b/vignettes/data-2024-02-16-049dcfcd9afdad1402588ca3a3f73f95-1.rds new file mode 100644 index 0000000..075cde8 Binary files /dev/null and b/vignettes/data-2024-02-16-049dcfcd9afdad1402588ca3a3f73f95-1.rds differ diff --git a/vignettes/ordinalsimr.Rmd b/vignettes/ordinalsimr.Rmd new file mode 100644 index 0000000..4d59439 --- /dev/null +++ b/vignettes/ordinalsimr.Rmd @@ -0,0 +1,218 @@ +--- +title: "Using {ordinalsimr}" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using {ordinalsimr}} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(ordinalsimr) +``` + + +# Shiny App + +The {ordinalsimr} package wraps a Shiny application and supporting functions for running simulation studies on several pre-selected statistical tests applicable to ordinal data. 
The Shiny app is particularly suited for calculating Power and Type II error for a proposed 2-group comparison of an ordinal endpoint. Available parameters to manipulate before running the tests include: + +* Number of simulations to run (integer) +* The sample size (integer) +* The distribution ratio between group 1 and group 2 (ratio of form X:X) +* Group 1 and Group 2 distribution probabilities for levels of the ordinal outcome + +In addition to these parameters for running the simulation, the following can be adjusted in the Distributions page + +* Filtering out the top x% of observations from view in the plot. *This does not remove or alter data from any other processes. It is only included as a convenience function for observing smaller windows of the data.* +* Setting the alpha significance level/p-value threshold +* Setting the confidence intervals for the Power/T2 Error calculations +* Setting the confidence intervals (separately) for the Type I Error in Group 1 and Group 2 + +Bug reports and feature requests can be submitted as issues at [https://github.com/NeuroShepherd/ordinalsimr/issues](https://github.com/NeuroShepherd/ordinalsimr/issues) + +## App Options + +If you find yourself consistently changing the default parameters of the application to new values manually, it is possible to instead set default values for these parameters. All available preset parameters are prefixed with `ordinalsimr.`, but this functionality has so far only been implemented for a limited number of defaults. Options can be set using the `options()` function. + +Example of changing the default number of rows to 6: + +``` +options(ordinalsimr.default_entry_rows = 6) +``` + +### Available Options + +* `ordinalsimr.default_entry_rows`: single integer indicating number of rows +* `ordinalsimr.default_distributions`: data frame to use for simulations. + * Safety checks have not been included on this option. Input must be a 2-column data frame. Column names will be discarded. 
It is recommended that you ensure the sum of values in each column is **near** 1 (e.g. using `dplyr::near()`). + + +### Planned Options + +* `ordinalsimr.default_sample_size`: sample size +* `ordinalsimr.default_simulations`: number of simulation iterations +* ... + +## Using Downloaded Data + +Data downloaded after running a simulation is stored as a `.rds` file, and can be loaded into your R session using `readRDS()`. The data is structured as a named list with 3 elements at the top level, and several sub-elements. A summary of the available information is shown in the code below. + +```{r} +output <- readRDS("data-2024-02-16-049dcfcd9afdad1402588ca3a3f73f95-1.rds") +output$comparison_data$distribution_statistics +str(output, max.level = 2) + +``` + +* Top level items + * `comparison_data` are results from comparing Group 1 against Group 2 in the statistical tests (for TII error and power) + * `group1_data` are results from comparing Group 1 data against itself (for TI error of this group) + * `group2_data` are results from comparing Group 2 data against itself (for TI error of this group) +* Second level items + * `p_values` are tables of p-values for all of the tests (columns) in each simulation run (rows) + * `run_info` are tables of detailed metainformation about the parameters used for each run + * `distribution_statistics` are tables of computed TII error, power, and associated confidence intervals + * `distribution_plot` is a {ggplot2} plot of the distributions of p-values + * `group1_t1error` and `group2_t1error` are tables of TI error and associated confidence intervals + + + +# Coding Your Own Simulations + +This guide will provide a rough overview of how to code your own simulations using the components of this package should you find the Shiny application too limiting for your own purposes. Key information on functions: + +* `run_simulations()` will take simulation input parameters and return a list. 
The first element contains a matrix of p-values for each run of the simulation, while the second element is multiple lists of information describing group allocations in detail. +* `format_simulation_data()` is recommended for reformatting the two list elements mentioned above. It will return a list of two tibbles. +* `calculate_power_t2error()` and `calculate_t1_error()` can receive the p-value data frames for performing T1 Error, Power, and T2 Error calculations with confidence intervals. See function documentation for additional arguments. + +## Example: Power and Type II Error + +```{r, warning=FALSE} +sim_results <- run_simulations(sample_size = 80, + sample_prob = c(0.5,0.5), + prob0 = c(0.1,0.2,0.3,0.4), + prob1 = c(0.6,0.2,0.1,0.1), + niter = 20 + ) + +formatted_results <- format_simulation_data(sim_results) +names(formatted_results) + +head(formatted_results$p_values) + +calculate_power_t2error(formatted_results$p_values, alpha = 0.05, power_confidence_int = 95) + +``` + + +## Example: Type I Error + +To find the Type I error of a distribution, the code from before is largely unchanged except for the fact that the probability vectors set in `run_simulations` must now be equivalent and the `calculate_t1_error()` function is now applied. + + +```{r, warning=FALSE} +sim_results <- run_simulations(sample_size = 80, + sample_prob = c(0.5,0.5), + prob0 = c(0.1,0.2,0.3,0.4), + prob1 = c(0.1,0.2,0.3,0.4), + niter = 20 + ) + +formatted_results <- format_simulation_data(sim_results) +names(formatted_results) + +head(formatted_results$p_values) + +calculate_t1_error(formatted_results$p_values, alpha = 0.05, t1_error_confidence_int = 95) +``` + + + +## Example: Mapping Over Many Sample Sizes + +The current version of the application can only accept one sample size at a time. 
A future iteration of the application will ideally implement this feature, but, in the meantime, there are many situations where it would be advantageous to model test results over many different sample sizes. Depending on how large the number of iterations per sample size (`niter`) and the number of sample sizes being checked are, it may only be practical to do this in a parallelized manner with e.g. {furrr} or {parallel}. In any case, an example of such code is included below: + + +```{r, warning=FALSE} + +sample_sizes <- c(30,50,100) + +purrr::map(sample_sizes, + ~ run_simulations(sample_size = .x, + sample_prob = c(0.5,0.5), + prob0 = c(0.1,0.2,0.3,0.4), + prob1 = c(0.6,0.2,0.1,0.1), + niter = 100 + ) %>% + format_simulation_data() %>% + magrittr::extract2("p_values") %>% + calculate_power_t2error(), + .progress = TRUE + ) + +``` + + +## Example: Adjust Multiple Parameters + +It is perhaps more likely that analysts will want to iterate simulations over a variety of different parameters at once. The code below provides a structure for creating a combination grid based on the 5 input parameters that can be altered; this example can easily be altered to include desired parameters by replacing/removing/expanding the listed parameters. + +Note that the `prob0_list` and `prob1_list` must always be of the same length **and** the corresponding sub-elements of the list must also be of the same length. Put in terms of the application, there must be a Group 2 if there is a Group 1, and the vector representing the number of possible outcomes must be the same length for these 2 groups. + +```{r, warning=FALSE} + +# Choose sample sizes +sample_size <- c(50,100) +# Set sample distributions as a proportion c(group1, group2) +sample_prob <- list(c(0.5,0.5), c(0.75,0.25)) +# Trial 1 has matching probabilities between the 2 groups. 
Trial 2 has non-matching probabilities +prob0_list <- list(trial1_group1 = c(0.1,0.2,0.3,0.4), trial2_group1 = c(0.1,0.2,0.3,0.4) ) +prob1_list <- list(trial1_group2 = c(0.1,0.2,0.3,0.4), trial2_group2 = c(0.2,0.3,0.3,0.2) ) +# Number of iterations +niter <- c(20,100) + +# Use tidyr::expand_grid as it creates a tibble, supporting the nested tibble structure +info_table <- tidyr::expand_grid( + sample_size, + sample_prob, + prob0_list, + prob1_list, + niter +) + +info_table + +# Calculate either Power/T2 error or T1 error depending on your specific needs +many_sims <- info_table %>% + purrr::pmap( + ~run_simulations( + sample_size = ..1, + sample_prob = ..2, + prob0 = ..3, + prob1 = ..4, + niter = ..5 + ) %>% + format_simulation_data() %>% + magrittr::extract2("p_values") %>% + calculate_power_t2error(), + .progress = TRUE + ) + +many_sims[1] + + +``` + + + + + + +