From 8a47d523f4b3dfc30c255ed428de572b9ceacb79 Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Tue, 21 May 2024 16:29:25 -0400
Subject: [PATCH 01/23] update renv

---
 renv/profiles/dev/renv.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/renv/profiles/dev/renv.lock b/renv/profiles/dev/renv.lock
index 5a3ed1a9..f51610f0 100644
--- a/renv/profiles/dev/renv.lock
+++ b/renv/profiles/dev/renv.lock
@@ -1,6 +1,6 @@
 {
   "R": {
-    "Version": "4.3.1",
+    "Version": "4.3.3",
     "Repositories": [
       {
         "Name": "RSPM",

From 47664dc76c77cfa77a46f4d997e6e849383b087e Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Thu, 30 May 2024 11:52:49 -0400
Subject: [PATCH 02/23] add a new vignette to outline the data specification

---
 dev/02_dev.R            |  1 +
 vignettes/data_spec.Rmd | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+)
 create mode 100644 vignettes/data_spec.Rmd

diff --git a/dev/02_dev.R b/dev/02_dev.R
index c44dfc1f..41516453 100644
--- a/dev/02_dev.R
+++ b/dev/02_dev.R
@@ -47,6 +47,7 @@ usethis::use_test("app")
 
 ## Vignette ----
 usethis::use_vignette("testgolem")
+usethis::use_vignette(name = "data_spec", title = "Input Data Specification")
 #devtools::build_vignettes()
 
 ## Code Coverage----
diff --git a/vignettes/data_spec.Rmd b/vignettes/data_spec.Rmd
new file mode 100644
index 00000000..3d5c2186
--- /dev/null
+++ b/vignettes/data_spec.Rmd
@@ -0,0 +1,21 @@
+---
+title: "Input Data Specification"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Input Data Specification}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+# library(clinsight)
+```
+
+In order to plug your organizations EDC data into the `clinsight` application, 
\ No newline at end of file

From 91645d8445ed02e3fad8b12ae8b5a96dc78be1c6 Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Wed, 12 Jun 2024 15:12:38 -0400
Subject: [PATCH 03/23] update data_spec

---
 dev/app.R               | 41 ++++++++++++++++++++++++++++
 vignettes/data_spec.Rmd | 60 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 97 insertions(+), 4 deletions(-)
 create mode 100644 dev/app.R

diff --git a/dev/app.R b/dev/app.R
new file mode 100644
index 00000000..cee3cc14
--- /dev/null
+++ b/dev/app.R
@@ -0,0 +1,41 @@
+
+devtools::load_all()
+# pkg_name <- "clinsight"
+# library(pkg_name, character.only = TRUE)
+
+
+# datapath <- "data1pt"
+datapath <- app_sys("tests/testthat/fixtures/csvtestdata") # For interactive use
+
+metadata <- get_metadata(filepath = here::here("data-raw/metadata.xlsx"))
+usethis::use_data(metadata, overwrite = TRUE) # do I need this?
+my_raw_data <- get_raw_data(data_path = datapath, column_specs = metadata$column_specs)
+  # fix_multiple_choice_vars() - metadata not found
+  
+merged_data <- merge_meta_with_data(
+  data = my_raw_data,
+  meta = metadata
+  )
+# tempdir not useful for production mode
+data_folder <- "."
+data_path <- file.path(data_folder, 
+                       "merged_data.rds")
+saveRDS(merged_data, data_path)
+db_path <- file.path(data_folder, "user_db.sqlite")
+
+# if test_mode == FALSE, you'll need to setup...
+# DB_SECRET env var to setup credentials db
+usethis::edit_r_environ()
+Sys.getenv("DB_SECRET")
+
+# initiate the user db
+db_create(get_review_data(merged_data),
+          db_path = db_path
+          )
+
+run_app(
+  data = data_path, #merged_data, # or db_path works too
+  # user_db = db_path, # defaults to "user_db.sqlite"
+  test_mode = FALSE#, 
+  # onStart = \(){onStop(\(){unlink(data_folder, recursive = TRUE)})} # be careful here
+)
diff --git a/vignettes/data_spec.Rmd b/vignettes/data_spec.Rmd
index 3d5c2186..31341d8b 100644
--- a/vignettes/data_spec.Rmd
+++ b/vignettes/data_spec.Rmd
@@ -6,16 +6,68 @@ vignette: >
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 ---
-
 ```{r, include = FALSE}
 knitr::opts_chunk$set(
   collapse = TRUE,
   comment = "#>"
 )
 ```
-
 ```{r setup}
-# library(clinsight)
+library(clinsight)
+```
+In order to plug your organizations EDC data into the `clinsight` application,
+you'll notice the `run_app()` function in `app.R` collects a `data` argument.
+This vignette is all about the expected format of that `data` object / file.
+
+Depending on the EDC vendor used, the size, shape, and format of their "raw
+data" may vary. We've compiled a few functions that help admin users pre-process
+the data they own. However, before we deep dive into how to use those, let's
+focus on the final result of those pre-processing steps in order to
+understand what we need to start using the app.
+
+## `Data` Specification
+
+Baked into the `clinsight` package is an internal data set called
+`clinsightful_data`. Here is a preview of that data:
+
+```{r clinsightful_data}
+# data("clinsightful_data") # Run to load the data into an R session. 
+head(clinsight::clinsightful_data)
+# colnames(clinsightful_data)
 ```
 
-In order to plug your organizations EDC data into the `clinsight` application, 
\ No newline at end of file
+### Column specs
+
+- `site_code`: 
+- `subject_id`: 
+- `event_repeat`: 
+- `event_id`: 
+- `event_name`: 
+- `event_date`: 
+- `form_id`: 
+- `form_repeat`: 
+- `edit_date_time`: 
+- `db_update_time`: 
+- `region`: 
+- `day`: 
+- `vis_day`: 
+- `vis_num`: 
+- `event_label`: 
+- `item_name`: 
+- `item_type`: 
+- `item_group`: 
+- `item_value`: 
+- `item_unit`: 
+- `lower_lim`: 
+- `upper_lim`: 
+- `significance`: 
+- `reason_notdone`: 
+
+For more information about 
+
+## Pre-processing
+
+### Raw Data
+
+### Metadata
+

From 8e32b82d3712f538617cfe300107e65a844495c8 Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Wed, 26 Jun 2024 14:18:09 -0400
Subject: [PATCH 04/23] commit stuff

---
 R/data.R     | 26 ++++++++++++++++++++++++--
 R/run_app.R  | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 dev/02_dev.R | 30 ++++++++++++++++++++++++++++++
 3 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/R/data.R b/R/data.R
index 789f5b70..efa9e758 100644
--- a/R/data.R
+++ b/R/data.R
@@ -42,9 +42,31 @@
 #' 
 "col_palette"
 
+
 #' Clinical Trial test data
 #'
-#' A data frame containing randomly created clinical trial data. Used for
-#' testing purposes.
+#' A data.frame containing randomly created clinical trial data. Acceptable for 
+#' the `data` argument in `run_app()` & used for testing purposes.
+#' 
+#' @format a data.frame with 6,483 rows and 24 variables. 
+#' 
 #' @source Created with `data-raw/create_random_data.R`
 "clinsightful_data"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/R/run_app.R b/R/run_app.R
index 30206164..fcff6ba3 100644
--- a/R/run_app.R
+++ b/R/run_app.R
@@ -1,8 +1,9 @@
 #' Run the Shiny Application
 #'
-#' @param meta A data frame containing metadata.
+#' @param meta A data frame containing metadata. See `details` below for data
+#'   specification.
 #' @param data Either a data frame or a character string with the path to the
-#'   app data in .rds format.
+#'   app data in .rds format. See `details` below for data.frame specification.
 #' @param user_db Character string. Path to the app database. If not existing,
 #'   will be created based on app data and metadata, with all data labeled as
 #'   'new'/not yet reviewed.
@@ -13,6 +14,48 @@
 #' @param ... arguments to pass to golem_opts. See `?golem::get_golem_options`
 #'   for more details.
 #' @inheritParams shiny::shinyApp
+#' 
+#' @details
+#' Two of the arguments `meta` and `data` are crucial to successful app
+#' deployment. As such, here are comprehensive data specifications for these
+#' objects:
+#' 
+#' Column specs for the `data` object:
+#' - `site_code`: character or integer, identifier for study site; If an integer,
+#'    recommended to add prefix "Site" as this will display more intuitively in
+#'    the application's UI
+#' - `subject_id`: character, unique identifier for a subject
+#' - `event_repeat`: integer, helps keep track of unique `event_id` for a single
+#'    `subjec_id` and `event_date`
+#' - `event_id`: character, names that help classify types of  `event_name`s
+#'    into like-groups, generally characterized by site visits. For example, 
+#'    "SCR" for the screening visit, "VIS" for Visit X (where X is some integer),
+#'    and "EXIT" for when the patient exits the study trial. However, some
+#'    `event_id`s track events that could apply outside of any visit, like AE,
+#'    ConMed, Medical History, etc.
+#' - `event_name`: character, an "event" generally characterizes some sort of
+#'    site visit, whether that be a "Screening", "Visit X" (where X is some
+#'    integer), "Exit", or "Any Visit".
+#' - `event_date`: Date, the date associated with `event_name`
+#' - `form_id`: character, a unique identifier for forms.
+#' - `form_repeat`: integer, 
+#' - `edit_date_time`: datetime (POSIXct), 
+#' - `db_update_time`: datetime (POSIXct), 
+#' - `region`: character, 
+#' - `day`: difftime num???
+#' - `vis_day`: numeric,
+#' - `vis_num`: numeric,
+#' - `event_label`: character, 
+#' - `item_name`:  character, 
+#' - `item_type`:  character, 
+#' - `item_group`:  character, 
+#' - `item_value`:  character, 
+#' - `item_unit`:  character, 
+#' - `lower_lim`: numeric,
+#' - `upper_lim`: numeric, 
+#' - `significance`:  character, 
+#' - `reason_notdone`:  character, 
+#' 
 #'
 #' @export
 #' 
diff --git a/dev/02_dev.R b/dev/02_dev.R
index 41516453..5e31a17e 100644
--- a/dev/02_dev.R
+++ b/dev/02_dev.R
@@ -11,6 +11,36 @@
 #### CURRENT FILE: DEV SCRIPT #####
 ###################################
 
+# Exploring for insights on data spec
+unique(clinsightful_data[,c("event_id","event_name")])
+unique(clinsightful_data[,c("form_id","form_repeat")])
+unique(clinsightful_data[,c("event_repeat","event_id","event_name")])
+unique(clinsightful_data[,c("event_repeat","event_id","event_name","form_id","form_repeat")])
+
+
+d_1pat <- clinsightful_data |>
+  filter(subject_id == "BEL_04_772") |>
+  filter(event_id == "COMMON_CM")
+
+library(dplyr)
+clinsightful_data |>
+  filter(subject_id == "BEL_04_772") |>
+  group_by(subject_id, event_id, event_repeat, event_date) |>
+  summarize(n = n()) |>
+  # filter(event_repeat != form_repeat) |>
+  print(n = 36)
+
+d_1pat <- clinsightful_data |>
+  filter(subject_id == "BEL_04_772") |>
+  filter(event_id == "COMMON_AE")
+  
+clinsightful_data |>
+  filter(subject_id == "BEL_04_772") |>
+  group_by(subject_id, event_id, event_repeat, event_date, form_repeat) |>
+  summarize(n = n()) |>
+  filter(event_repeat != form_repeat) |>
+  print(n = 36)
+
 # Engineering
 
 ## Dependencies ----

From 85d45b108e13b8d36630d5630ffc975a5953d96b Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Thu, 27 Jun 2024 08:49:34 -0400
Subject: [PATCH 05/23] added 'under construction' note to vignette

---
 dev/02_dev.R            |  2 +-
 vignettes/data_spec.Rmd | 10 +++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/dev/02_dev.R b/dev/02_dev.R
index 5e31a17e..07557991 100644
--- a/dev/02_dev.R
+++ b/dev/02_dev.R
@@ -17,7 +17,7 @@ unique(clinsightful_data[,c("form_id","form_repeat")])
 unique(clinsightful_data[,c("event_repeat","event_id","event_name")])
 unique(clinsightful_data[,c("event_repeat","event_id","event_name","form_id","form_repeat")])
 
-
+data("clinsightful_data")
 d_1pat <- clinsightful_data |>
   filter(subject_id == "BEL_04_772") |>
   filter(event_id == "COMMON_CM")
diff --git a/vignettes/data_spec.Rmd b/vignettes/data_spec.Rmd
index 31341d8b..3d3a486c 100644
--- a/vignettes/data_spec.Rmd
+++ b/vignettes/data_spec.Rmd
@@ -1,5 +1,5 @@
 ---
-title: "Input Data Specification"
+title: "Input Data Specifications"
 output: rmarkdown::html_vignette
 vignette: >
   %\VignetteIndexEntry{Input Data Specification}
@@ -15,6 +15,14 @@ knitr::opts_chunk$set(
 ```{r setup}
 library(clinsight)
 ```
+Note: this page is "under construction" until the `clinsight` package developers
+create helper functions that transform at least one other EDC data source into 
+the desired format. In the meantime, this page provides useful documentation of 
+the existing data spec. Eventually, it will also share guidelines about how to
+combine your input data with metadata sources, etc.
+
+## Intro
+
 In order to plug your organizations EDC data into the `clinsight` application,
 you'll notice the `run_app()` function in `app.R` collects a `data` argument.
 This vignette is all about the expected format of that `data` object / file.

From bfac0e295873a878e2d541116c285e112bf5791c Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Thu, 27 Jun 2024 14:46:53 -0400
Subject: [PATCH 06/23] update Rd files

---
 R/run_app.R              | 65 ++++++++++++++++++++++++----------
 dev/02_dev.R             | 41 ++++++++++++++++++++++
 man/clinsightful_data.Rd |  6 ++--
 man/run_app.Rd           | 75 ++++++++++++++++++++++++++++++++++++++--
 vignettes/data_spec.Rmd  |  3 ++
 5 files changed, 166 insertions(+), 24 deletions(-)

diff --git a/R/run_app.R b/R/run_app.R
index fcff6ba3..7bb45893 100644
--- a/R/run_app.R
+++ b/R/run_app.R
@@ -26,7 +26,7 @@
 #'    the application's UI
 #' - `subject_id`: character, unique identifier for a subject
 #' - `event_repeat`: integer, helps keep track of unique `event_id` for a single
-#'    `subjec_id` and `event_date`
+#'    `subject_id` and `event_date`
 #' - `event_id`: character, names that help classify types of  `event_name`s
 #'    into like-groups, generally characterized by site visits. For example, 
 #'    "SCR" for the screening visit, "VIS" for Visit X (where X is some integer),
@@ -37,24 +37,51 @@
 #'    site visit, whether that be a "Screening", "Visit X" (where X is some
 #'    integer), "Exit", or "Any Visit".
 #' - `event_date`: Date, the date associated with `event_name`
-#' - `form_id`: character, a unique identifier for forms.
-#' - `form_repeat`: integer, 
-#' - `edit_date_time`: datetime (POSIXct), 
-#' - `db_update_time`: datetime (POSIXct), 
-#' - `region`: character, 
-#' - `day`: difftime num???
-#' - `vis_day`: numeric,
-#' - `vis_num`: numeric,
-#' - `event_label`: character, 
-#' - `item_name`:  character, 
-#' - `item_type`:  character, 
-#' - `item_group`:  character, 
-#' - `item_value`:  character, 
-#' - `item_unit`:  character, 
-#' - `lower_lim`: numeric,
-#' - `upper_lim`: numeric, 
-#' - `significance`:  character, 
-#' - `reason_notdone`:  character, 
+#' - `form_id`: character, a unique identifier for the form the `item_name` metric
+#'    and `item_value` were pulled from. Note: when `item_type` is continuous,
+#'    `form_id` can contain several different `item_group`s. However, when
+#'    `item_type` is 'other', `item_group` can be made up of several `form_id`
+#'    values.
+#' - `form_repeat`: integer, helps keep track of unique `item_name`s collected
+#'    from a specific `form_id` for a given `subject_id`. `form_repeat` is
+#'    particularly helpful when consolidating data like Adverse Events into this
+#'    data format. Specifically, if more than one AE is collected on a patient,
+#'    they'll have more than one `form_repeat`
+#' - `edit_date_time`: datetime (POSIXct), the last time this record was edited
+#' - `db_update_time`: datetime (POSIXct), the last time the database storing this
+#'    record was updated.
+#' - `region`: character, describing the region code that `site_code` falls under
+#' - `day`: a difftime number, meaning it contains both a number and unit of 
+#'    time. It measures the number of days each visit is from screening
+#' - `vis_day`: numeric, a numeric representation of `day`
+#' - `vis_num`: numeric, a numeric representation of `event_name`
+#' - `event_label`: character, an abbreviation of `event_name`
+#' - `item_name`:  character, describes a metric or parameter of interest.
+#' - `item_type`:  character, classifies `item_name`s into either 'continuous' 
+#'    or 'other', where continuous types are those generally associated with the
+#'    CDISC "basic data structure" (BDS). That is, each `item_name` metric is
+#'    collected over time at a patient visit (`event_name`). The 'other' type
+#'    represents all non-time dependent measures, like demographic info, adverse
+#'    events, Medications, medical history, etc.
+#' - `item_group`:  character, provides a high-level category that groups 
+#'    like-`item_name`s together. For example, an `item_group` = 'Vital Signs'
+#'    will group together pertinent `item_name` metrics like BMI, Pulse, Blood
+#'    pressure, etc.
+#' - `item_value`:  character, the measurement collected for a given `item_name`.
+#'    The value collected may be a number like 150 (when collecting a patient's
+#'    weight) or a word (such as 'white' for the subject's race).
+#' - `item_unit`:  character, tracking the unit of measurement for `item_name` 
+#'    and `item_value`.
+#' - `lower_lim`: numeric, some `item_name`s (particularly the 'continuous' type)
+#'    have a pre-defined range of values that are considered normal. This is the
+#'    lower limit to that range.
+#' - `upper_lim`: numeric, some `item_name`s (particularly the 'continuous' type)
+#'    have a pre-defined range of values that are considered normal. This is the
+#'    upper limit to that range.
+#' - `significance`:  character, either 'CS' which means 'Clinically Significant'
+#'    or 'NCS' which means 'Not Clinically Significant'
+#' - `reason_notdone`:  character, an effort to describe why the `item_value`
+#'    field is `NA` / missing.
 #' 
 #'
 #' @export
diff --git a/dev/02_dev.R b/dev/02_dev.R
index 07557991..f5f06740 100644
--- a/dev/02_dev.R
+++ b/dev/02_dev.R
@@ -12,16 +12,57 @@
 ###################################
 
 # Exploring for insights on data spec
+unique(clinsightful_data[,c("site_code","region")]) |> arrange(site_code)
 unique(clinsightful_data[,c("event_id","event_name")])
 unique(clinsightful_data[,c("form_id","form_repeat")])
 unique(clinsightful_data[,c("event_repeat","event_id","event_name")])
 unique(clinsightful_data[,c("event_repeat","event_id","event_name","form_id","form_repeat")])
+unique(clinsightful_data[,c("event_repeat","event_id","event_name", "day")])
+unique(clinsightful_data[,c("event_repeat","event_id","event_name", "event_label")])
+
+# form exploration
+unique(clinsightful_data[,c("form_id")])
+unique(clinsightful_data[,c("form_id","item_group", "item_type")])
+
+# other
+unique(clinsightful_data[,c("item_group","item_type")])
+other <- clinsightful_data |>
+  filter(item_type == "other")
+unique(other[,c("item_group","item_type","item_name")]) |>
+  print(n=40)
+
+# bds
+unique(clinsightful_data[,c("item_group","item_type")])
+bds <- clinsightful_data |>
+  filter(item_type != "other")
+unique(bds[,c("item_group","item_type","item_name")]) |>
+  print(n=40)
+
+
+unique(clinsightful_data[,c("item_group","item_type")])
+other <- clinsightful_data |>
+  filter(item_group == "Vital signs")
+unique(other[,c("item_group","item_type","item_name")]) |>
+  print(n=40)
+
+# dirty?
+library(clinsight)
+data("clinsightful_data")
+dirty <- clinsightful_data |>
+  filter(item_value %in% c("µg/h","µg/ml"))
+
+str(clinsightful_data$item_value)
+
+# day
+class(clinsightful_data$day)
 
+# exploring events
 data("clinsightful_data")
 d_1pat <- clinsightful_data |>
   filter(subject_id == "BEL_04_772") |>
   filter(event_id == "COMMON_CM")
 
+# exploring event_repeat & event_date
 library(dplyr)
 clinsightful_data |>
   filter(subject_id == "BEL_04_772") |>
diff --git a/man/clinsightful_data.Rd b/man/clinsightful_data.Rd
index dd8d3844..21ae85d0 100644
--- a/man/clinsightful_data.Rd
+++ b/man/clinsightful_data.Rd
@@ -5,7 +5,7 @@
 \alias{clinsightful_data}
 \title{Clinical Trial test data}
 \format{
-An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 6483 rows and 24 columns.
+a data.frame with 6,483 rows and 24 variables.
 }
 \source{
 Created with \code{data-raw/create_random_data.R}
@@ -14,7 +14,7 @@ Created with \code{data-raw/create_random_data.R}
 clinsightful_data
 }
 \description{
-A data frame containing randomly created clinical trial data. Used for
-testing purposes.
+A data.frame containing randomly created clinical trial data. Acceptable for
+the \code{data} argument in \code{run_app()} & used for testing purposes.
 }
 \keyword{datasets}
diff --git a/man/run_app.Rd b/man/run_app.Rd
index 050d0d70..e3bc2492 100644
--- a/man/run_app.Rd
+++ b/man/run_app.Rd
@@ -39,10 +39,11 @@ request to determine whether the \code{ui} should be used to handle the
 request. Note that the entire request path must match the regular
 expression in order for the match to be considered successful.}
 
-\item{meta}{A data frame containing metadata.}
+\item{meta}{A data frame containing metadata. See \code{details} below for data
+specification.}
 
 \item{data}{Either a data frame or a character string with the path to the
-app data in .rds format.}
+app data in .rds format. See \code{details} below for data.frame specification.}
 
 \item{user_db}{Character string. Path to the app database. If not existing,
 will be created based on app data and metadata, with all data labeled as
@@ -61,3 +62,73 @@ for more details.}
 \description{
 Run the Shiny Application
 }
+\details{
+Two of the arguments \code{meta} and \code{data} are crucial to successful app
+deployment. As such, here are comprehensive data specifications for these
+objects:
+
+Column specs for the \code{data} object:
+\itemize{
+\item \code{site_code}: character or integer, identifier for study site; If an integer,
+recommended to add prefix "Site" as this will display more intuitively in
+the application's UI
+\item \code{subject_id}: character, unique identifier for a subject
+\item \code{event_repeat}: integer, helps keep track of unique \code{event_id} for a single
+\code{subject_id} and \code{event_date}
+\item \code{event_id}: character, names that help classify types of  \code{event_name}s
+into like-groups, generally characterized by site visits. For example,
+"SCR" for the screening visit, "VIS" for Visit X (where X is some integer),
+and "EXIT" for when the patient exits the study trial. However, some
+\code{event_id}s track events that could apply outside of any visit, like AE,
+ConMed, Medical History, etc.
+\item \code{event_name}: character, an "event" generally characterizes some sort of
+site visit, whether that be a "Screening", "Visit X" (where X is some
+integer), "Exit", or "Any Visit".
+\item \code{event_date}: Date, the date associated with \code{event_name}
+\item \code{form_id}: character, a unique identifier for the form the \code{item_name} metric
+and \code{item_value} were pulled from. Note: when \code{item_type} is continuous,
+\code{form_id} can contain several different \code{item_group}s. However, when
+\code{item_type} is 'other', \code{item_group} can be made up of several \code{form_id}
+values.
+\item \code{form_repeat}: integer, helps keep track of unique \code{item_name}s collected
+from a specific \code{form_id} for a given \code{subject_id}. \code{form_repeat} is
+particularly helpful when consolidating data like Adverse Events into this
+data format. Specifically, if more than one AE is collected on a patient,
+they'll have more than one \code{form_repeat}
+\item \code{edit_date_time}: datetime (POSIXct), the last time this record was edited
+\item \code{db_update_time}: datetime (POSIXct), the last time the database storing this
+record was updated.
+\item \code{region}: character, describing the region code that \code{site_code} falls under
+\item \code{day}: a difftime number, meaning it contains both a number and unit of
+time. It measures the number of days each visit is from screening
+\item \code{vis_day}: numeric, a numeric representation of \code{day}
+\item \code{vis_num}: numeric, a numeric representation of \code{event_name}
+\item \code{event_label}: character, an abbreviation of \code{event_name}
+\item \code{item_name}:  character, describes a metric or parameter of interest.
+\item \code{item_type}:  character, classifies \code{item_name}s into either 'continuous'
+or 'other', where continuous types are those generally associated with the
+CDISC "basic data structure" (BDS). That is, each \code{item_name} metric is
+collected over time at a patient visit (\code{event_name}). The 'other' type
+represents all non-time dependent measures, like demographic info, adverse
+events, Medications, medical history, etc.
+\item \code{item_group}:  character, provides a high-level category that groups
+like-\code{item_name}s together. For example, an \code{item_group} = 'Vital Signs'
+will group together pertinent \code{item_name} metrics like BMI, Pulse, Blood
+pressure, etc.
+\item \code{item_value}:  character, the measurement collected for a given \code{item_name}.
+The value collected may be a number like 150 (when collecting a patient's
+weight) or a word (such as 'white' for the subject's race).
+\item \code{item_unit}:  character, tracking the unit of measurement for \code{item_name}
+and \code{item_value}.
+\item \code{lower_lim}: numeric, some \code{item_name}s (particularly the 'continuous' type)
+have a pre-defined range of values that are considered normal. This is the
+lower limit to that range.
+\item \code{upper_lim}: numeric, some \code{item_name}s (particularly the 'continuous' type)
+have a pre-defined range of values that are considered normal. This is the
+upper limit to that range.
+\item \code{significance}:  character, either 'CS' which means 'Clinically Significant'
+or 'NCS' which means 'Not Clinically Significant'
+\item \code{reason_notdone}:  character, an effort to describe why the \code{item_value}
+field is \code{NA} / missing.
+}
+}
diff --git a/vignettes/data_spec.Rmd b/vignettes/data_spec.Rmd
index 3d3a486c..a85e361d 100644
--- a/vignettes/data_spec.Rmd
+++ b/vignettes/data_spec.Rmd
@@ -46,6 +46,8 @@ head(clinsight::clinsightful_data)
 
 ### Column specs
 
+Refer to `run_app()` documentation by running `?clinsight::run_app`.
+
 - `site_code`: 
 - `subject_id`: 
 - `event_repeat`: 
@@ -74,6 +76,7 @@ head(clinsight::clinsightful_data)
 For more information about 
 
 ## Pre-processing
+Place holder for helper functions
 
 ### Raw Data
 

From 797bb43d850cfea347af2f252eb00c5ce7590ea5 Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Thu, 27 Jun 2024 14:59:46 -0400
Subject: [PATCH 07/23] updating run_app.rd

---
 R/run_app.R    | 19 +++++++++++++++++++
 dev/app.R      | 41 -----------------------------------------
 man/run_app.Rd | 18 ++++++++++++++++++
 3 files changed, 37 insertions(+), 41 deletions(-)
 delete mode 100644 dev/app.R

diff --git a/R/run_app.R b/R/run_app.R
index 7bb45893..083223bf 100644
--- a/R/run_app.R
+++ b/R/run_app.R
@@ -83,6 +83,25 @@
 #' - `reason_notdone`:  character, an effort to describe why the `item_value`
 #'    field is `NA` / missing.
 #' 
+#' 
+#' Specifications for list items that may be included in the `meta` object:
+#' 
+#' `column_specs` a data.frame
+#' 
+#' `events` a data.frame
+#' 
+#' `common_forms` a data.frame
+#' 
+#' `study_forms` a data.frame
+#' 
+#' `general` a data.frame
+#' 
+#' `groups` a data.frame
+#' 
+#' `table_names` a data.frame
+#' 
+#' `items_expanded` a data.frame
+#' 
 #'
 #' @export
 #' 
diff --git a/dev/app.R b/dev/app.R
deleted file mode 100644
index cee3cc14..00000000
--- a/dev/app.R
+++ /dev/null
@@ -1,41 +0,0 @@
-
-devtools::load_all()
-# pkg_name <- "clinsight"
-# library(pkg_name, character.only = TRUE)
-
-
-# datapath <- "data1pt"
-datapath <- app_sys("tests/testthat/fixtures/csvtestdata") # For interactive use
-
-metadata <- get_metadata(filepath = here::here("data-raw/metadata.xlsx"))
-usethis::use_data(metadata, overwrite = TRUE) # do I need this?
-my_raw_data <- get_raw_data(data_path = datapath, column_specs = metadata$column_specs)
-  # fix_multiple_choice_vars() - metadata not found
-  
-merged_data <- merge_meta_with_data(
-  data = my_raw_data,
-  meta = metadata
-  )
-# tempdir not useful for production mode
-data_folder <- "."
-data_path <- file.path(data_folder, 
-                       "merged_data.rds")
-saveRDS(merged_data, data_path)
-db_path <- file.path(data_folder, "user_db.sqlite")
-
-# if test_mode == FALSE, you'll need to setup...
-# DB_SECRET env var to setup credentials db
-usethis::edit_r_environ()
-Sys.getenv("DB_SECRET")
-
-# initiate the user db
-db_create(get_review_data(merged_data),
-          db_path = db_path
-          )
-
-run_app(
-  data = data_path, #merged_data, # or db_path works too
-  # user_db = db_path, # defaults to "user_db.sqlite"
-  test_mode = FALSE#, 
-  # onStart = \(){onStop(\(){unlink(data_folder, recursive = TRUE)})} # be careful here
-)
diff --git a/man/run_app.Rd b/man/run_app.Rd
index e3bc2492..f00bde8f 100644
--- a/man/run_app.Rd
+++ b/man/run_app.Rd
@@ -131,4 +131,22 @@ or 'NCS' which means 'Not Clinically Significant'
 \item \code{reason_notdone}:  character, an effort to describe why the \code{item_value}
 field is \code{NA} / missing.
 }
+
+Specifications for list items that may be included in the \code{meta} object:
+
+\code{column_specs} a data.frame
+
+\code{events} a data.frame
+
+\code{common_forms} a data.frame
+
+\code{study_forms} a data.frame
+
+\code{general} a data.frame
+
+\code{groups} a data.frame
+
+\code{table_names} a data.frame
+
+\code{items_expanded} a data.frame
 }

From 1de747d802835f8011d8e99be8c4a7240606841c Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Thu, 27 Jun 2024 15:05:20 -0400
Subject: [PATCH 08/23] remove unneeded changes

---
 R/data.R                    | 14 --------------
 renv/profiles/dev/renv.lock |  2 +-
 2 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/R/data.R b/R/data.R
index efa9e758..615ada6d 100644
--- a/R/data.R
+++ b/R/data.R
@@ -56,17 +56,3 @@
 
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/renv/profiles/dev/renv.lock b/renv/profiles/dev/renv.lock
index 00ff32f1..45905c3c 100644
--- a/renv/profiles/dev/renv.lock
+++ b/renv/profiles/dev/renv.lock
@@ -1,6 +1,6 @@
 {
   "R": {
-    "Version": "4.3.3",
+    "Version": "4.3.1",
     "Repositories": [
       {
         "Name": "RSPM",

From 402fa51f1acb82cd51a82da79a6faadf91073e9b Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Thu, 27 Jun 2024 16:09:27 -0400
Subject: [PATCH 09/23] update run_app.Rd

---
 R/run_app.R    |  5 ++++-
 man/run_app.Rd | 20 ++++++++++++--------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/R/run_app.R b/R/run_app.R
index f613fe83..351e5370 100644
--- a/R/run_app.R
+++ b/R/run_app.R
@@ -1,9 +1,12 @@
 #' Run the Shiny Application
-#'
+#' 
 #' @param meta A data frame containing metadata. See `details` below for data
 #'   specification.
 #' @param data Either a data frame or a character string with the path to the
 #'   app data in .rds format. See `details` below for data.frame specification.
+#' @param data_folder Character string. The folder in which all data resides is
+#'   usually set in the config.yml file. However, this can be overwritten if a
+#'   path is set in this argument. Useful for testing purposes.
 #' @param user_db Character string. Path to the app database. If not existing,
 #'   will be created based on app data and metadata, with all data labeled as
 #'   'new'/not yet reviewed.
diff --git a/man/run_app.Rd b/man/run_app.Rd
index 50660a63..cd323aaf 100644
--- a/man/run_app.Rd
+++ b/man/run_app.Rd
@@ -36,6 +36,18 @@ request to determine whether the \code{ui} should be used to handle the
 request. Note that the entire request path must match the regular
 expression in order for the match to be considered successful.}
 
+\item{data_folder}{Character string. The folder in which all data resides is
+usually set in the config.yml file. However, this can be overwritten if a
+path is set in this argument. Useful for testing purposes.}
+
+\item{credentials_pwd}{Character string with the credentials' database
+password.}
+
+\item{test_mode}{Logical, whether to run the application in test mode.}
+
+\item{...}{arguments to pass to golem_opts. See \code{?golem::get_golem_options}
+for more details.}
+
 \item{meta}{A data frame containing metadata. See \code{details} below for data
 specification.}
 
@@ -47,14 +59,6 @@ will be created based on app data and metadata, with all data labeled as
 'new'/not yet reviewed.}
 
 \item{credentials_db}{Character string. Path to the credentials database.}
-
-\item{credentials_pwd}{Character string with the credentials' database
-password.}
-
-\item{test_mode}{Logical, whether to run the application in test mode.}
-
-\item{...}{arguments to pass to golem_opts. See \code{?golem::get_golem_options}
-for more details.}
 }
 \description{
 Run the Shiny Application

From 1aa41b20e16d72ea6d38632bb788505f4f79de9c Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Thu, 27 Jun 2024 16:14:03 -0400
Subject: [PATCH 10/23] Increment version number to 0.0.0.9003

---
 DESCRIPTION | 2 +-
 NEWS.md     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index e07271ca..2727adfa 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: clinsight
 Title: ClinSight
-Version: 0.0.0.9002
+Version: 0.0.0.9003
 Authors@R: c(
     person("Leonard Daniël", "Samson", , "lsamson@gcp-service.com", role = c("cre", "aut")),
     person("GCP-Service International Ltd.& Co. KG", role = "fnd")
diff --git a/NEWS.md b/NEWS.md
index 0569b3f6..7b59cfd1 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,12 +7,12 @@
 - Created two renv profiles, one for development and one for production. Goal is 
 to minimize the package dependencies of the production version.
 - Removed development package dependencies (for example devtools) that were not needed to run the application.  
-- Improved data anonimization.
+- Improved data anonymization.
 - Changed license.
-
 - Updated Description file.
 - Improved reading of data files within clinsight::run_app()
 - Improved creating test result report.
+- Added data specification to `run_app()` documentation
 
 ## Bug fixes
 

From 3b9f32c6caf31b59ec8e99a7baf0db1743713324 Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Thu, 27 Jun 2024 16:37:47 -0400
Subject: [PATCH 11/23] update run_app() documentation now that arguments have
 been shifted to a config file

---
 R/run_app.R    | 27 ++++++++++++++-------------
 man/run_app.Rd | 32 +++++++++++++++-----------------
 2 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/R/run_app.R b/R/run_app.R
index 351e5370..a4890ccd 100644
--- a/R/run_app.R
+++ b/R/run_app.R
@@ -1,16 +1,8 @@
 #' Run the Shiny Application
 #' 
-#' @param meta A data frame containing metadata. See `details` below for data
-#'   specification.
-#' @param data Either a data frame or a character string with the path to the
-#'   app data in .rds format. See `details` below for data.frame specification.
 #' @param data_folder Character string. The folder in which all data resides is
 #'   usually set in the config.yml file. However, this can be overwritten if a
 #'   path is set in this argument. Useful for testing purposes.
-#' @param user_db Character string. Path to the app database. If not existing,
-#'   will be created based on app data and metadata, with all data labeled as
-#'   'new'/not yet reviewed.
-#' @param credentials_db Character string. Path to the credentials database.
 #' @param credentials_pwd Character string with the credentials' database
 #'   password.
 #' @param test_mode Logical, whether to run the application in test mode.
@@ -19,11 +11,20 @@
 #' @inheritParams shiny::shinyApp
 #' 
 #' @details
-#' Two of the arguments `meta` and `data` are crucial to successful app
-#' deployment. As such, here are comprehensive data specifications for these
-#' objects:
+#' There are several elements defined in `golem-config.yml` that require
+#' configuration before launching the application for the first time. To name a
+#' few:
 #' 
-#' Column specs for the `data` object:
+#' - `user_db` a Character string providing the path to the app database's.
+#'   If it does not exist, one will be created based on app data and metadata,
+#'   with all data labeled as new'/not yet reviewed.
+#' - `credentials_db` Character string. Path to the credentials database.
+#' 
+#' The other two are `meta_data` and `study_data`, which file paths to RDS files
+#' pertinent to successful app deployment. As such, here are comprehensive data
+#' specifications for these objects:
+#' 
+#' Column specs for the `study_data` RDS object:
 #' - `site_code`: character or integer, identifier for study site; If an integer,
 #'    recommended to add prefix "Site" as this will display more intuitively in
 #'    the application's UI
@@ -87,7 +88,7 @@
 #'    field is `NA` / missing.
 #' 
 #' 
-#' Specifications for list items that may be included in the `meta` object:
+#' Specifications for list items that may be included in the `meta_data` RDS:
 #' 
 #' `column_specs` a data.frame
 #' 
diff --git a/man/run_app.Rd b/man/run_app.Rd
index cd323aaf..cfe81aa4 100644
--- a/man/run_app.Rd
+++ b/man/run_app.Rd
@@ -47,28 +47,26 @@ password.}
 
 \item{...}{arguments to pass to golem_opts. See \code{?golem::get_golem_options}
 for more details.}
-
-\item{meta}{A data frame containing metadata. See \code{details} below for data
-specification.}
-
-\item{data}{Either a data frame or a character string with the path to the
-app data in .rds format. See \code{details} below for data.frame specification.}
-
-\item{user_db}{Character string. Path to the app database. If not existing,
-will be created based on app data and metadata, with all data labeled as
-'new'/not yet reviewed.}
-
-\item{credentials_db}{Character string. Path to the credentials database.}
 }
 \description{
 Run the Shiny Application
 }
 \details{
-Two of the arguments \code{meta} and \code{data} are crucial to successful app
-deployment. As such, here are comprehensive data specifications for these
-objects:
+There are several elements defined in \code{golem-config.yml} that require
+configuration before launching the application for the first time. To name a
+few:
+\itemize{
+\item \code{user_db} a Character string providing the path to the app database's.
+If it does not exist, one will be created based on app data and metadata,
+with all data labeled as new'/not yet reviewed.
+\item \code{credentials_db} Character string. Path to the credentials database.
+}
+
+The other two are \code{meta_data} and \code{study_data}, which file paths to RDS files
+pertinent to successful app deployment. As such, here are comprehensive data
+specifications for these objects:
 
-Column specs for the \code{data} object:
+Column specs for the \code{study_data} RDS object:
 \itemize{
 \item \code{site_code}: character or integer, identifier for study site; If an integer,
 recommended to add prefix "Site" as this will display more intuitively in
@@ -133,7 +131,7 @@ or 'NCS' which means 'Not Clinically Significant'
 field is \code{NA} / missing.
 }
 
-Specifications for list items that may be included in the \code{meta} object:
+Specifications for list items that may be included in the \code{meta_data} RDS:
 
 \code{column_specs} a data.frame
 

From 9a15f5fc8a7056737cf1a227cfdd2f1d4a61f167 Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Thu, 27 Jun 2024 16:41:54 -0400
Subject: [PATCH 12/23] fix typo issues

---
 R/run_app.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/run_app.R b/R/run_app.R
index a4890ccd..2803e25a 100644
--- a/R/run_app.R
+++ b/R/run_app.R
@@ -15,14 +15,14 @@
 #' configuration before launching the application for the first time. To name a
 #' few:
 #' 
-#' - `user_db` a Character string providing the path to the app database's.
+#' - `user_db` a Character string providing the path to the app databases.
 #'   If it does not exist, one will be created based on app data and metadata,
 #'   with all data labeled as new'/not yet reviewed.
 #' - `credentials_db` Character string. Path to the credentials database.
 #' 
-#' The other two are `meta_data` and `study_data`, which file paths to RDS files
-#' pertinent to successful app deployment. As such, here are comprehensive data
-#' specifications for these objects:
+#' The other two are `meta_data` and `study_data`, file paths to the app's primary
+#' source of data, stored as RDS files pertinent to successful app deployment.
+#' As such, here are comprehensive data specifications for these objects:
 #' 
 #' Column specs for the `study_data` RDS object:
 #' - `site_code`: character or integer, identifier for study site; If an integer,

From 9fab16e82c258114e97c7cd04e89eed26332b74d Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Fri, 28 Jun 2024 12:39:23 -0400
Subject: [PATCH 13/23] update run_app.Rd to include 'meta_data' info

---
 R/run_app.R    | 94 +++++++++++++++++++++++++++++++++++++----------
 dev/app.R      | 41 +++++++++++++++++++++
 man/run_app.Rd | 99 ++++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 194 insertions(+), 40 deletions(-)
 create mode 100644 dev/app.R

diff --git a/R/run_app.R b/R/run_app.R
index 2803e25a..34bb8ce0 100644
--- a/R/run_app.R
+++ b/R/run_app.R
@@ -20,11 +20,14 @@
 #'   with all data labeled as new'/not yet reviewed.
 #' - `credentials_db` Character string. Path to the credentials database.
 #' 
-#' The other two are `meta_data` and `study_data`, file paths to the app's primary
-#' source of data, stored as RDS files pertinent to successful app deployment.
-#' As such, here are comprehensive data specifications for these objects:
+#' The other two elements are `meta_data` and `study_data`, accepting file paths
+#' to the app's primary source of data, stored as RDS files. As such, below are
+#' comprehensive data specifications for these objects.
+#' 
+#' ### `study_data`
+#' The RDS file ported to the `study_data` element is a data.frame containing the
+#' following required columns:
 #' 
-#' Column specs for the `study_data` RDS object:
 #' - `site_code`: character or integer, identifier for study site; If an integer,
 #'    recommended to add prefix "Site" as this will display more intuitively in
 #'    the application's UI
@@ -87,24 +90,75 @@
 #' - `reason_notdone`:  character, an effort to describe why the `item_value`
 #'    field is `NA` / missing.
 #' 
+#' ### `meta_data`
 #' 
-#' Specifications for list items that may be included in the `meta_data` RDS:
-#' 
-#' `column_specs` a data.frame
-#' 
-#' `events` a data.frame
-#' 
-#' `common_forms` a data.frame
-#' 
-#' `study_forms` a data.frame
-#' 
-#' `general` a data.frame
-#' 
-#' `groups` a data.frame
-#' 
-#' `table_names` a data.frame
+#' The RDS file ported to the `meta_data` configuration is a list of data.frames
+#' which (not surprisingly) contains metadata used for the application. Prior to
+#' launching the application, the metadata will be merged with the `study_data`.
+#' to dictate which variables will be included in the application, and in which 
+#' tab the variables will be displayed. The goal is that most, if not all 
+#' study-specific data will be captured in the metadata, leaving the scripts to 
+#' run the application largely unaltered between studies.
 #' 
-#' `items_expanded` a data.frame
+#' Specifications for the list of data.frames include:
+#' :
+#'  - `events`: Used to create a simple timeline in the application, with
+#'  predefined number of planned visits, N. It contains the following columns:
+#'      - `event_number`: integer. Example: 0, 1, 2, ..., N
+#'      - `event_name`: character. Example: "Screening", "Visit 1", "Visit 2", 
+#'      ..., "Visit N"
+#'      - `event_label`: character. Example: "V0", "V1", "V2", ..., "VN"
+#'   
+#'  - `common_forms`: Used to select and rename the variables of interest in 
+#'  the raw data when transformed into the desired `study_data` format. Note: 
+#'  creating the `study_data` data.frame should use `merge_meta_with_data()` 
+#'  where (not surprisingly), the metadata is merged with the raw study data.
+#'  `common_forms` contains the columns below:
+#'      - `var`: character, the variable name to display in the table, mapped 
+#'      from a known `item_name` provided in `study_data`. Example: 
+#'      `item_name = "AE Name"` will be replaced by "AE_AETERM" when 
+#'      `var = "AE_AETERM"`.
+#'      - `suffix`: Usually blank in this data.frame. This column is more 
+#'      commonly used in the `study_forms` data.frame
+#'      - `item_name`: character, known `item_name`s found in `study_data`. There
+#'      are certain `item_name`s that are required, even if missing in 
+#'      `study_data`, including: "AE Name", "AE start date", "AE end date", "AE 
+#'      date of worsening", "AE CTCAE severity", "AE CTCAE severity worsening", 
+#'      "Serious Adverse Event", and "SAE Start date".
+#'      - `item_type`: character, known `item_type` corresponding to those found
+#'       for `item_name`s in `study_data`.
+#'      - `item_group`: character, known `item_group` corresponding to those found
+#'       for `item_name`s in `study_data`.
+#'   
+#'  - `study_forms`: Contains the same columns as the data.frame `common_forms`,
+#'   and in addition the columns `unit`, `lower_limit`, `upper_limit`. Used to
+#'   select and rename the raw data variables of interest. In addition, the
+#'   `suffix` column is used more regularly in this "study" context. This is because,
+#'   these variable names may have a consistent trunk / stem, with
+#'   varying suffixes to describe a similar style measurement. So instead of creating a 
+#'   new row for these variables in the `meta_data` data.frame, we allow for inclusion of several
+#'   suffixes. For example, VS_PULSE measures beats/min. Typically, these measures
+#'   are collected using VS_PULSE_VSORRES & VS_PULSE_VSREAND, but in this format,
+#'   we can list the stem "VS_PULSE" as the `var` and `"VSORRES, VSREAND"` in the suffix field.
+#'   As for the new columns, they are defined as follows:
+#'       - `unit`: character, unit of measure
+#'       - `lower_limit`: numeric, the lower limit of what's considered clinically significant
+#'       - `upper_limit`: numeric, the upper limit of what's considered clinically significant
+#'       
+#'   
+#'  - `general`: Contains the same columns as `common_forms` and is used in the 
+#'  same way. That is, it's used to select and rename the raw data when 
+#'  transformed into the desired `study_data` format. Note: 
+#'  creating the `study_data` data.frame should use `merge_meta_with_data()` 
+#'  where (not surprisingly), the metadata is merged with the raw study data. 
+#'  Please refer to the `common_forms` spec above. However,
+#'  I will note that there are certain `item_name`s that are required, even if 
+#'  missing in `study_data`, including: "Age", "Sex", "ECOG", "Eligible",
+#'  "WHO.classification", "DiscontinuationReason", "DrugAdminDate", and
+#'  "DrugAdminDose".
+#'  
+#'  - `groups`: Contains the columns `item_group`, `item_type`, `item_scale`,
+#'   `use_unscaled_limits`.
 #' 
 #'
 #' @export
diff --git a/dev/app.R b/dev/app.R
new file mode 100644
index 00000000..cee3cc14
--- /dev/null
+++ b/dev/app.R
@@ -0,0 +1,41 @@
+
+devtools::load_all()
+# pkg_name <- "clinsight"
+# library(pkg_name, character.only = TRUE)
+
+
+# datapath <- "data1pt"
+datapath <- app_sys("tests/testthat/fixtures/csvtestdata") # For interactive use
+
+metadata <- get_metadata(filepath = here::here("data-raw/metadata.xlsx"))
+usethis::use_data(metadata, overwrite = TRUE) # do I need this?
+my_raw_data <- get_raw_data(data_path = datapath, column_specs = metadata$column_specs)
+  # fix_multiple_choice_vars() - metadata not found
+  
+merged_data <- merge_meta_with_data(
+  data = my_raw_data,
+  meta = metadata
+  )
+# tempdir not useful for production mode
+data_folder <- "."
+data_path <- file.path(data_folder, 
+                       "merged_data.rds")
+saveRDS(merged_data, data_path)
+db_path <- file.path(data_folder, "user_db.sqlite")
+
+# if test_mode == FALSE, you'll need to setup...
+# DB_SECRET env var to setup credentials db
+usethis::edit_r_environ()
+Sys.getenv("DB_SECRET")
+
+# initiate the user db
+db_create(get_review_data(merged_data),
+          db_path = db_path
+          )
+
+run_app(
+  data = data_path, #merged_data, # or db_path works too
+  # user_db = db_path, # defaults to "user_db.sqlite"
+  test_mode = FALSE#, 
+  # onStart = \(){onStop(\(){unlink(data_folder, recursive = TRUE)})} # be careful here
+)
diff --git a/man/run_app.Rd b/man/run_app.Rd
index cfe81aa4..e5172702 100644
--- a/man/run_app.Rd
+++ b/man/run_app.Rd
@@ -56,17 +56,19 @@ There are several elements defined in \code{golem-config.yml} that require
 configuration before launching the application for the first time. To name a
 few:
 \itemize{
-\item \code{user_db} a Character string providing the path to the app database's.
+\item \code{user_db} a Character string providing the path to the app databases.
 If it does not exist, one will be created based on app data and metadata,
 with all data labeled as new'/not yet reviewed.
 \item \code{credentials_db} Character string. Path to the credentials database.
 }
 
-The other two are \code{meta_data} and \code{study_data}, which file paths to RDS files
-pertinent to successful app deployment. As such, here are comprehensive data
-specifications for these objects:
+The other two elements are \code{meta_data} and \code{study_data}, accepting file paths
+to the app's primary source of data, stored as RDS files. As such, below are
+comprehensive data specifications for these objects.
+\subsection{\code{study_data}}{
 
-Column specs for the \code{study_data} RDS object:
+The RDS file ported to the \code{study_data} element is a data.frame containing the
+following required columns:
 \itemize{
 \item \code{site_code}: character or integer, identifier for study site; If an integer,
 recommended to add prefix "Site" as this will display more intuitively in
@@ -130,22 +132,79 @@ or 'NCS' which means 'Not Clinically Significant'
 \item \code{reason_notdone}:  character, an effort to describe why the \code{item_value}
 field is \code{NA} / missing.
 }
+}
 
-Specifications for list items that may be included in the \code{meta_data} RDS:
-
-\code{column_specs} a data.frame
-
-\code{events} a data.frame
-
-\code{common_forms} a data.frame
-
-\code{study_forms} a data.frame
-
-\code{general} a data.frame
-
-\code{groups} a data.frame
+\subsection{\code{meta_data}}{
 
-\code{table_names} a data.frame
+The RDS file ported to the \code{meta_data} configuration is a list of data.frames
+which (not surprisingly) contains metadata used for the application. Prior to
+launching the application, the metadata will be merged with the \code{study_data}.
+to dictate which variables will be included in the application, and in which
+tab the variables will be displayed. The goal is that most, if not all
+study-specific data will be captured in the metadata, leaving the scripts to
+run the application largely unaltered between studies.
 
-\code{items_expanded} a data.frame
+Specifications for the list of data.frames include:
+:
+\itemize{
+\item \code{events}: Used to create a simple timeline in the application, with
+predefined number of planned visits, N. It contains the following columns:
+\itemize{
+\item \code{event_number}: integer. Example: 0, 1, 2, ..., N
+\item \code{event_name}: character. Example: "Screening", "Visit 1", "Visit 2",
+..., "Visit N"
+\item \code{event_label}: character. Example: "V0", "V1", "V2", ..., "VN"
+}
+\item \code{common_forms}: Used to select and rename the variables of interest in
+the raw data when transformed into the desired \code{study_data} format. Note:
+creating the \code{study_data} data.frame should use \code{merge_meta_with_data()}
+where (not surprisingly), the metadata is merged with the raw study data.
+\code{common_forms} contains the columns below:
+\itemize{
+\item \code{var}: character, the variable name to display in the table, mapped
+from a known \code{item_name} provided in \code{study_data}. Example:
+\code{item_name = "AE Name"} will be replaced by "AE_AETERM" when
+\code{var = "AE_AETERM"}.
+\item \code{suffix}: Usually blank in this data.frame. This column is more
+commonly used in the \code{study_forms} data.frame
+\item \code{item_name}: character, known \code{item_name}s found in \code{study_data}. There
+are certain \code{item_name}s that are required, even if missing in
+\code{study_data}, including: "AE Name", "AE start date", "AE end date", "AE
+date of worsening", "AE CTCAE severity", "AE CTCAE severity worsening",
+"Serious Adverse Event", and "SAE Start date".
+\item \code{item_type}: character, known \code{item_type} corresponding to those found
+for \code{item_name}s in \code{study_data}.
+\item \code{item_group}: character, known \code{item_group} corresponding to those found
+for \code{item_name}s in \code{study_data}.
+}
+\item \code{study_forms}: Contains the same columns as the data.frame \code{common_forms},
+and in addition the columns \code{unit}, \code{lower_limit}, \code{upper_limit}. Used to
+select and rename the raw data variables of interest. In addition, the
+\code{suffix} column is used more regularly in this "study" context. This is because,
+these variable names may have a consistent trunk / stem, with
+varying suffixes to describe a similar style measurement. So instead of creating a
+new row for these variables in the \code{meta_data} data.frame, we allow for inclusion of several
+suffixes. For example, VS_PULSE measures beats/min. Typically, these measures
+are collected using VS_PULSE_VSORRES & VS_PULSE_VSREAND, but in this format,
+we can list the stem "VS_PULSE" as the \code{var} and \code{"VSORRES, VSREAND"} in the suffix field.
+As for the new columns, they are defined as follows:
+\itemize{
+\item \code{unit}: character, unit of measure
+\item \code{lower_limit}: numeric, the lower limit of what's considered clinically significant
+\item \code{upper_limit}: numeric, the upper limit of what's considered clinically significant
+}
+\item \code{general}: Contains the same columns as \code{common_forms} and is used in the
+same way. That is, it's used to select and rename the raw data when
+transformed into the desired \code{study_data} format. Note:
+creating the \code{study_data} data.frame should use \code{merge_meta_with_data()}
+where (not surprisingly), the metadata is merged with the raw study data.
+Please refer to the \code{common_forms} spec above. However,
+I will note that there are certain \code{item_name}s that are required, even if
+missing in \code{study_data}, including: "Age", "Sex", "ECOG", "Eligible",
+"WHO.classification", "DiscontinuationReason", "DrugAdminDate", and
+"DrugAdminDose".
+\item \code{groups}: Contains the columns \code{item_group}, \code{item_type}, \code{item_scale},
+\code{use_unscaled_limits}.
+}
+}
 }

From 96864abe886e93471b1d927e0986993bf9da75e4 Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Fri, 28 Jun 2024 12:43:26 -0400
Subject: [PATCH 14/23] update golem-config pkg version

---
 inst/golem-config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/golem-config.yml b/inst/golem-config.yml
index 760ce8ea..80bfb5b0 100644
--- a/inst/golem-config.yml
+++ b/inst/golem-config.yml
@@ -1,6 +1,6 @@
 default:
   golem_name: clinsight
-  golem_version: 0.0.0.9002
+  golem_version: 0.0.0.9003
   app_prod: no
   data_folder: test_data
   study_data: !expr clinsight::clinsightful_data

From a8c3691dc2344df6200b93d2c32b33fee9e02eaa Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Mon, 1 Jul 2024 15:30:08 -0400
Subject: [PATCH 15/23] move data config information over to vignette instead
 of run_app docs

---
 R/run_app.R              | 149 -------------------------------------
 man/clinsight-package.Rd |   2 +-
 man/run_app.Rd           | 157 ---------------------------------------
 vignettes/data_spec.Rmd  | 136 +++++++++++++++++++++------------
 4 files changed, 91 insertions(+), 353 deletions(-)

diff --git a/R/run_app.R b/R/run_app.R
index 34bb8ce0..f22a699e 100644
--- a/R/run_app.R
+++ b/R/run_app.R
@@ -10,155 +10,6 @@
 #'   for more details.
 #' @inheritParams shiny::shinyApp
 #' 
-#' @details
-#' There are several elements defined in `golem-config.yml` that require
-#' configuration before launching the application for the first time. To name a
-#' few:
-#' 
-#' - `user_db` a Character string providing the path to the app databases.
-#'   If it does not exist, one will be created based on app data and metadata,
-#'   with all data labeled as new'/not yet reviewed.
-#' - `credentials_db` Character string. Path to the credentials database.
-#' 
-#' The other two elements are `meta_data` and `study_data`, accepting file paths
-#' to the app's primary source of data, stored as RDS files. As such, below are
-#' comprehensive data specifications for these objects.
-#' 
-#' ### `study_data`
-#' The RDS file ported to the `study_data` element is a data.frame containing the
-#' following required columns:
-#' 
-#' - `site_code`: character or integer, identifier for study site; If an integer,
-#'    recommended to add prefix "Site" as this will display more intuitively in
-#'    the application's UI
-#' - `subject_id`: character, unique identifier for a subject
-#' - `event_repeat`: integer, helps keep track of unique `event_id` for a single
-#'    `subject_id` and `event_date`
-#' - `event_id`: character, names that help classify types of  `event_name`s
-#'    into like-groups, generally characterized by site visits. For example, 
-#'    "SCR" for the screening visit, "VIS" for Visit X (where X is some integer),
-#'    and "EXIT" for when the patient exits the study trial. However, some
-#'    `event_id`s track events that could apply outside of any visit, like AE,
-#'    ConMed, Medical History, etc.
-#' - `event_name`: character, an "event" generally characterizes some sort of
-#'    site visit, whether that be a "Screening", "Visit X" (where X is some
-#'    integer), "Exit", or "Any Visit".
-#' - `event_date`: Date, the date associated with `event_name`
-#' - `form_id`: character, a unique identifier for the form the `item_name` metric
-#'    and `item_value` were pulled from. Note: when `item_type` is continuous,
-#'    `form_id` can contain several different `item_group`s. However, when
-#'    `item_type` is 'other', `item_group` can be made up of several `form_id`
-#'    values.
-#' - `form_repeat`: integer, helps keep track of unique `item_name`s collected
-#'    from a specific `form_id` for a given `subject_id`. `form_repeat` is
-#'    particularly helpful when conslidating data like Adverse Events into this
-#'    data format. Specifically, if more than one AE is collected on a patient,
-#'    they'll have more than one `form_repeat`
-#' - `edit_date_time`: datetime (POSIXct), the last time this record was edited
-#' - `db_update_time`: datetime (POSIXct), the last time the database storing this
-#'    record was updated.
-#' - `region`: character, describing the region code that `site_code` falls under
-#' - `day`: a difftime number, meaning it contains both a number and unit of 
-#'    time. It measures the number of days each visit is from screening
-#' - `vis_day`: numeric, a numeric representation of `day`
-#' - `vis_num`: numeric, a numeric representation of `event_name`
-#' - `event_label`: character, an abbreviation of `event_name`
-#' - `item_name`:  character, describes a metric or parameter of interest.
-#' - `item_type`:  character, classifies `item_name`s into either 'continuous' 
-#'    or 'other', where continuous types are those generally associated with the
-#'    CDISC "basic data structure" (BDS). That is, each `item_name` metric is
-#'    collected over time at a patient visit (`event_name`). The 'other' type
-#'    represents all non-time dependent measures, like demographic info, adverse
-#'    events, Medications, medical history, etc.
-#' - `item_group`:  character, provides is a high level category that groups 
-#'    like-`item_name`s together. For example, and `item_group` = 'Vital Signs'
-#'    will group together pertinent `item_name` metrics like BMI, Pulse, Blood
-#'    pressure, etc.
-#' - `item_value`:  character, the measurement collected for a given `item_name`.
-#'    The value collected may be a number like 150 (when collecting a patient's
-#'    weight) or a word (such as 'white' for the subject's race).
-#' - `item_unit`:  character, tracking the unit of measurement for `item_name` 
-#'    and `item_value`.
-#' - `lower_lim`: numeric, some `item_name`s (particularly the 'continuous' type)
-#'    have a pre-defined range of values that are considered normal. This is the
-#'    lower limit to that range.
-#' - `upper_lim`: numeric, some `item_name`s (particularly the 'continuous' type)
-#'    have a pre-defined range of values that are considered normal. This is the
-#'    upper limit to that range.
-#' - `significance`:  character, either 'CS' which means 'Clinically Significant'
-#'    or 'NCS' which means 'Not Clinically Significant'
-#' - `reason_notdone`:  character, an effort to describe why the `item_value`
-#'    field is `NA` / missing.
-#' 
-#' ### `meta_data`
-#' 
-#' The RDS file ported to the `meta_data` configuration is a list of data.frames
-#' which (not surprisingly) contains metadata used for the application. Prior to
-#' launching the application, the metadata will be merged with the `study_data`.
-#' to dictate which variables will be included in the application, and in which 
-#' tab the variables will be displayed. The goal is that most, if not all 
-#' study-specific data will be captured in the metadata, leaving the scripts to 
-#' run the application largely unaltered between studies.
-#' 
-#' Specifications for the list of data.frames include:
-#' :
-#'  - `events`: Used to create a simple timeline in the application, with
-#'  predefined number of planned visits, N. It contains the following columns:
-#'      - `event_number`: integer. Example: 0, 1, 2, ..., N
-#'      - `event_name`: character. Example: "Screening", "Visit 1", "Visit 2", 
-#'      ..., "Visit N"
-#'      - `event_label`: character. Example: "V0", "V1", "V2", ..., "VN"
-#'   
-#'  - `common_forms`: Used to select and rename the variables of interest in 
-#'  the raw data when transformed into the desired `study_data` format. Note: 
-#'  creating the `study_data` data.frame should use `merge_meta_with_data()` 
-#'  where (not surprisingly), the metadata is merged with the raw study data.
-#'  `common_forms` contains the columns below:
-#'      - `var`: character, the variable name to display in the table, mapped 
-#'      from a known `item_name` provided in `study_data`. Example: 
-#'      `item_name = "AE Name"` will be replaced by "AE_AETERM" when 
-#'      `var = "AE_AETERM"`.
-#'      - `suffix`: Usually blank in this data.frame. This column is more 
-#'      commonly used in the `study_forms` data.frame
-#'      - `item_name`: character, known `item_name`s found in `study_data`. There
-#'      are certain `item_name`s that are required, even if missing in 
-#'      `study_data`, including: "AE Name", "AE start date", "AE end date", "AE 
-#'      date of worsening", "AE CTCAE severity", "AE CTCAE severity worsening", 
-#'      "Serious Adverse Event", and "SAE Start date".
-#'      - `item_type`: character, known `item_type` corresponding to those found
-#'       for `item_name`s in `study_data`.
-#'      - `item_group`: character, known `item_group` corresponding to those found
-#'       for `item_name`s in `study_data`.
-#'   
-#'  - `study_forms`: Contains the same columns as the data.frame `common_forms`,
-#'   and in addition the columns `unit`, `lower_limit`, `upper_limit`. Used to
-#'   select and rename the raw data variables of interest. In addition, the
-#'   `suffix` column is used more regularly in this "study" context. This is because,
-#'   these variable names may have a consistent trunk / stem, with
-#'   varying suffixes to describe a similar style measurement. So instead of creating a 
-#'   new row for these variables in the `meta_data` data.frame, we allow for inclusion of several
-#'   suffixes. For example, VS_PULSE measures beats/min. Typically, these measures
-#'   are collected using VS_PULSE_VSORRES & VS_PULSE_VSREAND, but in this format,
-#'   we can list the stem "VS_PULSE" as the `var` and `"VSORRES, VSREAND"` in the suffix field.
-#'   As for the new columns, they are defined as follows:
-#'       - `unit`: character, unit of measure
-#'       - `lower_limit`: numeric, the lower limit of what's considered clinically significant
-#'       - `upper_limit`: numeric, the upper limit of what's considered clinically significant
-#'       
-#'   
-#'  - `general`: Contains the same columns as `common_forms` and is used in the 
-#'  same way. That is, it's used to select and rename the raw data when 
-#'  transformed into the desired `study_data` format. Note: 
-#'  creating the `study_data` data.frame should use `merge_meta_with_data()` 
-#'  where (not surprisingly), the metadata is merged with the raw study data. 
-#'  Please refer to the `common_forms` spec above. However,
-#'  I will note that there are certain `item_name`s that are required, even if 
-#'  missing in `study_data`, including: "Age", "Sex", "ECOG", "Eligible",
-#'  "WHO.classification", "DiscontinuationReason", "DrugAdminDate", and
-#'  "DrugAdminDose".
-#'  
-#'  - `groups`: Contains the columns `item_group`, `item_type`, `item_scale`,
-#'   `use_unscaled_limits`.
 #' 
 #'
 #' @export
diff --git a/man/clinsight-package.Rd b/man/clinsight-package.Rd
index a346d98f..57c7ff58 100644
--- a/man/clinsight-package.Rd
+++ b/man/clinsight-package.Rd
@@ -19,7 +19,7 @@ Useful links:
 
 }
 \author{
-\strong{Maintainer}: Leonard Daniël Samson \email{lsamson@gcp-service.com}
+\strong{Maintainer}: Leonard Daniël Samson \email{lsamson@gcp-service.com}
 
 Other contributors:
 \itemize{
diff --git a/man/run_app.Rd b/man/run_app.Rd
index e5172702..419ccb80 100644
--- a/man/run_app.Rd
+++ b/man/run_app.Rd
@@ -51,160 +51,3 @@ for more details.}
 \description{
 Run the Shiny Application
 }
-\details{
-There are several elements defined in \code{golem-config.yml} that require
-configuration before launching the application for the first time. To name a
-few:
-\itemize{
-\item \code{user_db} a Character string providing the path to the app databases.
-If it does not exist, one will be created based on app data and metadata,
-with all data labeled as new'/not yet reviewed.
-\item \code{credentials_db} Character string. Path to the credentials database.
-}
-
-The other two elements are \code{meta_data} and \code{study_data}, accepting file paths
-to the app's primary source of data, stored as RDS files. As such, below are
-comprehensive data specifications for these objects.
-\subsection{\code{study_data}}{
-
-The RDS file ported to the \code{study_data} element is a data.frame containing the
-following required columns:
-\itemize{
-\item \code{site_code}: character or integer, identifier for study site; If an integer,
-recommended to add prefix "Site" as this will display more intuitively in
-the application's UI
-\item \code{subject_id}: character, unique identifier for a subject
-\item \code{event_repeat}: integer, helps keep track of unique \code{event_id} for a single
-\code{subject_id} and \code{event_date}
-\item \code{event_id}: character, names that help classify types of  \code{event_name}s
-into like-groups, generally characterized by site visits. For example,
-"SCR" for the screening visit, "VIS" for Visit X (where X is some integer),
-and "EXIT" for when the patient exits the study trial. However, some
-\code{event_id}s track events that could apply outside of any visit, like AE,
-ConMed, Medical History, etc.
-\item \code{event_name}: character, an "event" generally characterizes some sort of
-site visit, whether that be a "Screening", "Visit X" (where X is some
-integer), "Exit", or "Any Visit".
-\item \code{event_date}: Date, the date associated with \code{event_name}
-\item \code{form_id}: character, a unique identifier for the form the \code{item_name} metric
-and \code{item_value} were pulled from. Note: when \code{item_type} is continuous,
-\code{form_id} can contain several different \code{item_group}s. However, when
-\code{item_type} is 'other', \code{item_group} can be made up of several \code{form_id}
-values.
-\item \code{form_repeat}: integer, helps keep track of unique \code{item_name}s collected
-from a specific \code{form_id} for a given \code{subject_id}. \code{form_repeat} is
-particularly helpful when conslidating data like Adverse Events into this
-data format. Specifically, if more than one AE is collected on a patient,
-they'll have more than one \code{form_repeat}
-\item \code{edit_date_time}: datetime (POSIXct), the last time this record was edited
-\item \code{db_update_time}: datetime (POSIXct), the last time the database storing this
-record was updated.
-\item \code{region}: character, describing the region code that \code{site_code} falls under
-\item \code{day}: a difftime number, meaning it contains both a number and unit of
-time. It measures the number of days each visit is from screening
-\item \code{vis_day}: numeric, a numeric representation of \code{day}
-\item \code{vis_num}: numeric, a numeric representation of \code{event_name}
-\item \code{event_label}: character, an abbreviation of \code{event_name}
-\item \code{item_name}:  character, describes a metric or parameter of interest.
-\item \code{item_type}:  character, classifies \code{item_name}s into either 'continuous'
-or 'other', where continuous types are those generally associated with the
-CDISC "basic data structure" (BDS). That is, each \code{item_name} metric is
-collected over time at a patient visit (\code{event_name}). The 'other' type
-represents all non-time dependent measures, like demographic info, adverse
-events, Medications, medical history, etc.
-\item \code{item_group}:  character, provides is a high level category that groups
-like-\code{item_name}s together. For example, and \code{item_group} = 'Vital Signs'
-will group together pertinent \code{item_name} metrics like BMI, Pulse, Blood
-pressure, etc.
-\item \code{item_value}:  character, the measurement collected for a given \code{item_name}.
-The value collected may be a number like 150 (when collecting a patient's
-weight) or a word (such as 'white' for the subject's race).
-\item \code{item_unit}:  character, tracking the unit of measurement for \code{item_name}
-and \code{item_value}.
-\item \code{lower_lim}: numeric, some \code{item_name}s (particularly the 'continuous' type)
-have a pre-defined range of values that are considered normal. This is the
-lower limit to that range.
-\item \code{upper_lim}: numeric, some \code{item_name}s (particularly the 'continuous' type)
-have a pre-defined range of values that are considered normal. This is the
-upper limit to that range.
-\item \code{significance}:  character, either 'CS' which means 'Clinically Significant'
-or 'NCS' which means 'Not Clinically Significant'
-\item \code{reason_notdone}:  character, an effort to describe why the \code{item_value}
-field is \code{NA} / missing.
-}
-}
-
-\subsection{\code{meta_data}}{
-
-The RDS file ported to the \code{meta_data} configuration is a list of data.frames
-which (not surprisingly) contains metadata used for the application. Prior to
-launching the application, the metadata will be merged with the \code{study_data}.
-to dictate which variables will be included in the application, and in which
-tab the variables will be displayed. The goal is that most, if not all
-study-specific data will be captured in the metadata, leaving the scripts to
-run the application largely unaltered between studies.
-
-Specifications for the list of data.frames include:
-:
-\itemize{
-\item \code{events}: Used to create a simple timeline in the application, with
-predefined number of planned visits, N. It contains the following columns:
-\itemize{
-\item \code{event_number}: integer. Example: 0, 1, 2, ..., N
-\item \code{event_name}: character. Example: "Screening", "Visit 1", "Visit 2",
-..., "Visit N"
-\item \code{event_label}: character. Example: "V0", "V1", "V2", ..., "VN"
-}
-\item \code{common_forms}: Used to select and rename the variables of interest in
-the raw data when transformed into the desired \code{study_data} format. Note:
-creating the \code{study_data} data.frame should use \code{merge_meta_with_data()}
-where (not surprisingly), the metadata is merged with the raw study data.
-\code{common_forms} contains the columns below:
-\itemize{
-\item \code{var}: character, the variable name to display in the table, mapped
-from a known \code{item_name} provided in \code{study_data}. Example:
-\code{item_name = "AE Name"} will be replaced by "AE_AETERM" when
-\code{var = "AE_AETERM"}.
-\item \code{suffix}: Usually blank in this data.frame. This column is more
-commonly used in the \code{study_forms} data.frame
-\item \code{item_name}: character, known \code{item_name}s found in \code{study_data}. There
-are certain \code{item_name}s that are required, even if missing in
-\code{study_data}, including: "AE Name", "AE start date", "AE end date", "AE
-date of worsening", "AE CTCAE severity", "AE CTCAE severity worsening",
-"Serious Adverse Event", and "SAE Start date".
-\item \code{item_type}: character, known \code{item_type} corresponding to those found
-for \code{item_name}s in \code{study_data}.
-\item \code{item_group}: character, known \code{item_group} corresponding to those found
-for \code{item_name}s in \code{study_data}.
-}
-\item \code{study_forms}: Contains the same columns as the data.frame \code{common_forms},
-and in addition the columns \code{unit}, \code{lower_limit}, \code{upper_limit}. Used to
-select and rename the raw data variables of interest. In addition, the
-\code{suffix} column is used more regularly in this "study" context. This is because,
-these variable names may have a consistent trunk / stem, with
-varying suffixes to describe a similar style measurement. So instead of creating a
-new row for these variables in the \code{meta_data} data.frame, we allow for inclusion of several
-suffixes. For example, VS_PULSE measures beats/min. Typically, these measures
-are collected using VS_PULSE_VSORRES & VS_PULSE_VSREAND, but in this format,
-we can list the stem "VS_PULSE" as the \code{var} and \code{"VSORRES, VSREAND"} in the suffix field.
-As for the new columns, they are defined as follows:
-\itemize{
-\item \code{unit}: character, unit of measure
-\item \code{lower_limit}: numeric, the lower limit of what's considered clinically significant
-\item \code{upper_limit}: numeric, the upper limit of what's considered clinically significant
-}
-\item \code{general}: Contains the same columns as \code{common_forms} and is used in the
-same way. That is, it's used to select and rename the raw data when
-transformed into the desired \code{study_data} format. Note:
-creating the \code{study_data} data.frame should use \code{merge_meta_with_data()}
-where (not surprisingly), the metadata is merged with the raw study data.
-Please refer to the \code{common_forms} spec above. However,
-I will note that there are certain \code{item_name}s that are required, even if
-missing in \code{study_data}, including: "Age", "Sex", "ECOG", "Eligible",
-"WHO.classification", "DiscontinuationReason", "DrugAdminDate", and
-"DrugAdminDose".
-\item \code{groups}: Contains the columns \code{item_group}, \code{item_type}, \code{item_scale},
-\code{use_unscaled_limits}.
-}
-}
-}
diff --git a/vignettes/data_spec.Rmd b/vignettes/data_spec.Rmd
index a85e361d..54a7e6fd 100644
--- a/vignettes/data_spec.Rmd
+++ b/vignettes/data_spec.Rmd
@@ -12,66 +12,110 @@ knitr::opts_chunk$set(
   comment = "#>"
 )
 ```
-```{r setup}
-library(clinsight)
-```
-Note: this page is "under construction" until the `clinsight` package developers
-create helper functions that transform at least one other EDC data source into 
-the desired format. In the meantime, this page provides useful documentation to 
-the existing data spec. Eventually, it will also share guidelines about how to
-combine your input data with metadata sources, etc.
+Note: this page is "under construction" until the `clinsight` package developers create helper functions that transform at least one other EDC data source into the desired format. In the meantime, this page provides useful documentation to the existing data spec. Eventually, it will also share guidelines about how to combine your input data with metadata sources, etc.
 
 ## Intro
 
-In order to plug your organizations EDC data into the `clinsight` application,
-you'll notice the `run_app()` function in `app.R` collects a `data` argument.
-This vignette is all about the expected format of that `data` object / file.
+In order to plug your organizations EDC data into the `clinsight` application, please notice this app is outfitted with a `./inst/golem-config.yml` file to control several elements of deployment. Below is a typical configuration file:
+
+```yml
+default:
+  golem_name: clinsight
+  golem_version: 0.0.0.9003
+  app_prod: no
+  data_folder: test_data
+  study_data: !expr clinsight::clinsightful_data
+  meta_data: !expr clinsight::metadata
+  user_db: test_user_db.sqlite
+  credentials_db: test_credentials_db.sqlite
+production:
+  app_prod: yes
+  data_folder: study_data
+  study_data: study_data.rds
+  meta_data: metadata.rds
+  user_db: user_db.sqlite
+  credentials_db: credentials_db.sqlite
+dev:
+  golem_wd: !expr golem::pkg_path()
+```
+
+First and foremost, note that the configurations can be developed for many use cases. For example, the above file is designed to run by default with a 
 
-Depending on the EDC vendor used, the size, shape, and format of their "raw
-data" may vary. We've compiled a few functions that help admin users pre-process
-the data they own. However, before we deep dive into how to use those, let's
-focus on what the final result of those pre-processing steps in order to
-understand what we need to start using the app.
+Depending on the EDC vendor used, the size, shape, and format of their "raw data" may vary. We've compiled a few functions that help admin users pre-process the data they own. However, before we deep dive into how to use those, let's focus on what the final result of those pre-processing steps in order to understand what we need to start using the app.
 
 ## `Data` Specification
 
-Baked into the `clinsight` package is an internal data set called
-`clinsightful_data`. Here is a preview of that data:
+Baked into the `clinsight` package is an internal data set called `clinsightful_data`. Here is a preview of that data:
 
 ```{r clinsightful_data}
+# library(clinsight)
 # data("clinsightful_data") # Run to load the data into an R session. 
+
 head(clinsight::clinsightful_data)
 # colnames(clinsightful_data)
 ```
 
-### Column specs
-
-Refer to `run_app()` documentation by running `?clinsight::run_app`.
-
-- `site_code`: 
-- `subject_id`: 
-- `event_repeat`: 
-- `event_id`: 
-- `event_name`: 
-- `event_date`: 
-- `form_id`: 
-- `form_repeat`: 
-- `edit_date_time`: 
-- `db_update_time`: 
-- `region`: 
-- `day`: 
-- `vis_day`: 
-- `vis_num`: 
-- `event_label`: 
-- `item_name`: 
-- `item_type`: 
-- `item_group`: 
-- `item_value`: 
-- `item_unit`: 
-- `lower_lim`: 
-- `upper_lim`: 
-- `significance`: 
-- `reason_notdone`: 
+There are several elements defined in `golem-config.yml` that require configuration before launching the application for the first time. To name a few:
+
+- `user_db` a Character string providing the path to the app databases. If it does not exist, one will be created based on app data and metadata, with all data labeled as new'/not yet reviewed.
+- `credentials_db` Character string. Path to the credentials database.
+
+The other two elements are `meta_data` and `study_data`, accepting file paths to the app's primary source of data, stored as RDS files. As such, below are comprehensive data specifications for these objects.
+
+### `study_data`
+The RDS file ported to the `study_data` element is a data.frame containing the following required columns:
+
+- `site_code`: character or integer, identifier for study site; If an integer, recommended to add prefix "Site" as this will display more intuitively in the application's UI
+- `subject_id`: character, unique identifier for a subject
+- `event_repeat`: integer, helps keep track of unique `event_id` for a single `subject_id` and `event_date`
+- `event_id`: character, names that help classify types of  `event_name`s into like-groups, generally characterized by site visits. For example,  "SCR" for the screening visit, "VIS" for Visit X (where X is some integer), and "EXIT" for when the patient exits the study trial. However, some `event_id`s track events that could apply outside of any visit, like AE, ConMed, Medical History, etc.
+- `event_name`: character, an "event" generally characterizes some sort of site visit, whether that be a "Screening", "Visit X" (where X is some integer), "Exit", or "Any Visit".
+- `event_date`: Date, the date associated with `event_name`
+- `form_id`: character, a unique identifier for the form the `item_name` metric and `item_value` were pulled from. Note: when `item_type` is continuous, `form_id` can contain several different `item_group`s. However, when `item_type` is 'other', `item_group` can be made up of several `form_id` values.
+- `form_repeat`: integer, helps keep track of unique `item_name`s collected from a specific `form_id` for a given `subject_id`. `form_repeat` is particularly helpful when consolidating data like Adverse Events into this data format. Specifically, if more than one AE is collected on a patient, they'll have more than one `form_repeat`
+- `edit_date_time`: datetime (POSIXct), the last time this record was edited
+- `db_update_time`: datetime (POSIXct), the last time the database storing this record was updated.
+- `region`: character, describing the region code that `site_code` falls under
+- `day`: a difftime number, meaning it contains both a number and unit of time. It measures the number of days each visit is from screening
+- `vis_day`: numeric, a numeric representation of `day`
+- `vis_num`: numeric, a numeric representation of `event_name`
+- `event_label`: character, an abbreviation of `event_name`
+- `item_name`:  character, describes a metric or parameter of interest.
+- `item_type`:  character, classifies `item_name`s into either 'continuous' or 'other', where continuous types are those generally associated with the CDISC "basic data structure" (BDS). That is, each `item_name` metric is collected over time at a patient visit (`event_name`). The 'other' type represents all non-time dependent measures, like demographic info, adverse events, Medications, medical history, etc.
+- `item_group`:  character, provides is a high level category that groups like-`item_name`s together. For example, and `item_group` = 'Vital Signs' will group together pertinent `item_name` metrics like BMI, Pulse, Blood pressure, etc.
+- `item_value`:  character, the measurement collected for a given `item_name`. The value collected may be a number like 150 (when collecting a patient's weight) or a word (such as 'white' for the subject's race).
+- `item_unit`:  character, tracking the unit of measurement for `item_name` and `item_value`.
+- `lower_lim`: numeric, some `item_name`s (particularly the 'continuous' type) have a pre-defined range of values that are considered normal. This is the lower limit to that range.
+- `upper_lim`: numeric, some `item_name`s (particularly the 'continuous' type) have a pre-defined range of values that are considered normal. This is the upper limit to that range.
+- `significance`:  character, either 'CS' which means 'Clinically Significant' or 'NCS' which means 'Not Clinically Significant'
+- `reason_notdone`:  character, an effort to describe why the `item_value` field is `NA` / missing.
+
+### `meta_data`
+
+The RDS file ported to the `meta_data` configuration is a list of data.frames which (not surprisingly) contains metadata used for the application. Prior to launching the application, the metadata will be merged with the `study_data` to dictate which variables will be included in the application, and in which tab the variables will be displayed. The goal is that most, if not all study-specific data will be captured in the metadata, leaving the scripts to run the application largely unaltered between studies.
+
+Specifications for the list of data.frames include:
+ - `events`: Used to create a simple timeline in the application, with predefined number of planned visits, N. It contains the following columns:
+     - `event_number`: integer. Example: 0, 1, 2, ..., N
+     - `event_name`: character. Example: "Screening", "Visit 1", "Visit 2", ..., "Visit N"
+     - `event_label`: character. Example: "V0", "V1", "V2", ..., "VN"
+  
+ - `common_forms`: Used to select and rename the variables of interest in the raw data when transformed into the desired `study_data` format. Note: creating the `study_data` data.frame should use `merge_meta_with_data()` where (not surprisingly), the metadata is merged with the raw study data. `common_forms` contains the columns below:
+     - `var`: character, the variable name to display in the table, mapped from a known `item_name` provided in `study_data`. Example: `item_name = "AE Name"` will be replaced by "AE_AETERM" when `var = "AE_AETERM"`.
+     - `suffix`: Usually blank in this data.frame. This column is more commonly used in the `study_forms` data.frame
+     - `item_name`: character, known `item_name`s found in `study_data`. There are certain `item_name`s that are required, even if missing in `study_data`, including: "AE Name", "AE start date", "AE end date", "AE date of worsening", "AE CTCAE severity", "AE CTCAE severity worsening", "Serious Adverse Event", and "SAE Start date".
+     - `item_type`: character, known `item_type` corresponding to those found for `item_name`s in `study_data`.
+     - `item_group`: character, known `item_group` corresponding to those found for `item_name`s in `study_data`.
+  
+ - `study_forms`: Contains the same columns as the data.frame `common_forms`, and in addition the columns `unit`, `lower_limit`, `upper_limit`. Used to select and rename the raw data variables of interest. In addition, the `suffix` column is used more regularly in this "study" context. This is because, these variable names may have a consistent trunk / stem, with varying suffixes to describe a similar style measurement. So instead of creating a new row for these variables in the `meta_data` data.frame, we allow for inclusion of several suffixes. For example, VS_PULSE measures beats/min. Typically, these measures are collected using VS_PULSE_VSORRES & VS_PULSE_VSREAND, but in this format, we can list the stem "VS_PULSE" as the `var` and `"VSORRES, VSREAND"` in the suffix field. As for the new columns, they are defined as follows:
+      - `unit`: character, unit of measure
+      - `lower_limit`: numeric, the lower limit of what's considered clinically significant
+      - `upper_limit`: numeric, the upper limit of what's considered clinically significant
+      
+  
+ - `general`: Contains the same columns as `common_forms` and is used in the same way. That is, it's used to select and rename the raw data when transformed into the desired `study_data` format. Note: creating the `study_data` data.frame should use `merge_meta_with_data()` where (not surprisingly), the metadata is merged with the raw study data. Please refer to the `common_forms` spec above. However, I will note that there are certain `item_name`s that are required, even if missing in `study_data`, including: "Age", "Sex", "ECOG", "Eligible", "WHO.classification", "DiscontinuationReason", "DrugAdminDate", and "DrugAdminDose".
+ 
+ - `groups`: Contains the columns `item_group`, `item_type`, `item_scale`,`use_unscaled_limits`.
 
 For more information about 
 

From dad8b58b15e4842736c01ea92a2f77d80f7bb6d7 Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Mon, 1 Jul 2024 17:05:42 -0400
Subject: [PATCH 16/23] update data spec to include section for raw data

---
 vignettes/data_spec.Rmd | 87 +++++++++++++++++++++++------------------
 1 file changed, 48 insertions(+), 39 deletions(-)

diff --git a/vignettes/data_spec.Rmd b/vignettes/data_spec.Rmd
index 54a7e6fd..1932db3b 100644
--- a/vignettes/data_spec.Rmd
+++ b/vignettes/data_spec.Rmd
@@ -1,5 +1,5 @@
 ---
-title: "Input Data Specifications"
+title: "App Configuration & Input Data Specifications"
 output: rmarkdown::html_vignette
 vignette: >
   %\VignetteIndexEntry{Input Data Specification}
@@ -12,11 +12,12 @@ knitr::opts_chunk$set(
   comment = "#>"
 )
 ```
+
 Note: this page is "under construction" until the `clinsight` package developers create helper functions that transform at least one other EDC data source into the desired format. In the meantime, this page provides useful documentation to the existing data spec. Eventually, it will also share guidelines about how to combine your input data with metadata sources, etc.
 
 ## Intro
 
-In order to plug your organizations EDC data into the `clinsight` application, please notice this app is outfitted with a `./inst/golem-config.yml` file to control several elements of deployment. Below is a typical configuration file:
+In order to get started and plug your organization's EDC data into the `clinsight` application, please notice this app is outfitted with a `./inst/golem-config.yml` file to configure several elements of deployment. Below is a typical configuration file:
 
 ```yml
 default:
@@ -39,17 +40,19 @@ dev:
   golem_wd: !expr golem::pkg_path()
 ```
 
-First and foremost, note that the configurations can be developed for many use cases. For example, the above file is designed to run by default with a 
+First and foremost, notice that the configuration can vary depending on the deployment use case. For example, the above file is designed to run by default with test data built into the app. Only in `production` mode does the app actually leverage the data you've gathered from your EDC system, stored as RDS files.
 
 Depending on the EDC vendor used, the size, shape, and format of their "raw data" may vary. We've compiled a few functions that help admin users pre-process the data they own. However, before we deep dive into how to use those, let's focus on what the final result of those pre-processing steps in order to understand what we need to start using the app.
 
-## `Data` Specification
+## Data Specifications
 
-Baked into the `clinsight` package is an internal data set called `clinsightful_data`. Here is a preview of that data:
+Baked into the `clinsight` package is an internal data.frame called `clinsightful_data`, which is comprised of randomly generated test data for this application. Here is a preview of that data:
 
 ```{r clinsightful_data}
-# library(clinsight)
-# data("clinsightful_data") # Run to load the data into an R session. 
+# Run to load the data into an R session. 
+# pkg_name <- "clinsight"
+# library(pkg_name, character.only = TRUE)
+# data("clinsightful_data") 
 
 head(clinsight::clinsightful_data)
 # colnames(clinsightful_data)
@@ -60,35 +63,12 @@ There are several elements defined in `golem-config.yml` that require configurat
 - `user_db` a Character string providing the path to the app databases. If it does not exist, one will be created based on app data and metadata, with all data labeled as new'/not yet reviewed.
 - `credentials_db` Character string. Path to the credentials database.
 
-The other two elements are `meta_data` and `study_data`, accepting file paths to the app's primary source of data, stored as RDS files. As such, below are comprehensive data specifications for these objects.
+The other two elements are `meta_data` and `study_data`, accepting file paths to the app's primary source of data, stored as RDS files. As such, below are comprehensive data specifications for these objects. Note that the `study_data` object should be created with the pre-processing helper function called `merge_meta_with_data()` which accepts your raw data sources and merges them with the `meta_data` object defined in this article.
 
-### `study_data`
-The RDS file ported to the `study_data` element is a data.frame containing the following required columns:
+### Raw Data
+
+Insert Raw data specs
 
-- `site_code`: character or integer, identifier for study site; If an integer, recommended to add prefix "Site" as this will display more intuitively in the application's UI
-- `subject_id`: character, unique identifier for a subject
-- `event_repeat`: integer, helps keep track of unique `event_id` for a single `subject_id` and `event_date`
-- `event_id`: character, names that help classify types of  `event_name`s into like-groups, generally characterized by site visits. For example,  "SCR" for the screening visit, "VIS" for Visit X (where X is some integer), and "EXIT" for when the patient exits the study trial. However, some `event_id`s track events that could apply outside of any visit, like AE, ConMed, Medical History, etc.
-- `event_name`: character, an "event" generally characterizes some sort of site visit, whether that be a "Screening", "Visit X" (where X is some integer), "Exit", or "Any Visit".
-- `event_date`: Date, the date associated with `event_name`
-- `form_id`: character, a unique identifier for the form the `item_name` metric and `item_value` were pulled from. Note: when `item_type` is continuous, `form_id` can contain several different `item_group`s. However, when `item_type` is 'other', `item_group` can be made up of several `form_id` values.
-- `form_repeat`: integer, helps keep track of unique `item_name`s collected from a specific `form_id` for a given `subject_id`. `form_repeat` is particularly helpful when consolidating data like Adverse Events into this data format. Specifically, if more than one AE is collected on a patient, they'll have more than one `form_repeat`
-- `edit_date_time`: datetime (POSIXct), the last time this record was edited
-- `db_update_time`: datetime (POSIXct), the last time the database storing this record was updated.
-- `region`: character, describing the region code that `site_code` falls under
-- `day`: a difftime number, meaning it contains both a number and unit of time. It measures the number of days each visit is from screening
-- `vis_day`: numeric, a numeric representation of `day`
-- `vis_num`: numeric, a numeric representation of `event_name`
-- `event_label`: character, an abbreviation of `event_name`
-- `item_name`:  character, describes a metric or parameter of interest.
-- `item_type`:  character, classifies `item_name`s into either 'continuous' or 'other', where continuous types are those generally associated with the CDISC "basic data structure" (BDS). That is, each `item_name` metric is collected over time at a patient visit (`event_name`). The 'other' type represents all non-time dependent measures, like demographic info, adverse events, Medications, medical history, etc.
-- `item_group`:  character, provides is a high level category that groups like-`item_name`s together. For example, and `item_group` = 'Vital Signs' will group together pertinent `item_name` metrics like BMI, Pulse, Blood pressure, etc.
-- `item_value`:  character, the measurement collected for a given `item_name`. The value collected may be a number like 150 (when collecting a patient's weight) or a word (such as 'white' for the subject's race).
-- `item_unit`:  character, tracking the unit of measurement for `item_name` and `item_value`.
-- `lower_lim`: numeric, some `item_name`s (particularly the 'continuous' type) have a pre-defined range of values that are considered normal. This is the lower limit to that range.
-- `upper_lim`: numeric, some `item_name`s (particularly the 'continuous' type) have a pre-defined range of values that are considered normal. This is the upper limit to that range.
-- `significance`:  character, either 'CS' which means 'Clinically Significant' or 'NCS' which means 'Not Clinically Significant'
-- `reason_notdone`:  character, an effort to describe why the `item_value` field is `NA` / missing.
 
 ### `meta_data`
 
@@ -117,12 +97,41 @@ Specifications for the list of data.frames include:
  
  - `groups`: Contains the columns `item_group`, `item_type`, `item_scale`,`use_unscaled_limits`.
 
-For more information about 
 
-## Pre-processing
-Place holder for helper functions
+### The end product: `study_data`
+The RDS file (or data.frame) ported to the `study_data` element contains the following required columns below. 
+
+- `site_code`: character or integer, identifier for study site; If an integer, recommended to add prefix "Site" as this will display more intuitively in the application's UI
+- `subject_id`: character, unique identifier for a subject
+- `event_repeat`: integer, helps keep track of unique `event_id` for a single `subject_id` and `event_date`
+- `event_id`: character, names that help classify types of  `event_name`s into like-groups, generally characterized by site visits. For example,  "SCR" for the screening visit, "VIS" for Visit X (where X is some integer), and "EXIT" for when the patient exits the study trial. However, some `event_id`s track events that could apply outside of any visit, like AE, ConMed, Medical History, etc.
+- `event_name`: character, an "event" generally characterizes some sort of site visit, whether that be a "Screening", "Visit X" (where X is some integer), "Exit", or "Any Visit".
+- `event_date`: Date, the date associated with `event_name`
+- `form_id`: character, a unique identifier for the form the `item_name` metric and `item_value` were pulled from. Note: when `item_type` is continuous, `form_id` can contain several different `item_group`s. However, when `item_type` is 'other', `item_group` can be made up of several `form_id` values.
+- `form_repeat`: integer, helps keep track of unique `item_name`s collected from a specific `form_id` for a given `subject_id`. `form_repeat` is particularly helpful when consolidating data like Adverse Events into this data format. Specifically, if more than one AE is collected on a patient, they'll have more than one `form_repeat`
+- `edit_date_time`: datetime (POSIXct), the last time this record was edited
+- `db_update_time`: datetime (POSIXct), the last time the database storing this record was updated.
+- `region`: character, describing the region code that `site_code` falls under
+- `day`: a difftime number, meaning it contains both a number and unit of time. It measures the number of days each visit is from screening
+- `vis_day`: numeric, a numeric representation of `day`
+- `vis_num`: numeric, a numeric representation of `event_name`
+- `event_label`: character, an abbreviation of `event_name`
+- `item_name`:  character, describes a metric or parameter of interest.
+- `item_type`:  character, classifies `item_name`s into either 'continuous' or 'other', where continuous types are those generally associated with the CDISC "basic data structure" (BDS). That is, each `item_name` metric is collected over time at a patient visit (`event_name`). The 'other' type represents all non-time dependent measures, like demographic info, adverse events, Medications, medical history, etc.
+- `item_group`:  character, provides is a high level category that groups like-`item_name`s together. For example, and `item_group` = 'Vital Signs' will group together pertinent `item_name` metrics like BMI, Pulse, Blood pressure, etc.
+- `item_value`:  character, the measurement collected for a given `item_name`. The value collected may be a number like 150 (when collecting a patient's weight) or a word (such as 'white' for the subject's race).
+- `item_unit`:  character, tracking the unit of measurement for `item_name` and `item_value`.
+- `lower_lim`: numeric, some `item_name`s (particularly the 'continuous' type) have a pre-defined range of values that are considered normal. This is the lower limit to that range.
+- `upper_lim`: numeric, some `item_name`s (particularly the 'continuous' type) have a pre-defined range of values that are considered normal. This is the upper limit to that range.
+- `significance`:  character, either 'CS' which means 'Clinically Significant' or 'NCS' which means 'Not Clinically Significant'
+- `reason_notdone`:  character, an effort to describe why the `item_value` field is `NA` / missing.
+
+
+
+
+
+
+
 
-### Raw Data
 
-### Metadata
 

From 802fb2c351c0224e386bc4e31727148b15b07fcb Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Tue, 9 Jul 2024 16:01:07 -0400
Subject: [PATCH 17/23] get all the right pieces into the vignette before
 testing them out

---
 dev/02_dev.R            |   5 ++
 vignettes/data_spec.Rmd | 141 +++++++++++++++++++++++++++++-----------
 2 files changed, 108 insertions(+), 38 deletions(-)

diff --git a/dev/02_dev.R b/dev/02_dev.R
index f5f06740..0f445809 100644
--- a/dev/02_dev.R
+++ b/dev/02_dev.R
@@ -82,6 +82,11 @@ clinsightful_data |>
   filter(event_repeat != form_repeat) |>
   print(n = 36)
 
+
+# metadata
+
+clinsight::metadata$column_specs$col_type |> unique()
+
 # Engineering
 
 ## Dependencies ----
diff --git a/vignettes/data_spec.Rmd b/vignettes/data_spec.Rmd
index 1932db3b..f4317f9a 100644
--- a/vignettes/data_spec.Rmd
+++ b/vignettes/data_spec.Rmd
@@ -40,9 +40,16 @@ dev:
   golem_wd: !expr golem::pkg_path()
 ```
 
-First and foremost, notice that the configuration can vary depending on deployment use case. For example, the above file is designed to run by default with test data built into the app. Only once in `production` mode, does the app actually leverage data you've gathered from your EDC system, as RDS files.
+First and foremost, notice that the configuration can vary depending on deployment use case. For example, the above file is designed to run (by default) with test data built into the app. Only once in `production` mode, does the app actually leverage data you've gathered from your EDC system, as RDS files. As seen above, there are several other elements defined in `golem-config.yml` that can be configured before launching the application for the first time. To name a few:
+
+- `user_db` a Character string providing the path to the app's review database. If it does not exist, one will be created based on app data and metadata, with all data labeled as 'new'/not yet reviewed.
+- `credentials_db` Character string. Path to the credentials database.
+
+The other two elements are `meta_data` and `study_data`, accepting file paths to the app's primary data sources, stored as RDS files. The `study_data` object should be created with the pre-processing helper function called `merge_meta_with_data()` which accepts raw data sources and merges them with the `meta_data` object defined in this article.
+
+Depending on the EDC vendor used, the size, shape, and format of their "raw data" may vary. We've compiled a few functions that help admin users pre-process the data they own. However, before we deep dive into how to use those, let's focus on the final result of those pre-processing steps to understand what's being fed to the app.
+
 
-Depending on the EDC vendor used, the size, shape, and format of their "raw data" may vary. We've compiled a few functions that help admin users pre-process the data they own. However, before we deep dive into how to use those, let's focus on what the final result of those pre-processing steps in order to understand what we need to start using the app.
 
 ## Data Specifications
 
@@ -55,26 +62,69 @@ Baked into the `clinsight` package is an internal data.frame called `clinsightfu
 # data("clinsightful_data") 
 
 head(clinsight::clinsightful_data)
-# colnames(clinsightful_data)
+
 ```
 
-There are several elements defined in `golem-config.yml` that require configuration before launching the application for the first time. To name a few:
+This object is an example of a healthy `study_data` object, which should preferably be stored as an RDS file prior to launching the app. Let's inspect it a little more:
 
-- `user_db` a Character string providing the path to the app databases. If it does not exist, one will be created based on app data and metadata, with all data labeled as new'/not yet reviewed.
-- `credentials_db` Character string. Path to the credentials database.
+### `study_data`
 
-The other two elements are `meta_data` and `study_data`, accepting file paths to the app's primary source of data, stored as RDS files. As such, below are comprehensive data specifications for these objects. Note that the `study_data` object should be created with the pre-processing helper function called `merge_meta_with_data()` which accepts your raw data sources and merges them with the `meta_data` object defined in this article.
+The RDS file (or data.frame) ported to the `study_data` element contains the following required columns below. 
 
-### Raw Data
+- `site_code`: character or integer, identifier for study site; If an integer, recommended to add prefix "Site" as this will display more intuitively in the application's UI
+- `subject_id`: character, unique identifier for a subject
+- `event_repeat`: integer, helps keep track of unique `event_id` for a single `subject_id` and `event_date`
+- `event_id`: character, names that help classify types of  `event_name`s into like-groups, generally characterized by site visits. For example,  "SCR" for the screening visit, "VIS" for Visit X (where X is some integer), and "EXIT" for when the patient exits the study trial. However, some `event_id`s track events that could apply outside of any visit, like AE, ConMed, Medical History, etc.
+- `event_name`: character, an "event" generally characterizes some sort of site visit, whether that be a "Screening", "Visit X" (where X is some integer), "Exit", or "Any Visit".
+- `event_date`: Date, the date associated with `event_name`
+- `form_id`: character, a unique identifier for the form the `item_name` metric and `item_value` were pulled from. Note: when `item_type` is continuous, `form_id` can contain several different `item_group`s. However, when `item_type` is 'other', `item_group` can be made up of several `form_id` values.
+- `form_repeat`: integer, helps keep track of unique `item_name`s collected from a specific `form_id` for a given `subject_id`. `form_repeat` is particularly helpful when consolidating data like Adverse Events into this data format. Specifically, if more than one AE is collected on a patient, they'll have more than one `form_repeat`
+- `edit_date_time`: datetime (POSIXct), the last time this record was edited
+- `db_update_time`: datetime (POSIXct), the last time the database storing this record was updated.
+- `region`: character, describing the region code that `site_code` falls under
+- `day`: a difftime number, meaning it contains both a number and unit of time. It measures the number of days each visit is from screening
+- `vis_day`: numeric, a numeric representation of `day`
+- `vis_num`: numeric, a numeric representation of `event_name`
+- `event_label`: character, an abbreviation of `event_name`
+- `item_name`:  character, describes a metric or parameter of interest.
+- `item_type`:  character, classifies `item_name`s into either 'continuous' or 'other', where continuous types are those generally associated with the CDISC "basic data structure" (BDS). That is, each `item_name` metric is collected over time at a patient visit (`event_name`). The 'other' type represents all non-time dependent measures, like demographic info, adverse events, Medications, medical history, etc.
+- `item_group`:  character, provides a high level category that groups like-`item_name`s together. For example, an `item_group` = 'Vital Signs' will group together pertinent `item_name` metrics like BMI, Pulse, Blood pressure, etc.
+- `item_value`:  character, the measurement collected for a given `item_name`. The value collected may be a number like 150 (when collecting a patient's weight) or a word (such as 'white' for the subject's race).
+- `item_unit`:  character, tracking the unit of measurement for `item_name` and `item_value`.
+- `lower_lim`: numeric, some `item_name`s (particularly the 'continuous' type) have a pre-defined range of values that are considered normal. This is the lower limit to that range.
+- `upper_lim`: numeric, some `item_name`s (particularly the 'continuous' type) have a pre-defined range of values that are considered normal. This is the upper limit to that range.
+- `significance`:  character, either 'CS' which means 'Clinically Significant' or 'NCS' which means 'Not Clinically Significant'
+- `reason_notdone`:  character, an effort to describe why the `item_value` field is `NA` / missing.
+
+
+
+### Processing your Raw Data
+
+So, the next logical question is "How do I get my EDC's data into the `study_data` format?" Well, this package currently offers a pre-processing helper function called `merge_meta_with_data()` which accepts raw data sources from the Viedoc EDC vendor and merges them with the `meta_data` object (defined below) to create a viable `study_data` object. As such, we'll spend some time covering what this helper function expects of your raw data and `meta_data` object and how it transforms it into the `study_data` object we need for app launch.
 
-Insert Raw data specs
+First, let's discuss the app's metadata needs!
 
+### The `meta_data`
 
-### `meta_data`
+The RDS file ported to the `meta_data` configuration above is a list of data.frames which (not surprisingly) contains metadata information for the application. However, it is also the same object used in `merge_meta_with_data()` to produce our `study_data` object!
 
-The RDS file ported to the `meta_data` configuration is a list of data.frames which (not surprisingly) contains metadata used for the application. Prior to launching the application, the metadata will be merged with the `study_data` to dictate which variables will be included in the application, and in which tab the variables will be displayed. The goal is that most, if not all study-specific data will be captured in the metadata, leaving the scripts to run the application largely unaltered between studies.
+That is, the metadata will be merged with the raw data to dictate which variables will be included in the `study_data`, and in which tab the variables will be displayed. The goal is that most, if not all study-specific data will be captured in the metadata, leaving the scripts to run the application largely unaltered between studies.
+
+Just like for `study_data`, this package also bundles a built-in metadata object called `metadata`. To view an example metadata file, run the following chunk of code:
+
+```{r metadata}
+
+meta_data <- clinsight::metadata
+lapply(meta_data, head)
+
+```
 
 Specifications for the list of data.frames include:
+
+ - `column_specs`: Used to map raw data variable names over to new names, and assign a column type.
+     - `name_raw`: character, variable name in the raw data source
+     - `name_new`: character, desired variable name to use in `study_data` and in the application
+     - `col_type`: character, either "C", "D", "T", or "i" which stands for "character", "date", "time", and "integer" respectively.
  - `events`: Used to create a simple timeline in the application, with predefined number of planned visits, N. It contains the following columns:
      - `event_number`: integer. Example: 0, 1, 2, ..., N
      - `event_name`: character. Example: "Screening", "Visit 1", "Visit 2", ..., "Visit N"
@@ -91,46 +141,61 @@ Specifications for the list of data.frames include:
       - `unit`: character, unit of measure
       - `lower_limit`: numeric, the lower limit of what's considered clinically significant
       - `upper_limit`: numeric, the upper limit of what's considered clinically significant
-      
   
  - `general`: Contains the same columns as `common_forms` and is used in the same way. That is, it's used to select and rename the raw data when transformed into the desired `study_data` format. Note: creating the `study_data` data.frame should use `merge_meta_with_data()` where (not surprisingly), the metadata is merged with the raw study data. Please refer to the `common_forms` spec above. However, I will note that there are certain `item_name`s that are required, even if missing in `study_data`, including: "Age", "Sex", "ECOG", "Eligible", "WHO.classification", "DiscontinuationReason", "DrugAdminDate", and "DrugAdminDose".
  
  - `groups`: Contains the columns `item_group`, `item_type`, `item_scale`,`use_unscaled_limits`.
 
+ - `table_names`: Used for ... #TODO
+     - `table_name`: character, 
+     - `raw_name`: character,
 
-### The end product: `study_data`
-The RDS file (or data.frame) ported to the `study_data` element contains the following required columns below. 
 
-- `site_code`: character or integer, identifier for study site; If an integer, recommended to add prefix "Site" as this will display more intuitively in the application's UI
-- `subject_id`: character, unique identifier for a subject
-- `event_repeat`: integer, helps keep track of unique `event_id` for a single `subject_id` and `event_date`
-- `event_id`: character, names that help classify types of  `event_name`s into like-groups, generally characterized by site visits. For example,  "SCR" for the screening visit, "VIS" for Visit X (where X is some integer), and "EXIT" for when the patient exits the study trial. However, some `event_id`s track events that could apply outside of any visit, like AE, ConMed, Medical History, etc.
-- `event_name`: character, an "event" generally characterizes some sort of site visit, whether that be a "Screening", "Visit X" (where X is some integer), "Exit", or "Any Visit".
-- `event_date`: Date, the date associated with `event_name`
-- `form_id`: character, a unique identifier for the form the `item_name` metric and `item_value` were pulled from. Note: when `item_type` is continuous, `form_id` can contain several different `item_group`s. However, when `item_type` is 'other', `item_group` can be made up of several `form_id` values.
-- `form_repeat`: integer, helps keep track of unique `item_name`s collected from a specific `form_id` for a given `subject_id`. `form_repeat` is particularly helpful when consolidating data like Adverse Events into this data format. Specifically, if more than one AE is collected on a patient, they'll have more than one `form_repeat`
-- `edit_date_time`: datetime (POSIXct), the last time this record was edited
-- `db_update_time`: datetime (POSIXct), the last time the database storing this record was updated.
-- `region`: character, describing the region code that `site_code` falls under
-- `day`: a difftime number, meaning it contains both a number and unit of time. It measures the number of days each visit is from screening
-- `vis_day`: numeric, a numeric representation of `day`
-- `vis_num`: numeric, a numeric representation of `event_name`
-- `event_label`: character, an abbreviation of `event_name`
-- `item_name`:  character, describes a metric or parameter of interest.
-- `item_type`:  character, classifies `item_name`s into either 'continuous' or 'other', where continuous types are those generally associated with the CDISC "basic data structure" (BDS). That is, each `item_name` metric is collected over time at a patient visit (`event_name`). The 'other' type represents all non-time dependent measures, like demographic info, adverse events, Medications, medical history, etc.
-- `item_group`:  character, provides is a high level category that groups like-`item_name`s together. For example, and `item_group` = 'Vital Signs' will group together pertinent `item_name` metrics like BMI, Pulse, Blood pressure, etc.
-- `item_value`:  character, the measurement collected for a given `item_name`. The value collected may be a number like 150 (when collecting a patient's weight) or a word (such as 'white' for the subject's race).
-- `item_unit`:  character, tracking the unit of measurement for `item_name` and `item_value`.
-- `lower_lim`: numeric, some `item_name`s (particularly the 'continuous' type) have a pre-defined range of values that are considered normal. This is the lower limit to that range.
-- `upper_lim`: numeric, some `item_name`s (particularly the 'continuous' type) have a pre-defined range of values that are considered normal. This is the upper limit to that range.
-- `significance`:  character, either 'CS' which means 'Clinically Significant' or 'NCS' which means 'Not Clinically Significant'
-- `reason_notdone`:  character, an effort to describe why the `item_value` field is `NA` / missing.
+#### `get_metadata()` & `items_expanded`
+
+So, lastly, you may notice that there is a 7th data.frame called `items_expanded`. This data.frame is actually derived after the user runs a helper function called `get_metadata()` which takes an XLSX file containing the first 6 data.frames (one per tab) and expands the tabs of your choosing with the column of your choosing. In other words, `get_metadata()` helps us get create something a bit more polished, like the `clinsight::metadata`. As you'll see later, this is a crucial step that's relied on heavily in `merge_meta_with_data()`. So, after you've compiled your metadata XLSX spreadsheet with the first 6 data.frames (tabs) mentioned above, you're ready to run code that looks like the following:
+
+```{r get_metadata(), eval = FALSE}
+# usethis::edit_r_environ()
+data_path <- Sys.getenv("METADATA_PATH")
+meta_data <- get_metadata(data_path,
+                          expand_tab_items = c("common_forms", "study_forms", "general"),
+                          expand_cols = "suffix")
+
+```
+
+In summary, `get_metadata` will initiate the `meta_data` object with the first 6 data.frames directly from the Excel file, and then `items_expanded` will be created by expanding `common_forms`, `study_forms`, and `general` data.frames by the values stored in the `suffix` column. The result will be appended onto the `meta_data` object as the 7th data.frame in the list.
 
 
+### Read in your study data with `get_raw_data()`
 
+Now that we have an understanding of our `meta_data` specs, we can discuss how they interact with your raw data. The rest of this vignette will feel more like an R script as we discuss how this happens. First, we need to read in our raw data. There is yet another helper function bundled in this package called `get_raw_data()` to help us do that. It's a pretty simple wrapper function that basically reads in raw data files stored as CSVs from a designated folder. As such, your function call should look something like the following code chunk. Notice, you can either set your raw data path explicitly, or in your `.Renviron` file.
 
+```{r get_raw_data(), eval = FALSE}
+# usethis::edit_r_environ()
+data_path <- Sys.getenv("RAW_DATA_PATH")
+raw_data <- get_raw_data(data_path, column_specs = metadata$column_specs)
+
+```
+
+After the data is read into R, `get_raw_data()` uses the `column_specs` you provided to rename the variables into the desired naming conventions. Notice that all of these variables are required, so it's extremely important that you identify which variables from your EDC mirror them. `get_raw_data()` will also perform some light derivations upon load. Specifically, it standardizes the `vis_day` and `vis_num` variables based on `event_id` and `day`. Similarly, `event_name` & `event_label` have some clean up performed on their values to standardize them for presentation in the app. Next, the data is ordered by `site_code` and `subject_id` before it's returned to the user. Last, notice that everything is bound up & returned as a single data.frame for the user to take to the next step.
+
+
+### Finish the job with `merge_meta_with_data()`
+
+Now that our data has been read in and minimally cleaned up, we can finally use the `merge_meta_with_data()` function as shown below:
+
+```{r merge_meta_with_data(), eval = FALSE}
+
+study_data <- merge_meta_with_data(data = raw_data, meta = meta_data)
+
+```
 
+This function uses the rest of the metadata data.frames to further organize your raw data into something usable by the app, including but not limited to the following:
 
+- fixing multiple choice variables using a function called `fix_multiple_choice_vars()`
+- merging the raw data with `meta_data$items_expanded`
+- applying any study specific fixes using a function called `apply_study_specfific_fixes()`. #TODO
 
 
 

From 617749fc5b93a20e2f95aa4ed3e8ae54eaa27844 Mon Sep 17 00:00:00 2001
From: "Aaron Clark (Arcus)" <aclark02@arcusbio.com>
Date: Tue, 9 Jul 2024 16:14:12 -0400
Subject: [PATCH 18/23] making more progress

---
 vignettes/data_spec.Rmd | 48 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/vignettes/data_spec.Rmd b/vignettes/data_spec.Rmd
index f4317f9a..609b935a 100644
--- a/vignettes/data_spec.Rmd
+++ b/vignettes/data_spec.Rmd
@@ -153,12 +153,12 @@ Specifications for the list of data.frames include:
 
 #### `get_metadata()` & `items_expanded`
 
-So, lastly, you may notice that there is a 7th data.frame called `items_expanded`. This data.frame is actually derived after the user runs a helper function called `get_metadata()` which takes an XLSX file containing the first 6 data.frames (one per tab) and expands the tabs of your choosing with the column of your choosing. In other words, `get_metadata()` helps us get create something a bit more polished, like the `clinsight::metadata`. As you'll see later, this is a crucial step that's relied on heavily in `merge_meta_with_data()`. So, after you've compiled your metadata XLSX spreadsheet with the first 6 data.frames (tabs) mentioned above, you're ready to run code that looks like the following:
+So, lastly, you may notice that there is a 7th data.frame called `items_expanded`. This data.frame is actually derived after the user runs a helper function called `get_metadata()` which takes an XLSX file containing the first 6 data.frames (one per tab) and expands the tabs of your choosing with the column of your choosing. In other words, `get_metadata()` makes sure your existing metadata is in the correct format and helps us create something a bit more polished & digestible, like `items_expanded` for `clinsight::metadata`. As you'll see later, performing this step is crucial for `merge_meta_with_data()` which comes next. So, after you've compiled your metadata XLSX spreadsheet with the first 6 data.frames (tabs) mentioned above, you're ready to run code that looks like the following:
 
 ```{r get_metadata(), eval = FALSE}
 # usethis::edit_r_environ()
-data_path <- Sys.getenv("METADATA_PATH")
-meta_data <- get_metadata(data_path,
+meta_path <- Sys.getenv("METADATA_PATH")
+meta_data <- get_metadata(file.path(meta_path, "my_metadata.xlsx"),
                           expand_tab_items = c("common_forms", "study_forms", "general"),
                           expand_cols = "suffix")
 
@@ -166,6 +166,14 @@ meta_data <- get_metadata(data_path,
 
 In summary, `get_metadata` will initiate the `meta_data` object with the first 6 data.frames directly from the Excel file, and then `items_expanded` will be created by expanding `common_forms`, `study_forms`, and `general` data.frames by the values stored in the `suffix` column. The result will be appended onto the `meta_data` object as the 7th data.frame in the list.
 
+Once complete, save your metadata to an .RDS file:
+
+```{r save_metadata, eval = FALSE}
+
+saveRDS(meta_data, file.path(Sys.getenv("METADATA_PATH"), "meta_data.rds"))
+
+```
+
 
 ### Read in your study data with `get_raw_data()`
 
@@ -173,7 +181,7 @@ Now that we have an understanding of our `meta_data` specs, we can discuss how t
 
 ```{r get_raw_data(), eval = FALSE}
 # usethis::edit_r_environ()
-data_path <- Sys.getenv("RAW_DATA_PATH")
+data_path <- Sys.getenv("DATA_PATH")
 raw_data <- get_raw_data(data_path, column_specs = metadata$column_specs)
 
 ```
@@ -198,5 +206,37 @@ This function uses the rest of the metadata data.frames to further organize your
 - applying any study specific fixes using a function called `apply_study_specfific_fixes()`. #TODO
 
 
+Once complete, save your metadata to an .RDS file:
+
+```{r save_metadata, eval = FALSE}
+
+saveRDS(study_data, file.path(Sys.getenv("DATA_PATH"), "study_data.rds"))
+
+```
+
+## Launch the app
+
+Circling back to the configuration file we shared at the beginning of this vignette, you're now ready to launch this application.
+
+```yml
+default:
+  golem_name: clinsight
+  golem_version: 0.0.0.9003
+  app_prod: no
+  data_folder: test_data
+  study_data: !expr clinsight::clinsightful_data
+  meta_data: !expr clinsight::metadata
+  user_db: test_user_db.sqlite
+  credentials_db: test_credentials_db.sqlite
+production:
+  app_prod: yes
+  data_folder: study_data
+  study_data: study_data.rds
+  meta_data: metadata.rds
+  user_db: user_db.sqlite
+  credentials_db: credentials_db.sqlite
+dev:
+  golem_wd: !expr golem::pkg_path()
+```
 
 

From d0fb8607cb933f49a5e3da1fc2f054cbee5bfd16 Mon Sep 17 00:00:00 2001
From: LDSamson <l.d.samson@gmail.com>
Date: Fri, 12 Jul 2024 16:18:28 +0200
Subject: [PATCH 19/23] Fix vignette

---
 vignettes/data_spec.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vignettes/data_spec.Rmd b/vignettes/data_spec.Rmd
index 609b935a..8d85d174 100644
--- a/vignettes/data_spec.Rmd
+++ b/vignettes/data_spec.Rmd
@@ -2,7 +2,7 @@
 title: "App Configuration & Input Data Specifications"
 output: rmarkdown::html_vignette
 vignette: >
-  %\VignetteIndexEntry{Input Data Specification}
+  %\VignetteIndexEntry{App Configuration & Input Data Specifications}
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 ---
@@ -208,7 +208,7 @@ This function uses the rest of the metadata data.frames to further organize your
 
 Once complete, save your metadata to an .RDS file:
 
-```{r save_metadata, eval = FALSE}
+```{r save_study_data, eval = FALSE}
 
 saveRDS(study_data, file.path(Sys.getenv("DATA_PATH"), "study_data.rds"))
 

From 9e7454067ef233c1e78cceba0f2f08980c8a7e04 Mon Sep 17 00:00:00 2001
From: LDSamson <l.d.samson@gmail.com>
Date: Fri, 12 Jul 2024 16:20:10 +0200
Subject: [PATCH 20/23] Update clinsightful_data description

---
 R/data.R                 | 12 +++++++-----
 man/clinsight-package.Rd |  2 +-
 man/clinsightful_data.Rd |  6 ++++--
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/R/data.R b/R/data.R
index 615ada6d..01b1007b 100644
--- a/R/data.R
+++ b/R/data.R
@@ -45,11 +45,13 @@
 
 #' Clinical Trial test data
 #'
-#' A data.frame containing randomly created clinical trial data. Acceptable for 
-#' for the `data` argument in `run_app()` & used for testing purposes.
-#' 
-#' @format a data.frame with 6,483 rows and 24 variables. 
-#' 
+#' A data.frame containing randomly created clinical trial data. Used for
+#' testing purposes. It will also be used to run the app with example data when
+#' executing `run_app()` with the default configuration settings, or when
+#' running `golem::run_dev()`.
+#'
+#' @format a data.frame with 6,483 rows and 24 variables.
+#'
 #' @source Created with `data-raw/create_random_data.R`
 "clinsightful_data"
 
diff --git a/man/clinsight-package.Rd b/man/clinsight-package.Rd
index 57c7ff58..a346d98f 100644
--- a/man/clinsight-package.Rd
+++ b/man/clinsight-package.Rd
@@ -19,7 +19,7 @@ Useful links:
 
 }
 \author{
-\strong{Maintainer}: Leonard Dani<U+00EB>l Samson \email{lsamson@gcp-service.com}
+\strong{Maintainer}: Leonard Daniël Samson \email{lsamson@gcp-service.com}
 
 Other contributors:
 \itemize{
diff --git a/man/clinsightful_data.Rd b/man/clinsightful_data.Rd
index 21ae85d0..a310f4b9 100644
--- a/man/clinsightful_data.Rd
+++ b/man/clinsightful_data.Rd
@@ -14,7 +14,9 @@ Created with \code{data-raw/create_random_data.R}
 clinsightful_data
 }
 \description{
-A data.frame containing randomly created clinical trial data. Acceptable for
-for the \code{data} argument in \code{run_app()} & used for testing purposes.
+A data.frame containing randomly created clinical trial data. Used for
+testing purposes. It will also be used to run the app with example data when
+executing \code{run_app()} with the default configuration settings, or when
+running \code{golem::run_dev()}.
 }
 \keyword{datasets}

From 8b45810cc393d05032a8525a9cc3369555440d11 Mon Sep 17 00:00:00 2001
From: LDSamson <l.d.samson@gmail.com>
Date: Fri, 12 Jul 2024 16:20:42 +0200
Subject: [PATCH 21/23] Provide updated yaml

---
 vignettes/data_spec.Rmd | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/vignettes/data_spec.Rmd b/vignettes/data_spec.Rmd
index 8d85d174..abc32343 100644
--- a/vignettes/data_spec.Rmd
+++ b/vignettes/data_spec.Rmd
@@ -22,22 +22,23 @@ In order to get started and plug your organizations EDC data into the `clinsight
 ```yml
 default:
   golem_name: clinsight
-  golem_version: 0.0.0.9003
+  golem_version: 0.0.0.9004
   app_prod: no
-  data_folder: test_data
+  user_identification: test_user
   study_data: !expr clinsight::clinsightful_data
   meta_data: !expr clinsight::metadata
-  user_db: test_user_db.sqlite
-  credentials_db: test_credentials_db.sqlite
-production:
+  user_db: user_db.sqlite
+  group_roles:
+  - Medical Monitor
+  - Data Manager
+  - Administrator
+  - Investigator
+shinymanager:
   app_prod: yes
-  data_folder: study_data
+  user_identification: shinymanager
   study_data: study_data.rds
   meta_data: metadata.rds
-  user_db: user_db.sqlite
   credentials_db: credentials_db.sqlite
-dev:
-  golem_wd: !expr golem::pkg_path()
 ```
 
 First and foremost, notice that the configuration can vary depending on deployment use case. For example, the above file is designed to run (by default) with test data built into the app. Only once in `production` mode, does the app actually leverage data you've gathered from your EDC system, as RDS files. As seen above, there are several other elements defined in `golem-config.yml` that can be configured before launching the application for the first time. To name a few:
@@ -221,22 +222,23 @@ Circling back to the configuration file we shared at the beginning of this vigne
 ```yml
 default:
   golem_name: clinsight
-  golem_version: 0.0.0.9003
+  golem_version: 0.0.0.9004
   app_prod: no
-  data_folder: test_data
+  user_identification: test_user
   study_data: !expr clinsight::clinsightful_data
   meta_data: !expr clinsight::metadata
-  user_db: test_user_db.sqlite
-  credentials_db: test_credentials_db.sqlite
-production:
+  user_db: user_db.sqlite
+  group_roles:
+  - Medical Monitor
+  - Data Manager
+  - Administrator
+  - Investigator
+shinymanager:
   app_prod: yes
-  data_folder: study_data
+  user_identification: shinymanager
   study_data: study_data.rds
   meta_data: metadata.rds
-  user_db: user_db.sqlite
   credentials_db: credentials_db.sqlite
-dev:
-  golem_wd: !expr golem::pkg_path()
 ```
 
 

From 1b9a2405030042ff7754c6c409754140f12533ec Mon Sep 17 00:00:00 2001
From: LDSamson <l.d.samson@gmail.com>
Date: Fri, 12 Jul 2024 17:37:11 +0200
Subject: [PATCH 22/23] Add a few clarifications

---
 vignettes/data_spec.Rmd | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/vignettes/data_spec.Rmd b/vignettes/data_spec.Rmd
index abc32343..7e312e4d 100644
--- a/vignettes/data_spec.Rmd
+++ b/vignettes/data_spec.Rmd
@@ -13,7 +13,7 @@ knitr::opts_chunk$set(
 )
 ```
 
-Note: this page is "under construction" until the `clinsight` package developers create helper functions that transform at least one other EDC data source into the desired format. In the meantime, this page provides useful documentation to the existing data spec. Eventually, it will also share guidelines about how to combine your input data with metadata sources, etc.
+Note: this page is "under construction" until the `clinsight` package developers create helper functions that transform at least one other EDC data source into the desired format. In the meantime, this page provides useful documentation of the existing data specifications. Eventually, it will also share guidelines about how to combine your input data with metadata sources, etc.
 
 ## Intro
 
@@ -41,15 +41,16 @@ shinymanager:
   credentials_db: credentials_db.sqlite
 ```
 
-First and foremost, notice that the configuration can vary depending on deployment use case. For example, the above file is designed to run (by default) with test data built into the app. Only once in `production` mode, does the app actually leverage data you've gathered from your EDC system, as RDS files. As seen above, there are several other elements defined in `golem-config.yml` that can be configured before launching the application for the first time. To name a few:
+First and foremost, notice that the configuration can vary depending on deployment use case. For example, the above file is designed to run (by default) with test data built into the app. Only in the `shinymanager` configuration (where `app_prod` is `yes`) does the app actually leverage data you've gathered from your EDC system, as RDS files. As seen above, there are several other elements defined in `golem-config.yml` that can be configured before launching the application for the first time.
 
-- `user_db` a Character string providing the path to the app's review database. If it does not exist, one will be created based on app data and metadata, with all data labeled as new'/not yet reviewed.
-- `credentials_db` Character string. Path to the credentials database.
+The main two elements are `meta_data` and `study_data`, accepting file paths to the app's primary data sources, stored as RDS files. The `study_data` object should be created with the pre-processing helper function called `merge_meta_with_data()` which accepts raw data sources and merges them with the `meta_data` object defined in this article.
 
-The other two elements are `meta_data` and `study_data`, accepting file paths to the app's primary data sources, stored as RDS files. The `study_data` object should be created with the pre-processing helper function called `merge_meta_with_data()` which accepts raw data sources and merges them with the `meta_data` object defined in this article.
+Other elements are:
 
-Depending on the EDC vendor used, the size, shape, and format of their "raw data" may vary. We've compiled a few functions that help admin users pre-process the data they own. However, before we deep dive into how to use those, let's focus on what the final result of those pre-processing steps to understand what's being fed to the app.
+- `user_db` a Character string providing the path to the app's review database. If it does not exist, one will be created based on the `study_data` and `meta_data`, with all data labeled as new/not yet reviewed.
+- `credentials_db` Character string. Path to the credentials database. Only needed when using `shinymanager` for user identification. The database will be created automatically if needed. 
 
+Depending on the EDC vendor used, the size, shape, and format of their "raw data" may vary. We've compiled a few functions that help admin users pre-process the data they own. However, before we dive deep into how to use those, let's focus on the final result of those pre-processing steps to understand what's being fed to the app.
 
 
 ## Data Specifications
@@ -101,15 +102,15 @@ The RDS file (or data.frame) ported to the `study_data` element contains the fol
 
 ### Processing your Raw Data
 
-So, the next logical question is "How do I get my EDC's data into the `study_data` format?" Well, this package currently offers a pre-processing helper function called `merge_meta_with_data()` which accepts raw data sources from the [Vedoc] EDC vendor and merges them with the `meta_data` object (defined below) to create a viable `study_data` object. As such, we'll spend some time covering what this helper function expects of your raw data and `meta_data` object and how it transforms it into the `study_data` object we need for app launch.
+So, the next logical question is "How do I get my EDC's data into the `study_data` format?" Well, this package currently offers a pre-processing helper function called `merge_meta_with_data()` which accepts raw data sources from the [Viedoc] EDC vendor and merges them with the `meta_data` object (defined below) to create a viable `study_data` object. As such, we'll spend some time covering what this helper function expects of your raw data and `meta_data` object and how it transforms it into the `study_data` object we need for app launch.
 
 First, let's discuss the app's metadata needs!
 
-### The `meta_data`
+### `meta_data`
 
-The RDS file ported to the `meta_data` configuration above is a list of data.frames which (not surprisingly) contains metadata information for the application. However, it is also the same object used in `merge_meta_with_data()` to produce our `study_data` object!
+The `meta_data` object is a list of data.frames which (not surprisingly) contains metadata information for the application. It provides study-specific settings and controls where study data will be visible in the application. It can be created in the right format by editing the Excel template in `data-raw/metadata.xlsx` and then creating a metadata object with the function `get_metadata()` (e.g. `meta <- clinsight::get_metadata("path-to-custom-metadata.xlsx")`). The `meta_data` object should be saved as a `.rds` file so that `clinsight` can use it (e.g. `saveRDS(meta, "data_folder/metadata.rds")`).
 
-That is, the metadata will be merged with the raw data to dictate which variables will be included in the `study_data`, and in which tab the variables will be displayed. The goal is that most, if not all study-specific data will be captured in the metadata, leaving the scripts to run the application largely unaltered between studies.
+As stated previously, the metadata should also be used to shape the raw `study_data` into the right format, and to dictate which variables will be included in the `study_data`. This can be done by using the function `merge_meta_with_data()`, and will be described in detail in the next section. The goal is that most, if not all, study-specific data will be captured in the metadata, leaving the scripts to run the application largely unaltered between studies.
 
 Just like for `study_data`, this package also bundles a built-in metadata object called `metadata`. To view an example metadata file, run the following chunk of code:
 
@@ -147,7 +148,7 @@ Specifications for the list of data.frames include:
  
  - `groups`: Contains the columns `item_group`, `item_type`, `item_scale`,`use_unscaled_limits`.
 
- - `table_names`: Used for ... #TODO
+ - `table_names`: Used for renaming table column names into a more readable format. It is not required to list every column; if a column name is not defined here, the raw name will be used instead.
      - `table_name`: character, 
      - `raw_name`: character,
 

From 5e7754397cc2d961315c608c4e58456e4002edab Mon Sep 17 00:00:00 2001
From: LDSamson <l.d.samson@gmail.com>
Date: Fri, 12 Jul 2024 18:03:10 +0200
Subject: [PATCH 23/23] Update version

---
 DESCRIPTION           | 2 +-
 inst/golem-config.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 091b744c..53279592 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: clinsight
 Title: ClinSight
-Version: 0.0.0.9004
+Version: 0.0.0.9005
 Authors@R: c(
     person("Leonard Daniël", "Samson", , "lsamson@gcp-service.com", role = c("cre", "aut")),
     person("GCP-Service International Ltd.& Co. KG", role = "fnd")
diff --git a/inst/golem-config.yml b/inst/golem-config.yml
index ab60c706..28336711 100644
--- a/inst/golem-config.yml
+++ b/inst/golem-config.yml
@@ -1,6 +1,6 @@
 default:
   golem_name: clinsight
-  golem_version: 0.0.0.9004
+  golem_version: 0.0.0.9005
   app_prod: no
   user_identification: test_user
   study_data: !expr clinsight::clinsightful_data