Skip to content

Commit

Permalink
Merge pull request #32 from openpharma/018_improve_data_loading
Browse files Browse the repository at this point in the history
Ensure external metadata file can be used. Improve data loading.
  • Loading branch information
jthompson-arcus authored Jun 18, 2024
2 parents d4f8848 + 2f65717 commit 42afd79
Show file tree
Hide file tree
Showing 57 changed files with 565 additions and 905 deletions.
3 changes: 2 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ _\.new\.png$
^docs$
^dev$
^Meta$
^pkgdown$
^pkgdown$
^test_data$
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ inst/doc
/doc/
/Meta/
*.sqlite
/pkgdown/
/pkgdown/
/test_data/
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: clinsight
Title: ClinSight
Version: 0.0.0.9001
Version: 0.0.0.9002
Authors@R: c(
person("Leonard Daniël", "Samson", , "[email protected]", role = c("cre", "aut")),
person("GCP-Service International Ltd.& Co. KG", role = "fnd")
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ export(adjust_colnames)
export(app_ui)
export(bind_rows_custom)
export(check_appdata)
export(check_available_data)
export(clean_dates)
export(collapse_column_vals)
export(collapse_fct_levels)
Expand Down Expand Up @@ -47,6 +46,7 @@ export(get_db_connection)
export(get_ggplot_layer_names)
export(get_max_time)
export(get_meta_vars)
export(get_metadata)
export(get_raw_data)
export(get_review_data)
export(get_static_overview_data)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## Changed

- Improved metadata so that external file can be used.
- Improved data loading by using a config file.
- Created two renv profiles, one for development and one for production. Goal is
to minimize the package dependencies of the production version.
- Removed development package dependencies (for example devtools) that were not needed to run the application.
Expand Down
10 changes: 6 additions & 4 deletions R/app_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ app_sys <- function(...) {
#' Read App Config
#'
#' @param value Value to retrieve from the config file.
#' @param config GOLEM_CONFIG_ACTIVE value. If unset, R_CONFIG_ACTIVE.
#' If unset, "default".
#' @param config GOLEM_CONFIG_ACTIVE value. If unset, R_CONFIG_ACTIVE. If unset,
#' "default".
#' @param use_parent Logical, scan the parent directory for config file.
#' @param file Location of the config file
#' @param file Location of the config file. Can be set with the option
#' 'CONFIG_PATH' so that a study-specific config.yml file can be provided at
#' runtime, without rebuilding the application's package from source.
#'
#' @noRd
get_golem_config <- function(
Expand All @@ -33,7 +35,7 @@ get_golem_config <- function(
),
use_parent = TRUE,
# Modify this if your config file is somewhere else
file = app_sys("golem-config.yml")
file = Sys.getenv("CONFIG_PATH", app_sys("golem-config.yml"))
) {
config::get(
value = value,
Expand Down
28 changes: 12 additions & 16 deletions R/app_server.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,33 +19,23 @@ app_server <- function(
){
meta <- golem::get_golem_options("meta")
merged_data <- golem::get_golem_options("data")
if(is.character(merged_data)) merged_data <- readRDS(merged_data)
user_db <- golem::get_golem_options("user_db")
credentials_db <- golem::get_golem_options("credentials_db")
test_mode <- golem::get_golem_options("test_mode")

app_data <- get_appdata(merged_data)
app_data <- get_appdata(merged_data, meta = meta)
app_vars <- get_meta_vars(data = app_data, meta = meta)
app_tables <- lapply(
setNames(names(app_data), names(app_data)), \(x){
create_table(app_data[[x]], expected_columns = names(app_vars$items[[x]]))
})
check_appdata(app_data, meta)

if(!file.exists(user_db)){
warning("no database found. New database will be created")
db_create(get_review_data(merged_data), db_path = user_db)
} else{
if(!test_mode){
db_update(get_review_data(merged_data), db_path = user_db, data_synched = FALSE)
}
}

res_auth <- authenticate_server(
test_mode = test_mode,
sites = app_vars$Sites$site_code,
credentials_db = credentials_db,
credentials_pwd = golem::get_golem_options("credentials_pwd")
credentials_pwd = golem::get_golem_options("credentials_pwd")
)

output$user_info <- renderText({
Expand Down Expand Up @@ -84,7 +74,7 @@ app_server <- function(
static_overview_data <- get_static_overview_data(
data = app_data,
expected_general_columns = unique(
with(metadata$items_expanded, item_name[item_group == "General"])
with(meta$items_expanded, item_name[item_group == "General"])
)
)
# think of using the pool package, but functions such as row_update are not yet supported.
Expand Down Expand Up @@ -191,13 +181,20 @@ app_server <- function(

lapply(app_vars$groups, \(x){mod_study_forms_server(
id = paste0("sf_", simplify_string(x)), r = r, form = x,
form_items = app_vars$items[[x]], table_names = app_vars$table_names
form_items = app_vars$items[[x]], table_names = app_vars$table_names,
item_info = meta$groups[meta$groups$item_group == x, ]
) }) |>
unlist(recursive = FALSE)

mod_start_page_server("start_page_1", r, rev_data, navinfo, app_vars$all_forms,
app_vars$table_names)
mod_header_widgets_server("header_widgets_1", r, rev_data, navinfo)
mod_header_widgets_server(
id = "header_widgets_1",
r = r,
rev_data = rev_data,
navinfo = navinfo,
events = meta$events
)

# Only initiate the sidebar after successful login, because it contains a
# modal that pops up if data is out of synch. Modals interfere with shinymanager.
Expand Down Expand Up @@ -257,7 +254,6 @@ app_server <- function(
app_vars$all_forms,
table_names = app_vars$table_names
)

shiny::exportTestValues(
user_db = user_db,
active_participant = r$subject_id,
Expand Down
11 changes: 3 additions & 8 deletions R/fct_SQLite.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ db_temp_connect <- function(db_path, code, drv = RSQLite::SQLite()){
#' Creates application database. To create a database with all data flagged as
#' 'new', use the default settings of `reviewed`, `reviewer`, and `status`.
#'
#' @param data Either a data frame with review data (Usually created with
#' [get_review_data()]), or a character path to the raw data files.
#' @param data A data frame with review data (Usually created with
#' [get_review_data()]).
#' @param db_path A character vector with the path to the database to be
#' created.
#' @param reviewed Character vector. Sets the reviewed tag in the review
Expand All @@ -74,11 +74,6 @@ db_create <- function(
stopifnot(!file.exists(db_path))
stopifnot(reviewed %in% c("Yes", "No", ""))
stopifnot(is.data.frame(data) || is.character(data))
if(!is.data.frame(data)){
data <- get_raw_data(data) |>
merge_meta_with_data() |>
get_review_data()
}
df <- data |>
dplyr::mutate(
reviewed = reviewed,
Expand Down Expand Up @@ -147,7 +142,7 @@ db_update <- function(
cat("Start adding new rows to database\n")
updated_review_data <- update_review_data(
review_df = review_data,
latest_review_data = data, #get_review_data(merge_meta_with_data(data), common_vars),
latest_review_data = data,
common_vars = common_vars,
edit_time_var = edit_time_var
)
Expand Down
42 changes: 25 additions & 17 deletions R/fct_appdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,6 @@ get_raw_data <- function(
dplyr::rename(
setNames(column_specs$name_raw, column_specs$name_new)
) |>
dplyr::mutate(
db_update_time = max(edit_date_time, na.rm = T),
region = dplyr::case_when(
grepl("^AU", site_code) ~ "AUS",
grepl("^DE", site_code) ~ "GER",
grepl("^FR", site_code) ~ "FRA",
TRUE ~ NA_character_
)
) |>
dplyr::mutate(
day = event_date - min(event_date, na.rm = TRUE),
vis_day = ifelse(event_id %in% c("SCR", "VIS", "VISEXT", "VISVAR", "FU1", "FU2"), day, NA),
Expand All @@ -72,10 +63,6 @@ get_raw_data <- function(
),
.by = subject_id
) |>
# Add a fix for MC in raw dataset.
# Otherwise, we have to repeat this calculation multiple times when creating
# other datasets from the raw data
fix_multiple_choice_vars() |>
dplyr::arrange(
factor(site_code, levels = order_string(site_code)),
factor(subject_id, levels = order_string(subject_id))
Expand Down Expand Up @@ -104,13 +91,24 @@ get_raw_data <- function(
#' @export
#'
merge_meta_with_data <- function(
data = raw_data,
meta = metadata,
data,
meta,
expected_columns = c("LBORNR_Lower", "LBORNR_Upper", "LBORRESU",
"LBORRESUOTH", "LBREASND", "unit",
"lower_limit", "upper_limit", "LBCLSIG")
){
stopifnot(is.data.frame(data))
stopifnot(inherits(meta, "list"))
missing_colnames <- with(meta$column_specs, name_new[!name_new %in% names(data)]) |>
paste0(collapse = ", ")
if(nchar(missing_colnames) > 0) stop(
paste0("The following columns are defined in the metadata ",
"(column_specs$name_new) but are missing in the study data:\n",
missing_colnames, ".")
)
merged_data <- data |>
# fix MC values before merging:
fix_multiple_choice_vars(expected_vars = meta$items_expanded$var) |>
dplyr::right_join(meta$items_expanded, by = "var") |>
dplyr::filter(!is.na(item_value)) |>
dplyr::mutate(
Expand Down Expand Up @@ -161,7 +159,7 @@ merge_meta_with_data <- function(
apply_study_specific_fixes <- function(
data,
form_id_vars = c("subject_id", "event_name", "item_group")
){
){
## apply study-specific fixes:
# fix significance in ECG before proceeding (stored in its own separate variable):
ECG_significance <- data |>
Expand Down Expand Up @@ -204,7 +202,17 @@ apply_study_specific_fixes <- function(
),
.by = c(subject_id, form_repeat)
)
data

# Add regions:
data |>
dplyr::mutate(
region = dplyr::case_when(
grepl("^AU", site_code) ~ "AUS",
grepl("^DE", site_code) ~ "GER",
grepl("^FR", site_code) ~ "FRA",
TRUE ~ NA_character_
)
)
}

#' Get appdata
Expand Down
59 changes: 44 additions & 15 deletions R/fct_data_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
#' @param expand_tab_items Character vector with the names of the tabs of which
#' the items need to be expanded. If not empty, a new data frame will be
#' created named 'expanded_items', containing all items in the tabs of
#' `expand_tab_items`.
#' `expand_tab_items`. Will abort if a tab name is provided that does not
#' exist in the metadata.
#' @param expand_cols Column names containing the columns for expansion. Will be
#' ignored if the variable `expand_tab_items` is left empty.
#'
#' @return A list with data frames.
#' @export
#'
get_metadata <- function(
filepath,
Expand All @@ -33,7 +35,6 @@ get_metadata <- function(
message("'items_expanded' already present. Expanding items aborted.")
meta
})

missing_tab_items <- expand_tab_items[!expand_tab_items %in% names(meta)]
if(length(missing_tab_items) > 0) {
stop_message <- paste0(
Expand Down Expand Up @@ -63,32 +64,60 @@ get_metadata <- function(
}

#' Correct multiple choice variables
#'
#' Function to correct multiple choice variables in the data.
#'
#' @param data data frame (typically the raw data)
#' @param meta metadata, list of data frames.
#' In some EDC systems, if there is a multiple choice variable in which multiple
#' answers are possible, the variable will be renamed with a suffix with the
#' multiple answers in it. For example var1, var2, for answers 1 and 2. This
#' function cleans this specific output so that the variable name remains
#' consistent.
#'
#' @param data A data frame.
#' @param expected_vars Character vector containing the expected names of the
#' variables.
#' @param var_column column name in which the variable names are stored
#' @param value_column column name in which the values of the variables are stored
#' @param value_column column name in which the values of the variables are
#' stored
#' @param suffix Multiple choice suffix. Used to define multiple choice values
#' @param common_vars variables used for identifying unique rows in the dataset.
#' @param collapse_with character value to collapse the multiple choice options with.
#' If this value is NULL, the rows will be left as is.
#' @param collapse_with character value to collapse the multiple choice options
#' with. If this value is NULL, the rows will be left as is.
#'
#' @return data frame with corrected multiple choice variables
#' @examples
#' df <- data.frame(
#' ID = "Subj1",
#' var = c("Age", paste0("MH_TRT", 1:4)),
#' item_value = as.character(c(95, 67, 58, 83, 34))
#' )
#' fix_multiple_choice_vars(df, common_vars = "ID")
#' @export
#'
fix_multiple_choice_vars <- function(
data = raw_data,
meta = metadata,
data,
expected_vars = metadata$items_expanded$var,
var_column = "var",
value_column = "item_value",
suffix = "[[:digit:]]+$",
common_vars = c("subject_id", "event_repeat", "event_date", "form_repeat"),
collapse_with = "; "
){
stopifnot(is.data.frame(data))
stopifnot(is.character(expected_vars))
stopifnot("var_column should be a vector of length 1" = {
is.character(var_column) & length(var_column) == 1
})
stopifnot("suffix should be a character vector of length 1" = {
is.character(suffix) & length(suffix) == 1
})
stopifnot(is.character(common_vars))
if(!is.null(collapse_with)){
stopifnot("collapse_with should be a character vector of length 1" = {
is.character(collapse_with) & length(collapse_with) == 1
})
}

all_vars <- unique(data[[var_column]])
expected_vars <- meta$items_expanded$var

missing_vars <- expected_vars[!expected_vars %in% all_vars]
if(length(missing_vars) == 0) return(data)

Expand Down Expand Up @@ -137,7 +166,7 @@ fix_multiple_choice_vars <- function(
#'
get_meta_vars <- function(data = appdata, meta = metadata){
stopifnot(inherits(data, "list"))
stopifnot(inherits(metadata, "list"))
stopifnot(inherits(meta, "list"))
if(length(data) == 0) stop("Empty list with data provided")
vars <- list()
# add metadata variables:
Expand All @@ -148,7 +177,7 @@ get_meta_vars <- function(data = appdata, meta = metadata){
split(~item_group) |>
lapply(\(x){setNames(simplify_string(x$item_name), x$item_name)})
vars$groups <- meta$groups$item_group
common_forms <- c("Adverse events", "Medical History", "Medication", "Conc. Procedures")
common_forms <- unique(meta$common_forms$item_group)
vars$all_forms <- data.frame(
"main_tab" = c(
rep("Common events", times = length(common_forms)),
Expand All @@ -161,7 +190,7 @@ get_meta_vars <- function(data = appdata, meta = metadata){
vars$subject_id <- order_string(get_unique_vars(data, "subject_id")[[1]])
vars$Sites <- get_unique_vars(data, c("site_code", "region")) |>
dplyr::arrange(factor(site_code, levels = order_string(site_code)))
vars$table_names <- setNames(metadata$table_names$raw_name, metadata$table_names$table_name)
vars$table_names <- setNames(meta$table_names$raw_name, meta$table_names$table_name)
vars
}

Expand Down
Loading

0 comments on commit 42afd79

Please sign in to comment.