From 43507da2046395902972e5ac15c14fe639f53128 Mon Sep 17 00:00:00 2001 From: keesvanbochove Date: Tue, 28 Sep 2021 12:12:42 +0200 Subject: [PATCH 01/10] Performance optimization: use data.table fread and fwrite functions instead of readr This reverts commit b2b3c4f8eb7f117dbe390e9940b371dfdde8aedb. --- R/Incremental.R | 12 ++++++------ R/RunStudy.R | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/R/Incremental.R b/R/Incremental.R index 86d8ce73b..57e0d7875 100644 --- a/R/Incremental.R +++ b/R/Incremental.R @@ -20,7 +20,7 @@ computeChecksum <- function(column) { isTaskRequired <- function(..., checksum, recordKeepingFile, verbose = TRUE) { if (file.exists(recordKeepingFile)) { - recordKeeping <- readr::read_csv(recordKeepingFile, col_types = readr::cols()) + recordKeeping <- data.table::fread(recordKeepingFile) task <- recordKeeping[getKeyIndex(list(...), recordKeeping), ] if (nrow(task) == 0) { return(TRUE) @@ -46,7 +46,7 @@ isTaskRequired <- function(..., checksum, recordKeepingFile, verbose = TRUE) { getRequiredTasks <- function(..., checksum, recordKeepingFile) { tasks <- list(...) if (file.exists(recordKeepingFile) && length(tasks[[1]]) > 0) { - recordKeeping <- readr::read_csv(recordKeepingFile, col_types = readr::cols()) + recordKeeping <- data.table::fread(recordKeepingFile) tasks$checksum <- checksum tasks <- tibble::as_tibble(tasks) if (all(names(tasks) %in% names(recordKeeping))) { @@ -83,7 +83,7 @@ recordTasksDone <- function(..., checksum, recordKeepingFile, incremental = TRUE return() } if (file.exists(recordKeepingFile)) { - recordKeeping <- readr::read_csv(recordKeepingFile, col_types = readr::cols()) + recordKeeping <- data.table::fread(recordKeepingFile) idx <- getKeyIndex(list(...), recordKeeping) if (length(idx) > 0) { recordKeeping <- recordKeeping[-idx, ] @@ -95,7 +95,7 @@ recordTasksDone <- function(..., checksum, recordKeepingFile, incremental = TRUE newRow$checksum <- checksum newRow$timeStamp <- Sys.time() recordKeeping <- dplyr::bind_rows(recordKeeping, newRow) - readr::write_csv(recordKeeping, recordKeepingFile) + data.table::fwrite(recordKeeping, recordKeepingFile) } saveIncremental <- function(data, fileName, ...) { @@ -103,12 +103,12 @@ saveIncremental <- function(data, fileName, ...) { return() } if (file.exists(fileName)) { - previousData <- readr::read_csv(fileName, col_types = readr::cols()) + previousData <- data.table::fread(fileName) idx <- getKeyIndex(list(...), previousData) if (length(idx) > 0) { previousData <- previousData[-idx, ] } data <- dplyr::bind_rows(previousData, data) } - readr::write_csv(data, fileName) + data.table::fwrite(data, fileName) } diff --git a/R/RunStudy.R b/R/RunStudy.R index 390d10aff..7091d3af7 100644 --- a/R/RunStudy.R +++ b/R/RunStudy.R @@ -260,7 +260,7 @@ runStudy <- function(connectionDetails = NULL, writeToCsv(counts, file.path(exportFolder, "cohort_count.csv"), incremental = incremental, cohortId = counts$cohortId) # Read in the cohort counts - counts <- readr::read_csv(file.path(exportFolder, "cohort_count.csv"), col_types = readr::cols()) + counts <- data.table::fread(file.path(exportFolder, "cohort_count.csv")) colnames(counts) <- SqlRender::snakeCaseToCamelCase(colnames(counts)) # Export the cohorts from the study @@ -411,9 +411,9 @@ exportResults <- function(exportFolder, databaseId, cohortIdsToExcludeFromResult # Censor out the cohorts based on the IDs passed in for(i in 1:length(filesWithCohortIds)) { fileName <- file.path(tempFolder, filesWithCohortIds[i]) - fileContents <- readr::read_csv(fileName, col_types = readr::cols()) + fileContents <- data.table::fread(fileName) fileContents <- fileContents[!(fileContents$cohort_id %in% cohortIdsToExcludeFromResultsExport),] - readr::write_csv(fileContents, fileName) + data.table::fwrite(fileContents, fileName) } # Zip the results and copy to the main export folder @@ -591,7 +591,7 @@ writeToCsv <- function(data, fileName, incremental = FALSE, ...) { params$fileName = fileName do.call(saveIncremental, params) } else { - readr::write_csv(data, fileName) + data.table::fwrite(data, fileName) } } @@ -672,7 +672,7 @@ recordTasksDone <- function(..., checksum, recordKeepingFile, incremental = TRUE return() } if (file.exists(recordKeepingFile)) { - recordKeeping <- readr::read_csv(recordKeepingFile, col_types = readr::cols()) + recordKeeping <- data.table::fread(recordKeepingFile) idx <- getKeyIndex(list(...), recordKeeping) if (length(idx) > 0) { recordKeeping <- recordKeeping[-idx, ] @@ -684,7 +684,7 @@ recordTasksDone <- function(..., checksum, recordKeepingFile, incremental = TRUE newRow$checksum <- checksum newRow$timeStamp <- Sys.time() recordKeeping <- dplyr::bind_rows(recordKeeping, newRow) - readr::write_csv(recordKeeping, recordKeepingFile) + data.table::fwrite(recordKeeping, recordKeepingFile) } saveIncremental <- function(data, fileName, ...) { @@ -692,12 +692,12 @@ saveIncremental <- function(data, fileName, ...) { return() } if (file.exists(fileName)) { - previousData <- readr::read_csv(fileName, col_types = readr::cols()) + previousData <- data.table::fread(fileName) idx <- getKeyIndex(list(...), previousData) if (length(idx) > 0) { previousData <- previousData[-idx, ] } data <- dplyr::bind_rows(previousData, data) } - readr::write_csv(data, fileName) + data.table::fwrite(data, fileName) } From 69cdc55f3b29a03f7e95298c541ccbc07dca04a9 Mon Sep 17 00:00:00 2001 From: Maxim Moinat Date: Thu, 30 Sep 2021 16:55:20 +0200 Subject: [PATCH 02/10] filter end dates occurring before start date --- inst/sql/sql_server/103.sql | 1 + inst/sql/sql_server/104.sql | 1 + inst/sql/sql_server/105.sql | 1 + inst/sql/sql_server/106.sql | 1 + inst/sql/sql_server/107.sql | 1 + inst/sql/sql_server/108.sql | 1 + inst/sql/sql_server/109.sql | 1 + inst/sql/sql_server/110.sql | 1 + inst/sql/sql_server/111.sql | 1 + inst/sql/sql_server/112.sql | 1 + inst/sql/sql_server/343.sql | 1 + 11 files changed, 11 insertions(+) diff --git a/inst/sql/sql_server/103.sql b/inst/sql/sql_server/103.sql index 2ade6d4d5..349883d4d 100644 --- a/inst/sql/sql_server/103.sql +++ b/inst/sql/sql_server/103.sql @@ -1140,6 +1140,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/104.sql b/inst/sql/sql_server/104.sql index f3a4935bb..6933a1665 100644 --- a/inst/sql/sql_server/104.sql +++ b/inst/sql/sql_server/104.sql @@ -1195,6 +1195,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/105.sql b/inst/sql/sql_server/105.sql index ffc265cda..0fda1fe32 100644 --- a/inst/sql/sql_server/105.sql +++ b/inst/sql/sql_server/105.sql @@ -1365,6 +1365,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/106.sql b/inst/sql/sql_server/106.sql index 4acae7006..1a0173437 100644 --- a/inst/sql/sql_server/106.sql +++ b/inst/sql/sql_server/106.sql @@ -1412,6 +1412,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/107.sql b/inst/sql/sql_server/107.sql index 72f0422f5..7261413bf 100644 --- a/inst/sql/sql_server/107.sql +++ b/inst/sql/sql_server/107.sql @@ -1622,6 +1622,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,548,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,548,start_date) ; diff --git a/inst/sql/sql_server/108.sql b/inst/sql/sql_server/108.sql index f0f3d95f4..82c8510fe 100644 --- a/inst/sql/sql_server/108.sql +++ b/inst/sql/sql_server/108.sql @@ -1670,6 +1670,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,548,start_date) ; diff --git a/inst/sql/sql_server/109.sql b/inst/sql/sql_server/109.sql index e914531e8..eddd3223d 100644 --- a/inst/sql/sql_server/109.sql +++ b/inst/sql/sql_server/109.sql @@ -1620,6 +1620,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,548,start_date) ; diff --git a/inst/sql/sql_server/110.sql b/inst/sql/sql_server/110.sql index 49a3a0300..32b378987 100644 --- a/inst/sql/sql_server/110.sql +++ b/inst/sql/sql_server/110.sql @@ -1669,6 +1669,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,548,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,548,start_date) ; diff --git a/inst/sql/sql_server/111.sql b/inst/sql/sql_server/111.sql index 09e5825a8..306ac0ee4 100644 --- a/inst/sql/sql_server/111.sql +++ b/inst/sql/sql_server/111.sql @@ -1151,6 +1151,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/112.sql b/inst/sql/sql_server/112.sql index f2d0c1edf..1cc8ae54f 100644 --- a/inst/sql/sql_server/112.sql +++ b/inst/sql/sql_server/112.sql @@ -1204,6 +1204,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/343.sql b/inst/sql/sql_server/343.sql index ab64218e0..b7360c77f 100644 --- a/inst/sql/sql_server/343.sql +++ b/inst/sql/sql_server/343.sql @@ -1203,6 +1203,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,548,start_date) ; From 081d70955c318074cb470615b2cc36cb7e93a7f7 Mon Sep 17 00:00:00 2001 From: Maxim Moinat Date: Thu, 30 Sep 2021 16:59:46 +0200 Subject: [PATCH 03/10] fix negative period in strata cohorts --- inst/sql/sql_server/341.sql | 1 + inst/sql/sql_server/342.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/inst/sql/sql_server/341.sql b/inst/sql/sql_server/341.sql index d1b2c677d..cf0a95ef9 100644 --- a/inst/sql/sql_server/341.sql +++ b/inst/sql/sql_server/341.sql @@ -1203,6 +1203,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,270,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,270,start_date) ; diff --git a/inst/sql/sql_server/342.sql b/inst/sql/sql_server/342.sql index 318fd358b..d0b003879 100644 --- a/inst/sql/sql_server/342.sql +++ b/inst/sql/sql_server/342.sql @@ -1203,6 +1203,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,365,start_date) as start_date, end_date FROM #final_cohort CO +WHERE end_date >= DATEADD(day,365,start_date) ; From d865b1984a790da3b3498f500c5a5362c1578be0 Mon Sep 17 00:00:00 2001 From: keesvanbochove Date: Fri, 1 Oct 2021 14:15:55 +0200 Subject: [PATCH 04/10] Add performance optimization for results shiny app --- inst/shiny/PioneerWatchfulWaitingExplorer/global.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R index c41dd90f6..56a6b35fd 100644 --- a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R +++ b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R @@ -72,7 +72,7 @@ if (dataStorage == "database") { # print(file) tableName <- gsub(".csv$", "", file) camelCaseName <- SqlRender::snakeCaseToCamelCase(tableName) - data <- readr::read_csv(file.path(folder, file), col_types = readr::cols(), guess_max = 1e7, locale = readr::locale(encoding = "UTF-8")) + data <- data.table::fread(file.path(folder, file)) colnames(data) <- SqlRender::snakeCaseToCamelCase(colnames(data)) if (!overwrite && exists(camelCaseName, envir = .GlobalEnv)) { @@ -152,7 +152,7 @@ domainName <- "All" timeWindow <- data.frame(windowId=c(1:4), name=c("-365 to index", "index to 365", "366d to 730d", "731d+")) timeWindow$name <- as.character(timeWindow$name) -cohortXref <- readr::read_csv("./cohortXref.csv", col_types = readr::cols()) +cohortXref <- data.table::fread("./cohortXref.csv") targetCohort <- cohortXref[,c("targetId","targetName")] targetCohort <- unique(targetCohort) targetCohort <- targetCohort[order(targetCohort$targetName),] @@ -177,7 +177,7 @@ strataName <- cohortXref[cohortXref$cohortId == initCharCohortId,c("strataName") comparatorName <- cohortXref[cohortXref$cohortId == initCharCompareCohortId,c("targetName")][1] comparatorStrataName <- cohortXref[cohortXref$cohortId == initCharCompareCohortId,c("strataName")][1] -cohortInfo <- readr::read_csv("./cohorts.csv", col_types = readr::cols()) +cohortInfo <- data.table::fread("./cohorts.csv") cohortInfo <- cohortInfo[order(cohortInfo$name),] # Read in the database terms of use @@ -196,4 +196,4 @@ if(length(cohortStagingCount$name[cohortStagingCount$cohortId == max(ids)]) == 0 KMIds <- data.frame(id = ids, name = names) - +print(KMIds) From 14ddd690e7e7f96e02cd57ca793870e7010806bf Mon Sep 17 00:00:00 2001 From: keesvanbochove Date: Fri, 1 Oct 2021 15:10:53 +0200 Subject: [PATCH 05/10] Fix error in label assignment of time to event plot --- inst/shiny/PioneerWatchfulWaitingExplorer/global.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R index 56a6b35fd..3c51f1d25 100644 --- a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R +++ b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R @@ -188,12 +188,16 @@ database <- database[order(database$databaseId),] # Add Time to Event names and ids + +# Gather unique outcome ids in time to event table ids <- unique(cohortTimeToEvent$outcomeId) -names <- unique(cohortStagingCount$name[cohortStagingCount$cohortId %in% ids]) -if(length(cohortStagingCount$name[cohortStagingCount$cohortId == max(ids)]) == 0){ - names <- c(names, 'Symptomatic progr. free surv.') -} +# Find corresponding cohort names +names <- sapply(ids,function(id){ cohortStagingCount$name[cohortStagingCount$cohortId == id ][1]}) + +# hack/fix which I don't understand +#if(length(cohortStagingCount$name[cohortStagingCount$cohortId == max(ids)]) == 0){ +# names <- c(names, 'Symptomatic progr. free surv.') +#} KMIds <- data.frame(id = ids, name = names) -print(KMIds) From 0ebdcb427af6e172b05ef1ceb555232e561a5928 Mon Sep 17 00:00:00 2001 From: keesvanbochove Date: Fri, 1 Oct 2021 17:00:40 +0200 Subject: [PATCH 06/10] Various fixes to the Shiny app --- R/Shiny.R | 1 + .../PioneerWatchfulWaitingExplorer/global.R | 22 +++++++++++-------- .../PioneerWatchfulWaitingExplorer/server.R | 19 ++++++++-------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/R/Shiny.R b/R/Shiny.R index 66343e881..355e47484 100644 --- a/R/Shiny.R +++ b/R/Shiny.R @@ -24,6 +24,7 @@ launchShinyApp <- function(outputFolder, ensure_installed("shinydashboard") ensure_installed("shinyWidgets") ensure_installed("DT") + ensure_installed("data.table") ensure_installed("VennDiagram") ensure_installed("htmltools") ensure_installed("pool") diff --git a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R index 3c51f1d25..43fbd89a3 100644 --- a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R +++ b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R @@ -1,6 +1,7 @@ library(shiny) library(pool) library(DatabaseConnector) +library(data.table) source("DataPulls.R") connPool <- NULL # Will be initialized if using a DB @@ -138,18 +139,18 @@ if (exists("covariate")) { } # Setup filters -domain <- data.frame() -domain <- rbind(domain,data.frame(name = "All", covariateAnalysisId = c(1:10000))) -domain <- rbind(domain,data.frame(name = "Cohort", covariateAnalysisId = c(10000))) -domain <- rbind(domain,data.frame(name = "Demographics", covariateAnalysisId = c(1:99))) -domain <- rbind(domain,data.frame(name = "Drug", covariateAnalysisId = c(412))) -domain <- rbind(domain,data.frame(name = "Condition", covariateAnalysisId = c(212))) -domain <- rbind(domain,data.frame(name = 'Procedure', covariateAnalysisId = c(712))) +domain <- data.table() +domain <- rbind(domain,data.table(name = "All", covariateAnalysisId = c(1:10000))) +domain <- rbind(domain,data.table(name = "Cohort", covariateAnalysisId = c(10000))) +domain <- rbind(domain,data.table(name = "Demographics", covariateAnalysisId = c(1:99))) +domain <- rbind(domain,data.table(name = "Drug", covariateAnalysisId = c(412))) +domain <- rbind(domain,data.table(name = "Condition", covariateAnalysisId = c(212))) +domain <- rbind(domain,data.table(name = 'Procedure', covariateAnalysisId = c(712))) domain$name <- as.character(domain$name) domainName <- "All" # This must match the featureTimeWindow.csv from the Pioneer study -timeWindow <- data.frame(windowId=c(1:4), name=c("-365 to index", "index to 365", "366d to 730d", "731d+")) +timeWindow <- data.table(windowId=c(1:4), name=c("-365 to index", "index to 365", "366d to 730d", "731d+")) timeWindow$name <- as.character(timeWindow$name) cohortXref <- data.table::fread("./cohortXref.csv") @@ -199,5 +200,8 @@ names <- sapply(ids,function(id){ cohortStagingCount$name[cohortStagingCount$coh # names <- c(names, 'Symptomatic progr. free surv.') #} -KMIds <- data.frame(id = ids, +KMIds <- data.table(id = ids, name = names) + +#Filter out NA value in name which leads to problems with computations and plotting +KMIds <- KMIds[!is.na(KMIds$name)] \ No newline at end of file diff --git a/inst/shiny/PioneerWatchfulWaitingExplorer/server.R b/inst/shiny/PioneerWatchfulWaitingExplorer/server.R index 4da565123..11a41072a 100644 --- a/inst/shiny/PioneerWatchfulWaitingExplorer/server.R +++ b/inst/shiny/PioneerWatchfulWaitingExplorer/server.R @@ -2,6 +2,7 @@ library(shiny) library(shinydashboard) library(DT) library(htmltools) +library(data.table) source("PlotsAndTables.R") source("utilities.R") source("survplot_core.R") @@ -302,7 +303,7 @@ shinyServer(function(input, output, session) { output$TimeToEventDeath <- renderPlot({ target_id <- cohortCount[cohortCount$databaseId %in% input$databasesTimeToEvent & cohortCount$cohortId %in% cohortIdTimeToEvent(), ][[1]] - target_id_entries_num <- cohortCount[cohortCount$cohortId == target_id, "cohortEntries"][[1]] + target_id_entries_num <- sum(cohortCount[cohortCount$cohortId == target_id, "cohortEntries"]) if (length(target_id) == 0 | target_id_entries_num <= 100 | is.null(input$KMPlot)){ plot <- ggplot2::ggplot() @@ -312,7 +313,7 @@ shinyServer(function(input, output, session) { targetIdTimeToEventData <- cohortTimeToEvent %>% dplyr::filter(targetId == target_id, databaseId == input$databasesTimeToEvent) - accumulatedData <- data.frame(time = c(), surv = c(), n.censor = c(), + accumulatedData <- data.table(time = c(), surv = c(), n.censor = c(), n.event = c(), upper = c(), lower = c()) for(plotName in input$KMPlot){ oId <- KMIds$id[KMIds$name == plotName] @@ -330,7 +331,6 @@ shinyServer(function(input, output, session) { color_map <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") names(color_map) <- KMIds$name - plot <- ggsurvplot_core(accumulatedData, risk.table = "nrisk_cumcensor", palette = color_map, @@ -354,12 +354,13 @@ shinyServer(function(input, output, session) { getMetricsTable <- reactive ({ + browser() target_id <- cohortCount[cohortCount$databaseId %in% input$databasesMetricsDistribution & cohortCount$cohortId %in% cohortIdMetricsDistribution(), ][[1]] metricsTable <- metricsDistribution %>% dplyr::filter(cohortDefinitionId == target_id, databaseId == input$databasesMetricsDistribution) names(metricsTable)[names(metricsTable) == 'iqr'] <- 'IQR' - return(metricsTable[c('analysisName', 'IQR', 'minimum', 'q1', 'median', 'q3', 'maximum')]) + return(metricsTable[,c('analysisName', 'IQR', 'minimum', 'q1', 'median', 'q3', 'maximum')]) }) @@ -386,11 +387,11 @@ shinyServer(function(input, output, session) { data <- getCohortCountsTable() databaseIds <- unique(data$databaseId) databaseIds <- sort(databaseIds) - table <- data[data$databaseId == databaseIds[1], columnsToInclude] + table <- data[data$databaseId == databaseIds[1], ..columnsToInclude] colnames(table)[subjectIndex] <- paste(colnames(table)[2], databaseIds[1], sep = "_") if (length(databaseIds) > 1) { for (i in 2:length(databaseIds)) { - temp <- data[data$databaseId == databaseIds[i], columnsToInclude] + temp <- data[data$databaseId == databaseIds[i], ..columnsToInclude] colnames(temp)[subjectIndex] <- paste(colnames(temp)[subjectIndex], databaseIds[i], sep = "_") table <- merge(table, temp, all = TRUE) } @@ -498,11 +499,11 @@ shinyServer(function(input, output, session) { databaseIds <- unique(data$databaseId) databaseIdsWithCounts <- merge(databaseIds, counts, by.x="x", by.y="databaseId") databaseIdsWithCounts <- dplyr::rename(databaseIdsWithCounts, databaseId="x") - table <- data[data$databaseId == databaseIdsWithCounts$databaseId[1], columnsToInclude] + table <- data[data$databaseId == databaseIdsWithCounts$databaseId[1], ..columnsToInclude] colnames(table)[meanColumnIndex] <- paste(colnames(table)[meanColumnIndex], databaseIdsWithCounts$databaseId[1], sep = "_") if (nrow(databaseIdsWithCounts) > 1) { for (i in 2:nrow(databaseIdsWithCounts)) { - temp <- data[data$databaseId == databaseIdsWithCounts$databaseId[i], columnsToInclude] + temp <- data[data$databaseId == databaseIdsWithCounts$databaseId[i], ..columnsToInclude] colnames(temp)[meanColumnIndex] <- paste(colnames(temp)[meanColumnIndex], databaseIdsWithCounts$databaseId[i], sep = "_") table <- merge(table, temp, all = TRUE) } @@ -585,7 +586,7 @@ shinyServer(function(input, output, session) { computeBalance <- reactive({ if (cohortId() == comparatorCohortId()) { - return(data.frame()) + return(data.table()) } covariateFiltered <- getFilteredCovariates() covariateValue <- getCovariateDataSubset(cohortId(), input$database, comparatorCohortId()) From fef0c370af8aa2280230e68fc5a2e91ce34089c2 Mon Sep 17 00:00:00 2001 From: keesvanbochove Date: Fri, 1 Oct 2021 17:16:02 +0200 Subject: [PATCH 07/10] Various changes needed to change data.frame to data.table --- R/CohortConstruction.R | 4 ++-- R/GenerateSurvival.R | 1 + R/ResourceFiles.R | 4 ++-- R/RunStudy.R | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/R/CohortConstruction.R b/R/CohortConstruction.R index c07d5eab9..543602f50 100644 --- a/R/CohortConstruction.R +++ b/R/CohortConstruction.R @@ -202,7 +202,7 @@ getInclusionStatisticsFromFiles <- function(cohortId, fetchStats <- function(file) { ParallelLogger::logDebug("- Fetching data from ", file) - stats <- readr::read_csv(file, col_types = readr::cols()) + stats <- data.table::fread(file) stats <- stats[stats$cohortDefinitionId == cohortId, ] return(stats) } @@ -459,7 +459,7 @@ saveAndDropTempInclusionStatsTables <- function(connection, if (incremental) { saveIncremental(data, fullFileName, cohortDefinitionId = cohortIds) } else { - readr::write_csv(data, fullFileName) + data.table::fread(data, fullFileName) } } fetchStats("#cohort_inclusion", "cohortInclusion.csv") diff --git a/R/GenerateSurvival.R b/R/GenerateSurvival.R index 1c53ce43c..e53281aef 100644 --- a/R/GenerateSurvival.R +++ b/R/GenerateSurvival.R @@ -23,6 +23,7 @@ generateSurvival <- function(connection, cohortDatabaseSchema, cohortTable, targ id = dplyr::row_number()) %>% dplyr::select(id, timeToEvent, event) + # TODO: Change to Cyclops surv_info <- survival::survfit(survival::Surv(timeToEvent, event) ~ 1, data = km_proc) surv_info <- survminer::surv_summary(surv_info) diff --git a/R/ResourceFiles.R b/R/ResourceFiles.R index ec73a1c24..2cf983b94 100644 --- a/R/ResourceFiles.R +++ b/R/ResourceFiles.R @@ -41,7 +41,7 @@ getCohortsToCreate <- function(cohortGroups = getCohortGroups()) { packageName <- getThisPackageName() cohorts <- data.frame() for(i in 1:nrow(cohortGroups)) { - c <- readr::read_csv(system.file(cohortGroups$fileName[i], package = packageName, mustWork = TRUE), col_types = readr::cols()) + c <- data.table::fread(system.file(cohortGroups$fileName[i], package = packageName, mustWork = TRUE)) c <- c[c('name', 'atlasName', 'atlasId', 'cohortId')] c$cohortType <- cohortGroups$cohortGroup[i] cohorts <- rbind(cohorts, c) @@ -104,7 +104,7 @@ getThisPackageName <- function() { readCsv <- function(resourceFile) { packageName <- getThisPackageName() pathToCsv <- system.file(resourceFile, package = packageName, mustWork = TRUE) - fileContents <- readr::read_csv(pathToCsv, col_types = readr::cols()) + fileContents <- data.table::fread(pathToCsv) return(fileContents) } diff --git a/R/RunStudy.R b/R/RunStudy.R index 7091d3af7..9f7f9c9b5 100644 --- a/R/RunStudy.R +++ b/R/RunStudy.R @@ -219,12 +219,12 @@ runStudy <- function(connectionDetails = NULL, DistribAnalyses <- c('AgeAtDiagnosis', 'YearOfDiagnosis', 'CharlsonAtDiagnosis', 'PsaAtDiagnosis', outcomeBasedAnalyses) outcomes <- getFeatures() - metricsDistribution <- data.frame() + metricsDistribution <- data.table() for(analysis in DistribAnalyses){ outcome <- gsub("TimeTo", "", analysis) outcome <- substring(SqlRender::camelCaseToTitleCase(outcome), 2) - outcomeId <- outcomes[tolower(outcomes$name) == tolower(outcome), "cohortId"][[1]] + outcomeId <- outcomes[tolower(outcomes$name) == tolower(outcome), "cohortId"] if (length(outcomeId) == 0 & analysis %in% outcomeBasedAnalyses){ next From abc8a440075561c4f894a504c5bfebd682daee86 Mon Sep 17 00:00:00 2001 From: keesvanbochove Date: Fri, 1 Oct 2021 18:37:09 +0200 Subject: [PATCH 08/10] Bump to v0.4.4 --- DESCRIPTION | 2 +- codemeta.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4954b2155..fbca0b554 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: PioneerWatchfulWaiting Type: Package Title: PIONEER / EHDEN / OHDSI prostate cancer study -Version: 0.4.3.2 +Version: 0.4.4 Author: Anthony G. Sena, Artem Gorbachev, Kees van Bochove Authors@R: c( person("Anthony G.", "Sena", email = "asena5@its.jnj.com", role = c("aut")), diff --git a/codemeta.json b/codemeta.json index 19edf6801..0961c2cfd 100644 --- a/codemeta.json +++ b/codemeta.json @@ -7,7 +7,7 @@ "codeRepository": "https://github.com/ohdsi-studies/PioneerWatchfulWaiting", "issueTracker": "https://github.com/ohdsi-studies/PioneerWatchfulWaiting/issues", "license": "https://spdx.org/licenses/Apache-2.0", - "version": "0.4.3.2", + "version": "0.4.4", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", @@ -309,5 +309,5 @@ "SystemRequirements": null }, "isPartOf": "https://ohdsi.org", - "fileSize": "39823.066KB" + "fileSize": "1877409.878KB" } From 5a79d488a76e07048db197a1cc1a8fada3d729eb Mon Sep 17 00:00:00 2001 From: keesvanbochove Date: Fri, 1 Oct 2021 18:42:39 +0200 Subject: [PATCH 09/10] Revert "fix negative period in strata cohorts" This reverts commit 081d70955c318074cb470615b2cc36cb7e93a7f7. --- inst/sql/sql_server/341.sql | 1 - inst/sql/sql_server/342.sql | 1 - 2 files changed, 2 deletions(-) diff --git a/inst/sql/sql_server/341.sql b/inst/sql/sql_server/341.sql index cf0a95ef9..d1b2c677d 100644 --- a/inst/sql/sql_server/341.sql +++ b/inst/sql/sql_server/341.sql @@ -1203,7 +1203,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,270,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,270,start_date) ; diff --git a/inst/sql/sql_server/342.sql b/inst/sql/sql_server/342.sql index d0b003879..318fd358b 100644 --- a/inst/sql/sql_server/342.sql +++ b/inst/sql/sql_server/342.sql @@ -1203,7 +1203,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,365,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,365,start_date) ; From 56c2a8ba47db995a38f9e71a0d3df766bbdf04e3 Mon Sep 17 00:00:00 2001 From: keesvanbochove Date: Fri, 1 Oct 2021 18:42:59 +0200 Subject: [PATCH 10/10] Revert "filter end dates occurring before start date" This reverts commit 69cdc55f3b29a03f7e95298c541ccbc07dca04a9. --- inst/sql/sql_server/103.sql | 1 - inst/sql/sql_server/104.sql | 1 - inst/sql/sql_server/105.sql | 1 - inst/sql/sql_server/106.sql | 1 - inst/sql/sql_server/107.sql | 1 - inst/sql/sql_server/108.sql | 1 - inst/sql/sql_server/109.sql | 1 - inst/sql/sql_server/110.sql | 1 - inst/sql/sql_server/111.sql | 1 - inst/sql/sql_server/112.sql | 1 - inst/sql/sql_server/343.sql | 1 - 11 files changed, 11 deletions(-) diff --git a/inst/sql/sql_server/103.sql b/inst/sql/sql_server/103.sql index 349883d4d..2ade6d4d5 100644 --- a/inst/sql/sql_server/103.sql +++ b/inst/sql/sql_server/103.sql @@ -1140,7 +1140,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/104.sql b/inst/sql/sql_server/104.sql index 6933a1665..f3a4935bb 100644 --- a/inst/sql/sql_server/104.sql +++ b/inst/sql/sql_server/104.sql @@ -1195,7 +1195,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/105.sql b/inst/sql/sql_server/105.sql index 0fda1fe32..ffc265cda 100644 --- a/inst/sql/sql_server/105.sql +++ b/inst/sql/sql_server/105.sql @@ -1365,7 +1365,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/106.sql b/inst/sql/sql_server/106.sql index 1a0173437..4acae7006 100644 --- a/inst/sql/sql_server/106.sql +++ b/inst/sql/sql_server/106.sql @@ -1412,7 +1412,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/107.sql b/inst/sql/sql_server/107.sql index 7261413bf..72f0422f5 100644 --- a/inst/sql/sql_server/107.sql +++ b/inst/sql/sql_server/107.sql @@ -1622,7 +1622,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,548,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,548,start_date) ; diff --git a/inst/sql/sql_server/108.sql b/inst/sql/sql_server/108.sql index 82c8510fe..f0f3d95f4 100644 --- a/inst/sql/sql_server/108.sql +++ b/inst/sql/sql_server/108.sql @@ -1670,7 +1670,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,548,start_date) ; diff --git a/inst/sql/sql_server/109.sql b/inst/sql/sql_server/109.sql index eddd3223d..e914531e8 100644 --- a/inst/sql/sql_server/109.sql +++ b/inst/sql/sql_server/109.sql @@ -1620,7 +1620,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,548,start_date) ; diff --git a/inst/sql/sql_server/110.sql b/inst/sql/sql_server/110.sql index 32b378987..49a3a0300 100644 --- a/inst/sql/sql_server/110.sql +++ b/inst/sql/sql_server/110.sql @@ -1669,7 +1669,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,548,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,548,start_date) ; diff --git a/inst/sql/sql_server/111.sql b/inst/sql/sql_server/111.sql index 306ac0ee4..09e5825a8 100644 --- a/inst/sql/sql_server/111.sql +++ b/inst/sql/sql_server/111.sql @@ -1151,7 +1151,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/112.sql b/inst/sql/sql_server/112.sql index 1cc8ae54f..f2d0c1edf 100644 --- a/inst/sql/sql_server/112.sql +++ b/inst/sql/sql_server/112.sql @@ -1204,7 +1204,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,180,start_date) ; diff --git a/inst/sql/sql_server/343.sql b/inst/sql/sql_server/343.sql index b7360c77f..ab64218e0 100644 --- a/inst/sql/sql_server/343.sql +++ b/inst/sql/sql_server/343.sql @@ -1203,7 +1203,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date FROM #final_cohort CO -WHERE end_date >= DATEADD(day,548,start_date) ;