From 43507da2046395902972e5ac15c14fe639f53128 Mon Sep 17 00:00:00 2001
From: keesvanbochove <kees@thehyve.nl>
Date: Tue, 28 Sep 2021 12:12:42 +0200
Subject: [PATCH 01/10] Performance optimization: use data.table fread and
 fwrite functions instead of readr

This reverts commit b2b3c4f8eb7f117dbe390e9940b371dfdde8aedb.
---
 R/Incremental.R | 12 ++++++------
 R/RunStudy.R    | 16 ++++++++--------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/R/Incremental.R b/R/Incremental.R
index 86d8ce73b..57e0d7875 100644
--- a/R/Incremental.R
+++ b/R/Incremental.R
@@ -20,7 +20,7 @@ computeChecksum <- function(column) {
 
 isTaskRequired <- function(..., checksum, recordKeepingFile, verbose = TRUE) {
   if (file.exists(recordKeepingFile)) {
-    recordKeeping <-  readr::read_csv(recordKeepingFile, col_types = readr::cols())
+    recordKeeping <- data.table::fread(recordKeepingFile)
     task <- recordKeeping[getKeyIndex(list(...), recordKeeping), ]
     if (nrow(task) == 0) {
       return(TRUE)
@@ -46,7 +46,7 @@ isTaskRequired <- function(..., checksum, recordKeepingFile, verbose = TRUE) {
 getRequiredTasks <- function(..., checksum, recordKeepingFile) {
   tasks <- list(...)
   if (file.exists(recordKeepingFile) && length(tasks[[1]]) > 0) {
-    recordKeeping <-  readr::read_csv(recordKeepingFile, col_types = readr::cols())
+    recordKeeping <- data.table::fread(recordKeepingFile)
     tasks$checksum <- checksum
     tasks <- tibble::as_tibble(tasks)
     if (all(names(tasks) %in% names(recordKeeping))) {
@@ -83,7 +83,7 @@ recordTasksDone <- function(..., checksum, recordKeepingFile, incremental = TRUE
     return()
   }
   if (file.exists(recordKeepingFile)) {
-    recordKeeping <-  readr::read_csv(recordKeepingFile, col_types = readr::cols())
+    recordKeeping <-  data.table::fread(recordKeepingFile)
     idx <- getKeyIndex(list(...), recordKeeping)
     if (length(idx) > 0) {
       recordKeeping <- recordKeeping[-idx, ]
@@ -95,7 +95,7 @@ recordTasksDone <- function(..., checksum, recordKeepingFile, incremental = TRUE
   newRow$checksum <- checksum
   newRow$timeStamp <-  Sys.time()
   recordKeeping <- dplyr::bind_rows(recordKeeping, newRow)
-  readr::write_csv(recordKeeping, recordKeepingFile)
+  data.table::fwrite(recordKeeping, recordKeepingFile)
 }
 
 saveIncremental <- function(data, fileName, ...) {
@@ -103,12 +103,12 @@ saveIncremental <- function(data, fileName, ...) {
     return()
   }
   if (file.exists(fileName)) {
-    previousData <- readr::read_csv(fileName, col_types = readr::cols())
+    previousData <- data.table::fread(fileName)
     idx <- getKeyIndex(list(...), previousData)
     if (length(idx) > 0) {
       previousData <- previousData[-idx, ] 
     }
     data <- dplyr::bind_rows(previousData, data)
   } 
-  readr::write_csv(data, fileName)
+  data.table::fwrite(data, fileName)
 }
diff --git a/R/RunStudy.R b/R/RunStudy.R
index 390d10aff..7091d3af7 100644
--- a/R/RunStudy.R
+++ b/R/RunStudy.R
@@ -260,7 +260,7 @@ runStudy <- function(connectionDetails = NULL,
   writeToCsv(counts, file.path(exportFolder, "cohort_count.csv"), incremental = incremental, cohortId = counts$cohortId)
   
   # Read in the cohort counts
-  counts <- readr::read_csv(file.path(exportFolder, "cohort_count.csv"), col_types = readr::cols())
+  counts <- data.table::fread(file.path(exportFolder, "cohort_count.csv"))
   colnames(counts) <- SqlRender::snakeCaseToCamelCase(colnames(counts))
   
   # Export the cohorts from the study
@@ -411,9 +411,9 @@ exportResults <- function(exportFolder, databaseId, cohortIdsToExcludeFromResult
     # Censor out the cohorts based on the IDs passed in
     for(i in 1:length(filesWithCohortIds)) {
       fileName <- file.path(tempFolder, filesWithCohortIds[i])
-      fileContents <- readr::read_csv(fileName, col_types = readr::cols())
+      fileContents <- data.table::fread(fileName)
       fileContents <- fileContents[!(fileContents$cohort_id %in% cohortIdsToExcludeFromResultsExport),]
-      readr::write_csv(fileContents, fileName)
+      data.table::fwrite(fileContents, fileName)
     }
     
     # Zip the results and copy to the main export folder
@@ -591,7 +591,7 @@ writeToCsv <- function(data, fileName, incremental = FALSE, ...) {
     params$fileName = fileName
     do.call(saveIncremental, params)
   } else {
-    readr::write_csv(data, fileName)
+    data.table::fwrite(data, fileName)
   }
 }
 
@@ -672,7 +672,7 @@ recordTasksDone <- function(..., checksum, recordKeepingFile, incremental = TRUE
     return()
   }
   if (file.exists(recordKeepingFile)) {
-    recordKeeping <-  readr::read_csv(recordKeepingFile, col_types = readr::cols())
+    recordKeeping <- data.table::fread(recordKeepingFile)
     idx <- getKeyIndex(list(...), recordKeeping)
     if (length(idx) > 0) {
       recordKeeping <- recordKeeping[-idx, ]
@@ -684,7 +684,7 @@ recordTasksDone <- function(..., checksum, recordKeepingFile, incremental = TRUE
   newRow$checksum <- checksum
   newRow$timeStamp <-  Sys.time()
   recordKeeping <- dplyr::bind_rows(recordKeeping, newRow)
-  readr::write_csv(recordKeeping, recordKeepingFile)
+  data.table::fwrite(recordKeeping, recordKeepingFile)
 }
 
 saveIncremental <- function(data, fileName, ...) {
@@ -692,12 +692,12 @@ saveIncremental <- function(data, fileName, ...) {
     return()
   }
   if (file.exists(fileName)) {
-    previousData <- readr::read_csv(fileName, col_types = readr::cols())
+    previousData <- data.table::fread(fileName)
     idx <- getKeyIndex(list(...), previousData)
     if (length(idx) > 0) {
       previousData <- previousData[-idx, ] 
     }
     data <- dplyr::bind_rows(previousData, data)
   } 
-  readr::write_csv(data, fileName)
+  data.table::fwrite(data, fileName)
 }

From 69cdc55f3b29a03f7e95298c541ccbc07dca04a9 Mon Sep 17 00:00:00 2001
From: Maxim Moinat <maxim@thehyve.nl>
Date: Thu, 30 Sep 2021 16:55:20 +0200
Subject: [PATCH 02/10] filter end dates occurring before start date

---
 inst/sql/sql_server/103.sql | 1 +
 inst/sql/sql_server/104.sql | 1 +
 inst/sql/sql_server/105.sql | 1 +
 inst/sql/sql_server/106.sql | 1 +
 inst/sql/sql_server/107.sql | 1 +
 inst/sql/sql_server/108.sql | 1 +
 inst/sql/sql_server/109.sql | 1 +
 inst/sql/sql_server/110.sql | 1 +
 inst/sql/sql_server/111.sql | 1 +
 inst/sql/sql_server/112.sql | 1 +
 inst/sql/sql_server/343.sql | 1 +
 11 files changed, 11 insertions(+)

diff --git a/inst/sql/sql_server/103.sql b/inst/sql/sql_server/103.sql
index 2ade6d4d5..349883d4d 100644
--- a/inst/sql/sql_server/103.sql
+++ b/inst/sql/sql_server/103.sql
@@ -1140,6 +1140,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/104.sql b/inst/sql/sql_server/104.sql
index f3a4935bb..6933a1665 100644
--- a/inst/sql/sql_server/104.sql
+++ b/inst/sql/sql_server/104.sql
@@ -1195,6 +1195,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/105.sql b/inst/sql/sql_server/105.sql
index ffc265cda..0fda1fe32 100644
--- a/inst/sql/sql_server/105.sql
+++ b/inst/sql/sql_server/105.sql
@@ -1365,6 +1365,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/106.sql b/inst/sql/sql_server/106.sql
index 4acae7006..1a0173437 100644
--- a/inst/sql/sql_server/106.sql
+++ b/inst/sql/sql_server/106.sql
@@ -1412,6 +1412,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/107.sql b/inst/sql/sql_server/107.sql
index 72f0422f5..7261413bf 100644
--- a/inst/sql/sql_server/107.sql
+++ b/inst/sql/sql_server/107.sql
@@ -1622,6 +1622,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,548,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,548,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/108.sql b/inst/sql/sql_server/108.sql
index f0f3d95f4..82c8510fe 100644
--- a/inst/sql/sql_server/108.sql
+++ b/inst/sql/sql_server/108.sql
@@ -1670,6 +1670,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,548,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/109.sql b/inst/sql/sql_server/109.sql
index e914531e8..eddd3223d 100644
--- a/inst/sql/sql_server/109.sql
+++ b/inst/sql/sql_server/109.sql
@@ -1620,6 +1620,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,548,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/110.sql b/inst/sql/sql_server/110.sql
index 49a3a0300..32b378987 100644
--- a/inst/sql/sql_server/110.sql
+++ b/inst/sql/sql_server/110.sql
@@ -1669,6 +1669,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,548,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,548,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/111.sql b/inst/sql/sql_server/111.sql
index 09e5825a8..306ac0ee4 100644
--- a/inst/sql/sql_server/111.sql
+++ b/inst/sql/sql_server/111.sql
@@ -1151,6 +1151,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/112.sql b/inst/sql/sql_server/112.sql
index f2d0c1edf..1cc8ae54f 100644
--- a/inst/sql/sql_server/112.sql
+++ b/inst/sql/sql_server/112.sql
@@ -1204,6 +1204,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/343.sql b/inst/sql/sql_server/343.sql
index ab64218e0..b7360c77f 100644
--- a/inst/sql/sql_server/343.sql
+++ b/inst/sql/sql_server/343.sql
@@ -1203,6 +1203,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,548,start_date)
 ;
 
 

From 081d70955c318074cb470615b2cc36cb7e93a7f7 Mon Sep 17 00:00:00 2001
From: Maxim Moinat <maxim@thehyve.nl>
Date: Thu, 30 Sep 2021 16:59:46 +0200
Subject: [PATCH 03/10] fix negative period in strata cohorts

---
 inst/sql/sql_server/341.sql | 1 +
 inst/sql/sql_server/342.sql | 1 +
 2 files changed, 2 insertions(+)

diff --git a/inst/sql/sql_server/341.sql b/inst/sql/sql_server/341.sql
index d1b2c677d..cf0a95ef9 100644
--- a/inst/sql/sql_server/341.sql
+++ b/inst/sql/sql_server/341.sql
@@ -1203,6 +1203,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,270,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,270,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/342.sql b/inst/sql/sql_server/342.sql
index 318fd358b..d0b003879 100644
--- a/inst/sql/sql_server/342.sql
+++ b/inst/sql/sql_server/342.sql
@@ -1203,6 +1203,7 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,365,start_date) as start_date, end_date 
 FROM #final_cohort CO
+WHERE end_date >= DATEADD(day,365,start_date)
 ;
 
 

From d865b1984a790da3b3498f500c5a5362c1578be0 Mon Sep 17 00:00:00 2001
From: keesvanbochove <kees@thehyve.nl>
Date: Fri, 1 Oct 2021 14:15:55 +0200
Subject: [PATCH 04/10] Add performance optimization for results shiny app

---
 inst/shiny/PioneerWatchfulWaitingExplorer/global.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R
index c41dd90f6..56a6b35fd 100644
--- a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R
+++ b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R
@@ -72,7 +72,7 @@ if (dataStorage == "database") {
       # print(file)
       tableName <- gsub(".csv$", "", file)
       camelCaseName <- SqlRender::snakeCaseToCamelCase(tableName)
-      data <- readr::read_csv(file.path(folder, file), col_types = readr::cols(), guess_max = 1e7, locale = readr::locale(encoding = "UTF-8"))
+      data <- data.table::fread(file.path(folder, file))
       colnames(data) <- SqlRender::snakeCaseToCamelCase(colnames(data))
       
       if (!overwrite && exists(camelCaseName, envir = .GlobalEnv)) {
@@ -152,7 +152,7 @@ domainName <- "All"
 timeWindow <- data.frame(windowId=c(1:4), name=c("-365 to index", "index to 365", "366d to 730d", "731d+"))
 timeWindow$name <- as.character(timeWindow$name)
 
-cohortXref <- readr::read_csv("./cohortXref.csv", col_types = readr::cols())
+cohortXref <- data.table::fread("./cohortXref.csv")
 targetCohort <- cohortXref[,c("targetId","targetName")]
 targetCohort <- unique(targetCohort)
 targetCohort <- targetCohort[order(targetCohort$targetName),]
@@ -177,7 +177,7 @@ strataName <- cohortXref[cohortXref$cohortId == initCharCohortId,c("strataName")
 comparatorName <- cohortXref[cohortXref$cohortId == initCharCompareCohortId,c("targetName")][1]
 comparatorStrataName <- cohortXref[cohortXref$cohortId == initCharCompareCohortId,c("strataName")][1]
 
-cohortInfo <- readr::read_csv("./cohorts.csv", col_types = readr::cols())
+cohortInfo <- data.table::fread("./cohorts.csv")
 cohortInfo <- cohortInfo[order(cohortInfo$name),]
 
 # Read in the database terms of use
@@ -196,4 +196,4 @@ if(length(cohortStagingCount$name[cohortStagingCount$cohortId == max(ids)]) == 0
 
 KMIds <- data.frame(id = ids,
                     name = names)
-
+print(KMIds)

From 14ddd690e7e7f96e02cd57ca793870e7010806bf Mon Sep 17 00:00:00 2001
From: keesvanbochove <kees@thehyve.nl>
Date: Fri, 1 Oct 2021 15:10:53 +0200
Subject: [PATCH 05/10] Fix error in label assignment of time to event plot

---
 inst/shiny/PioneerWatchfulWaitingExplorer/global.R | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R
index 56a6b35fd..3c51f1d25 100644
--- a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R
+++ b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R
@@ -188,12 +188,16 @@ database <- database[order(database$databaseId),]
 
 
 # Add Time to Event names and ids
+
+# Gather unique outcome ids in time to event table
 ids <- unique(cohortTimeToEvent$outcomeId)
-names <- unique(cohortStagingCount$name[cohortStagingCount$cohortId %in% ids])
-if(length(cohortStagingCount$name[cohortStagingCount$cohortId == max(ids)]) == 0){
-  names <- c(names, 'Symptomatic progr. free surv.')
-}
+# Find corresponding cohort names
+names <- sapply(ids,function(id){ cohortStagingCount$name[cohortStagingCount$cohortId == id ][1]})
+
+# hack/fix which I don't understand
+#if(length(cohortStagingCount$name[cohortStagingCount$cohortId == max(ids)]) == 0){
+#  names <- c(names, 'Symptomatic progr. free surv.')
+#}
 
 KMIds <- data.frame(id = ids,
                     name = names)
-print(KMIds)

From 0ebdcb427af6e172b05ef1ceb555232e561a5928 Mon Sep 17 00:00:00 2001
From: keesvanbochove <kees@thehyve.nl>
Date: Fri, 1 Oct 2021 17:00:40 +0200
Subject: [PATCH 06/10] Various fixes to the Shiny app

---
 R/Shiny.R                                     |  1 +
 .../PioneerWatchfulWaitingExplorer/global.R   | 22 +++++++++++--------
 .../PioneerWatchfulWaitingExplorer/server.R   | 19 ++++++++--------
 3 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/R/Shiny.R b/R/Shiny.R
index 66343e881..355e47484 100644
--- a/R/Shiny.R
+++ b/R/Shiny.R
@@ -24,6 +24,7 @@ launchShinyApp <- function(outputFolder,
   ensure_installed("shinydashboard")
   ensure_installed("shinyWidgets")
   ensure_installed("DT")
+  ensure_installed("data.table")
   ensure_installed("VennDiagram")
   ensure_installed("htmltools")
   ensure_installed("pool")
diff --git a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R
index 3c51f1d25..43fbd89a3 100644
--- a/inst/shiny/PioneerWatchfulWaitingExplorer/global.R
+++ b/inst/shiny/PioneerWatchfulWaitingExplorer/global.R
@@ -1,6 +1,7 @@
 library(shiny)
 library(pool)
 library(DatabaseConnector)
+library(data.table)
 source("DataPulls.R")
 
 connPool <- NULL # Will be initialized if using a DB
@@ -138,18 +139,18 @@ if (exists("covariate")) {
 }
 
 # Setup filters
-domain <- data.frame()
-domain <- rbind(domain,data.frame(name = "All", covariateAnalysisId = c(1:10000)))
-domain <- rbind(domain,data.frame(name = "Cohort", covariateAnalysisId = c(10000)))
-domain <- rbind(domain,data.frame(name = "Demographics", covariateAnalysisId = c(1:99)))
-domain <- rbind(domain,data.frame(name = "Drug", covariateAnalysisId = c(412)))
-domain <- rbind(domain,data.frame(name = "Condition", covariateAnalysisId = c(212)))
-domain <- rbind(domain,data.frame(name = 'Procedure', covariateAnalysisId = c(712)))
+domain <- data.table()
+domain <- rbind(domain,data.table(name = "All", covariateAnalysisId = c(1:10000)))
+domain <- rbind(domain,data.table(name = "Cohort", covariateAnalysisId = c(10000)))
+domain <- rbind(domain,data.table(name = "Demographics", covariateAnalysisId = c(1:99)))
+domain <- rbind(domain,data.table(name = "Drug", covariateAnalysisId = c(412)))
+domain <- rbind(domain,data.table(name = "Condition", covariateAnalysisId = c(212)))
+domain <- rbind(domain,data.table(name = 'Procedure', covariateAnalysisId = c(712)))
 domain$name <- as.character(domain$name)
 domainName <- "All"
 
 # This must match the featureTimeWindow.csv from the Pioneer study
-timeWindow <- data.frame(windowId=c(1:4), name=c("-365 to index", "index to 365", "366d to 730d", "731d+"))
+timeWindow <- data.table(windowId=c(1:4), name=c("-365 to index", "index to 365", "366d to 730d", "731d+"))
 timeWindow$name <- as.character(timeWindow$name)
 
 cohortXref <- data.table::fread("./cohortXref.csv")
@@ -199,5 +200,8 @@ names <- sapply(ids,function(id){ cohortStagingCount$name[cohortStagingCount$coh
 #  names <- c(names, 'Symptomatic progr. free surv.')
 #}
 
-KMIds <- data.frame(id = ids,
+KMIds <- data.table(id = ids,
                     name = names)
+
+#Filter out NA value in name which leads to problems with computations and plotting
+KMIds <- KMIds[!is.na(KMIds$name)]
\ No newline at end of file
diff --git a/inst/shiny/PioneerWatchfulWaitingExplorer/server.R b/inst/shiny/PioneerWatchfulWaitingExplorer/server.R
index 4da565123..11a41072a 100644
--- a/inst/shiny/PioneerWatchfulWaitingExplorer/server.R
+++ b/inst/shiny/PioneerWatchfulWaitingExplorer/server.R
@@ -2,6 +2,7 @@ library(shiny)
 library(shinydashboard)
 library(DT)
 library(htmltools)
+library(data.table)
 source("PlotsAndTables.R")
 source("utilities.R")
 source("survplot_core.R")
@@ -302,7 +303,7 @@ shinyServer(function(input, output, session) {
   output$TimeToEventDeath <- renderPlot({
     target_id <- cohortCount[cohortCount$databaseId %in% input$databasesTimeToEvent &
                              cohortCount$cohortId %in% cohortIdTimeToEvent(), ][[1]]
-    target_id_entries_num <- cohortCount[cohortCount$cohortId == target_id, "cohortEntries"][[1]]
+    target_id_entries_num <- sum(cohortCount[cohortCount$cohortId == target_id, "cohortEntries"])
     
     if (length(target_id) == 0 | target_id_entries_num <= 100 | is.null(input$KMPlot)){
       plot <- ggplot2::ggplot()
@@ -312,7 +313,7 @@ shinyServer(function(input, output, session) {
     targetIdTimeToEventData <- cohortTimeToEvent %>% dplyr::filter(targetId == target_id,
                                                                databaseId == input$databasesTimeToEvent)
     
-    accumulatedData <- data.frame(time = c(), surv = c(), n.censor = c(), 
+    accumulatedData <- data.table(time = c(), surv = c(), n.censor = c(), 
                                   n.event = c(), upper = c(), lower = c())
     for(plotName in input$KMPlot){
       oId <- KMIds$id[KMIds$name == plotName]
@@ -330,7 +331,6 @@ shinyServer(function(input, output, session) {
     color_map <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
     names(color_map) <- KMIds$name
    
-    
     plot <- ggsurvplot_core(accumulatedData,
                             risk.table = "nrisk_cumcensor",
                             palette = color_map,
@@ -354,12 +354,13 @@ shinyServer(function(input, output, session) {
   
   
   getMetricsTable <- reactive ({
+    browser()
     target_id <- cohortCount[cohortCount$databaseId %in% input$databasesMetricsDistribution 
                              & cohortCount$cohortId %in% cohortIdMetricsDistribution(), ][[1]]
     metricsTable <- metricsDistribution %>% dplyr::filter(cohortDefinitionId == target_id, 
                                                           databaseId == input$databasesMetricsDistribution)
     names(metricsTable)[names(metricsTable) == 'iqr'] <- 'IQR'
-    return(metricsTable[c('analysisName', 'IQR', 'minimum', 'q1', 'median', 'q3', 'maximum')])
+    return(metricsTable[,c('analysisName', 'IQR', 'minimum', 'q1', 'median', 'q3', 'maximum')])
   })
   
   
@@ -386,11 +387,11 @@ shinyServer(function(input, output, session) {
     data <- getCohortCountsTable()
     databaseIds <- unique(data$databaseId)
     databaseIds <- sort(databaseIds)
-    table <- data[data$databaseId == databaseIds[1], columnsToInclude]
+    table <- data[data$databaseId == databaseIds[1], ..columnsToInclude]
     colnames(table)[subjectIndex] <- paste(colnames(table)[2], databaseIds[1], sep = "_")
     if (length(databaseIds) > 1) {
       for (i in 2:length(databaseIds)) {
-        temp <- data[data$databaseId == databaseIds[i], columnsToInclude]
+        temp <- data[data$databaseId == databaseIds[i], ..columnsToInclude]
         colnames(temp)[subjectIndex] <- paste(colnames(temp)[subjectIndex], databaseIds[i], sep = "_")
         table <- merge(table, temp, all = TRUE)
       }
@@ -498,11 +499,11 @@ shinyServer(function(input, output, session) {
     databaseIds <- unique(data$databaseId)
     databaseIdsWithCounts <- merge(databaseIds, counts, by.x="x", by.y="databaseId")
     databaseIdsWithCounts <- dplyr::rename(databaseIdsWithCounts, databaseId="x")
-    table <- data[data$databaseId == databaseIdsWithCounts$databaseId[1], columnsToInclude]
+    table <- data[data$databaseId == databaseIdsWithCounts$databaseId[1], ..columnsToInclude]
     colnames(table)[meanColumnIndex] <- paste(colnames(table)[meanColumnIndex], databaseIdsWithCounts$databaseId[1], sep = "_")
     if (nrow(databaseIdsWithCounts) > 1) {
       for (i in 2:nrow(databaseIdsWithCounts)) {
-        temp <- data[data$databaseId == databaseIdsWithCounts$databaseId[i], columnsToInclude]
+        temp <- data[data$databaseId == databaseIdsWithCounts$databaseId[i], ..columnsToInclude]
         colnames(temp)[meanColumnIndex] <- paste(colnames(temp)[meanColumnIndex], databaseIdsWithCounts$databaseId[i], sep = "_")
         table <- merge(table, temp, all = TRUE)
       }
@@ -585,7 +586,7 @@ shinyServer(function(input, output, session) {
   
   computeBalance <- reactive({
     if (cohortId() == comparatorCohortId()) {
-      return(data.frame())
+      return(data.table())
     }
     covariateFiltered <- getFilteredCovariates()
     covariateValue <- getCovariateDataSubset(cohortId(), input$database, comparatorCohortId())

From fef0c370af8aa2280230e68fc5a2e91ce34089c2 Mon Sep 17 00:00:00 2001
From: keesvanbochove <kees@thehyve.nl>
Date: Fri, 1 Oct 2021 17:16:02 +0200
Subject: [PATCH 07/10] Various changes needed to change data.frame to
 data.table

---
 R/CohortConstruction.R | 4 ++--
 R/GenerateSurvival.R   | 1 +
 R/ResourceFiles.R      | 4 ++--
 R/RunStudy.R           | 4 ++--
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/R/CohortConstruction.R b/R/CohortConstruction.R
index c07d5eab9..543602f50 100644
--- a/R/CohortConstruction.R
+++ b/R/CohortConstruction.R
@@ -202,7 +202,7 @@ getInclusionStatisticsFromFiles <- function(cohortId,
   
   fetchStats <- function(file) {
     ParallelLogger::logDebug("- Fetching data from ", file)
-    stats <- readr::read_csv(file, col_types = readr::cols())
+    stats <- data.table::fread(file)
     stats <- stats[stats$cohortDefinitionId == cohortId, ]
     return(stats)
   }
@@ -459,7 +459,7 @@ saveAndDropTempInclusionStatsTables <- function(connection,
     if (incremental) {
       saveIncremental(data, fullFileName, cohortDefinitionId = cohortIds)
     } else {
-      readr::write_csv(data, fullFileName)
+      data.table::fread(data, fullFileName)
     }
   }
   fetchStats("#cohort_inclusion", "cohortInclusion.csv")
diff --git a/R/GenerateSurvival.R b/R/GenerateSurvival.R
index 1c53ce43c..e53281aef 100644
--- a/R/GenerateSurvival.R
+++ b/R/GenerateSurvival.R
@@ -23,6 +23,7 @@ generateSurvival <- function(connection, cohortDatabaseSchema, cohortTable, targ
                       id = dplyr::row_number()) %>%
         dplyr::select(id, timeToEvent, event)
       
+      # TODO: Change to Cyclops
       surv_info <- survival::survfit(survival::Surv(timeToEvent, event) ~ 1, data = km_proc)
       
       surv_info <- survminer::surv_summary(surv_info)
diff --git a/R/ResourceFiles.R b/R/ResourceFiles.R
index ec73a1c24..2cf983b94 100644
--- a/R/ResourceFiles.R
+++ b/R/ResourceFiles.R
@@ -41,7 +41,7 @@ getCohortsToCreate <- function(cohortGroups = getCohortGroups()) {
   packageName <- getThisPackageName()
   cohorts <- data.frame()
   for(i in 1:nrow(cohortGroups)) {
-    c <- readr::read_csv(system.file(cohortGroups$fileName[i], package = packageName, mustWork = TRUE), col_types = readr::cols())
+    c <- data.table::fread(system.file(cohortGroups$fileName[i], package = packageName, mustWork = TRUE))
     c <- c[c('name', 'atlasName', 'atlasId', 'cohortId')]
     c$cohortType <- cohortGroups$cohortGroup[i]
     cohorts <- rbind(cohorts, c)
@@ -104,7 +104,7 @@ getThisPackageName <- function() {
 readCsv <- function(resourceFile) {
   packageName <- getThisPackageName()
   pathToCsv <- system.file(resourceFile, package = packageName, mustWork = TRUE)
-  fileContents <- readr::read_csv(pathToCsv, col_types = readr::cols())
+  fileContents <- data.table::fread(pathToCsv)
   return(fileContents)
 }
 
diff --git a/R/RunStudy.R b/R/RunStudy.R
index 7091d3af7..9f7f9c9b5 100644
--- a/R/RunStudy.R
+++ b/R/RunStudy.R
@@ -219,12 +219,12 @@ runStudy <- function(connectionDetails = NULL,
   DistribAnalyses <- c('AgeAtDiagnosis', 'YearOfDiagnosis', 'CharlsonAtDiagnosis', 'PsaAtDiagnosis', outcomeBasedAnalyses)
   outcomes <- getFeatures()
   
-  metricsDistribution <- data.frame()
+  metricsDistribution <- data.table()
   
   for(analysis in DistribAnalyses){
     outcome <- gsub("TimeTo", "", analysis)
     outcome <- substring(SqlRender::camelCaseToTitleCase(outcome), 2)
-    outcomeId <- outcomes[tolower(outcomes$name) == tolower(outcome), "cohortId"][[1]]
+    outcomeId <- outcomes[tolower(outcomes$name) == tolower(outcome), "cohortId"]
     
     if (length(outcomeId) == 0 & analysis %in% outcomeBasedAnalyses){
       next

From abc8a440075561c4f894a504c5bfebd682daee86 Mon Sep 17 00:00:00 2001
From: keesvanbochove <kees@thehyve.nl>
Date: Fri, 1 Oct 2021 18:37:09 +0200
Subject: [PATCH 08/10] Bump to v0.4.4

---
 DESCRIPTION   | 2 +-
 codemeta.json | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4954b2155..fbca0b554 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: PioneerWatchfulWaiting
 Type: Package
 Title: PIONEER / EHDEN / OHDSI prostate cancer study
-Version: 0.4.3.2
+Version: 0.4.4
 Author: Anthony G. Sena, Artem Gorbachev, Kees van Bochove
 Authors@R: c(
     person("Anthony G.", "Sena", email = "asena5@its.jnj.com", role = c("aut")),
diff --git a/codemeta.json b/codemeta.json
index 19edf6801..0961c2cfd 100644
--- a/codemeta.json
+++ b/codemeta.json
@@ -7,7 +7,7 @@
   "codeRepository": "https://github.com/ohdsi-studies/PioneerWatchfulWaiting",
   "issueTracker": "https://github.com/ohdsi-studies/PioneerWatchfulWaiting/issues",
   "license": "https://spdx.org/licenses/Apache-2.0",
-  "version": "0.4.3.2",
+  "version": "0.4.4",
   "programmingLanguage": {
     "@type": "ComputerLanguage",
     "name": "R",
@@ -309,5 +309,5 @@
     "SystemRequirements": null
   },
   "isPartOf": "https://ohdsi.org",
-  "fileSize": "39823.066KB"
+  "fileSize": "1877409.878KB"
 }

From 5a79d488a76e07048db197a1cc1a8fada3d729eb Mon Sep 17 00:00:00 2001
From: keesvanbochove <kees@thehyve.nl>
Date: Fri, 1 Oct 2021 18:42:39 +0200
Subject: [PATCH 09/10] Revert "fix negative period in strata cohorts"

This reverts commit 081d70955c318074cb470615b2cc36cb7e93a7f7.
---
 inst/sql/sql_server/341.sql | 1 -
 inst/sql/sql_server/342.sql | 1 -
 2 files changed, 2 deletions(-)

diff --git a/inst/sql/sql_server/341.sql b/inst/sql/sql_server/341.sql
index cf0a95ef9..d1b2c677d 100644
--- a/inst/sql/sql_server/341.sql
+++ b/inst/sql/sql_server/341.sql
@@ -1203,7 +1203,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,270,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,270,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/342.sql b/inst/sql/sql_server/342.sql
index d0b003879..318fd358b 100644
--- a/inst/sql/sql_server/342.sql
+++ b/inst/sql/sql_server/342.sql
@@ -1203,7 +1203,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,365,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,365,start_date)
 ;
 
 

From 56c2a8ba47db995a38f9e71a0d3df766bbdf04e3 Mon Sep 17 00:00:00 2001
From: keesvanbochove <kees@thehyve.nl>
Date: Fri, 1 Oct 2021 18:42:59 +0200
Subject: [PATCH 10/10] Revert "filter end dates occurring before start date"

This reverts commit 69cdc55f3b29a03f7e95298c541ccbc07dca04a9.
---
 inst/sql/sql_server/103.sql | 1 -
 inst/sql/sql_server/104.sql | 1 -
 inst/sql/sql_server/105.sql | 1 -
 inst/sql/sql_server/106.sql | 1 -
 inst/sql/sql_server/107.sql | 1 -
 inst/sql/sql_server/108.sql | 1 -
 inst/sql/sql_server/109.sql | 1 -
 inst/sql/sql_server/110.sql | 1 -
 inst/sql/sql_server/111.sql | 1 -
 inst/sql/sql_server/112.sql | 1 -
 inst/sql/sql_server/343.sql | 1 -
 11 files changed, 11 deletions(-)

diff --git a/inst/sql/sql_server/103.sql b/inst/sql/sql_server/103.sql
index 349883d4d..2ade6d4d5 100644
--- a/inst/sql/sql_server/103.sql
+++ b/inst/sql/sql_server/103.sql
@@ -1140,7 +1140,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/104.sql b/inst/sql/sql_server/104.sql
index 6933a1665..f3a4935bb 100644
--- a/inst/sql/sql_server/104.sql
+++ b/inst/sql/sql_server/104.sql
@@ -1195,7 +1195,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/105.sql b/inst/sql/sql_server/105.sql
index 0fda1fe32..ffc265cda 100644
--- a/inst/sql/sql_server/105.sql
+++ b/inst/sql/sql_server/105.sql
@@ -1365,7 +1365,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/106.sql b/inst/sql/sql_server/106.sql
index 1a0173437..4acae7006 100644
--- a/inst/sql/sql_server/106.sql
+++ b/inst/sql/sql_server/106.sql
@@ -1412,7 +1412,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/107.sql b/inst/sql/sql_server/107.sql
index 7261413bf..72f0422f5 100644
--- a/inst/sql/sql_server/107.sql
+++ b/inst/sql/sql_server/107.sql
@@ -1622,7 +1622,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,548,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,548,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/108.sql b/inst/sql/sql_server/108.sql
index 82c8510fe..f0f3d95f4 100644
--- a/inst/sql/sql_server/108.sql
+++ b/inst/sql/sql_server/108.sql
@@ -1670,7 +1670,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,548,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/109.sql b/inst/sql/sql_server/109.sql
index eddd3223d..e914531e8 100644
--- a/inst/sql/sql_server/109.sql
+++ b/inst/sql/sql_server/109.sql
@@ -1620,7 +1620,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,548,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/110.sql b/inst/sql/sql_server/110.sql
index 32b378987..49a3a0300 100644
--- a/inst/sql/sql_server/110.sql
+++ b/inst/sql/sql_server/110.sql
@@ -1669,7 +1669,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,548,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,548,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/111.sql b/inst/sql/sql_server/111.sql
index 306ac0ee4..09e5825a8 100644
--- a/inst/sql/sql_server/111.sql
+++ b/inst/sql/sql_server/111.sql
@@ -1151,7 +1151,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id,DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/112.sql b/inst/sql/sql_server/112.sql
index 1cc8ae54f..f2d0c1edf 100644
--- a/inst/sql/sql_server/112.sql
+++ b/inst/sql/sql_server/112.sql
@@ -1204,7 +1204,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,180,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,180,start_date)
 ;
 
 
diff --git a/inst/sql/sql_server/343.sql b/inst/sql/sql_server/343.sql
index b7360c77f..ab64218e0 100644
--- a/inst/sql/sql_server/343.sql
+++ b/inst/sql/sql_server/343.sql
@@ -1203,7 +1203,6 @@ DELETE FROM @target_database_schema.@target_cohort_table where cohort_definition
 INSERT INTO @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
 select @target_cohort_id as cohort_definition_id, person_id, DATEADD(day,548,start_date) as start_date, end_date 
 FROM #final_cohort CO
-WHERE end_date >= DATEADD(day,548,start_date)
 ;