From dec223dde7f845f4ade6069e09cb1c352202f6ef Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 11:29:48 -0500
Subject: [PATCH 01/12] ignore data folder

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 551f1c1..5e7c13e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,4 @@ hidden/
 results.zip
 
 scratchDiagnostics
+extras/shiny/data/

From dc97fb4a7687695b39e85448080aa57ac1915c55 Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 11:30:03 -0500
Subject: [PATCH 02/12] add shiny files

---
 extras/DataMigration.R    |  808 ++++++++++++++++++++++
 extras/R/helpers.R        |  472 +++++++++++++
 extras/shiny/R/fn.R       |  319 +++++++++
 extras/shiny/R/loadData.R |  198 ++++++
 extras/shiny/app.R        | 1375 +++++++++++++++++++++++++++++++++++++
 5 files changed, 3172 insertions(+)
 create mode 100644 extras/DataMigration.R
 create mode 100644 extras/R/helpers.R
 create mode 100644 extras/shiny/R/fn.R
 create mode 100644 extras/shiny/R/loadData.R
 create mode 100644 extras/shiny/app.R

diff --git a/extras/DataMigration.R b/extras/DataMigration.R
new file mode 100644
index 0000000..74bc455
--- /dev/null
+++ b/extras/DataMigration.R
@@ -0,0 +1,808 @@
+# Dependencies -------------------
+library(tidyverse)
+library(dplyr)
+library(readr)
+
+#source helper functions
+source("extras/R/helpers.R")
+
+#path to place app data
+appDataPath <- here::here("extras/shiny", "data")
+
+# make new directory
+appDataPath %>% fs::dir_create()
+
+
+# set path to the execution results
+resultsPath <- here::here("results")
+
+#list the databases used in the execution
+listOfDatabase <- fs::dir_ls(resultsPath) %>%
+  basename()
+
+
+#list the execution tasks
+listOfTasks <- c("01_buildCohorts",
+                 "02_cohortDiagnostics",
+                 "03_buildStrata",
+                 "04_incidenceAnalysis",
+                 "05_baselineCharacteristics",
+                 "06_postIndexPrevalenceConditions",
+                 "07_postIndexPrevalenceDrugs",
+                 "08_treatmentHistory",
+                 "09_treatmentPatterns",
+                 "10_timeToDiscontinuation",
+                 "11_postIndexPrevalenceProcedures",
+                 "12_timeToIntervention",
+                 "13_treatmentHistory2",
+                 "14_treatmentPatterns")
+
+#create a dataframe of all permutations of paths
+allPaths <- tidyr::expand_grid(listOfDatabase, listOfTasks) %>%
+  dplyr::mutate(
+    fullPath = fs::path(resultsPath, listOfDatabase, listOfTasks)
+  )
+
+## 1. Bind and save Cohort Manifest for all databases-----------
+
+cohortManifest <- bindCsv(allPaths = allPaths,
+                          task = listOfTasks[1], # cohorts
+                          file = "cohortManifest.csv")
+cm2 <- cohortManifest %>%
+  dplyr::select(databaseId, id, name, entries, subjects) %>%
+  dplyr::rename(
+    Database = databaseId,
+    `Cohort Id` = id,
+    `Cohort Name` = name,
+    Entries = entries,
+    Subjects = subjects
+  ) %>%
+  dplyr::mutate(Entries = dplyr::if_else(is.na(Entries), 0, Entries, 0),
+                Subjects = dplyr::if_else(is.na(Subjects), 0, Subjects, 0)
+  ) %>%
+  dplyr::mutate(Entries = dplyr::if_else(Entries <= 5L & Entries > 0, "<5", format(Entries, big.mark = ",", scientific = FALSE), "0"),
+                Subjects = dplyr::if_else(Subjects <= 5L & Subjects > 0, "<5", format(Subjects, big.mark = ",", scientific = FALSE), "0")
+  )
+
+readr::write_csv(cm2, file = fs::path(appDataPath, "cohortCounts.csv"))
+
+
+## 2. Bind and save strata table for all databases--------------
+strataManifest <- bindCsv(allPaths = allPaths,
+                          task = listOfTasks[3], # strata
+                          file = "strata_table.csv")
+
+sm2 <- strataManifest %>%
+  dplyr::rename(
+    Database = databaseId,
+    `Strata Cohort Id` = cohort_definition_id,
+    `Strata Cohort Name` = name,
+    Subjects = n
+  )%>%
+  dplyr::mutate(Subjects = dplyr::if_else(is.na(Subjects), 0, Subjects, 0)
+  ) %>%
+  dplyr::mutate(Subjects = dplyr::if_else(Subjects <= 5L & Subjects > 0, "<5", format(Subjects, big.mark = ",", scientific = FALSE), "0"))
+
+
+readr::write_csv(sm2, file = fs::path(appDataPath, "strataCounts.csv"))
+
+# bind all cohorts together for full list
+allCohorts <- dplyr::bind_rows(
+  cohortManifest %>%
+    dplyr::rename(cohortName = name) %>%
+    dplyr::select(id, cohortName) %>%
+    dplyr::distinct(),
+  strataManifest %>%
+    dplyr::rename(id = cohort_definition_id,
+                  cohortName = name) %>%
+    dplyr::select(id, cohortName) %>%
+    dplyr::distinct()
+)
+
+
+## 3) Baseline Demographics------------------
+`%notin%` <- Negate("%in%")
+demo <- bindCsv(allPaths = allPaths,
+                task = listOfTasks[5], # baseline char
+                file = "demographics_baseline.csv")
+
+demo2 <- demo %>%
+  dplyr::mutate(
+    #pct = scales::label_percent(accuracy = 0.01, suffix ="")(pct),
+    Covariate = dplyr::case_when(
+      # gender
+      analysisId == 1 & conceptId == 8507 ~ "Gender: Male",
+      analysisId == 1 & conceptId == 8532 ~ "Gender: Female",
+      # race
+      analysisId == 4 & conceptId == 8515 ~ "Race: Asian",
+      analysisId == 4 & conceptId == 8516 ~ "Race: Black or African American",
+      analysisId == 4 & conceptId == 8527 ~ "Race: White",
+      # ethnicity
+      analysisId == 5 & conceptId == 38003563 ~ "Ethnicity: Hispanic or Latino",
+      analysisId == 5 & conceptId == 38003564 ~ "Ethnicity: Not Hispanic or Latino",
+      # age group 5 yr
+      analysisId == 3 & name == "age group: -10 -  -6" ~ "Age Group: -10--6",
+      analysisId == 3 & name == "age group:  -5 -  -1" ~ "Age Group: -5--1",
+      analysisId == 3 & name == "age group:   0 -   4" ~ "Age Group: 0-4",
+      analysisId == 3 & name == "age group:   5 -   9" ~ "Age Group: 5-9",
+      analysisId == 3 & name == "age group:  10 -  14" ~ "Age Group: 10-14",
+      analysisId == 3 & name == "age group:  15 -  19" ~ "Age Group: 15-19",
+      analysisId == 3 & name == "age group:  20 -  24" ~ "Age Group: 20-24",
+      analysisId == 3 & name == "age group:  25 -  29" ~ "Age Group: 25-29",
+      analysisId == 3 & name == "age group:  30 -  34" ~ "Age Group: 30-34",
+      analysisId == 3 & name == "age group:  35 -  39" ~ "Age Group: 35-39",
+      analysisId == 3 & name == "age group:  40 -  44" ~ "Age Group: 40-44",
+      analysisId == 3 & name == "age group:  45 -  49" ~ "Age Group: 45-49",
+      analysisId == 3 & name == "age group:  50 -  54" ~ "Age Group: 50-54",
+      analysisId == 3 & name == "age group:  55 -  59" ~ "Age Group: 55-59",
+      analysisId == 3 & name == "age group:  60 -  64" ~ "Age Group: 60-64",
+      analysisId == 3 & name == "age group:  65 -  69" ~ "Age Group: 65-69",
+      analysisId == 3 & name == "age group:  70 -  74" ~ "Age Group: 70-74",
+      analysisId == 3 & name == "age group:  75 -  79" ~ "Age Group: 75-79",
+      analysisId == 3 & name == "age group:  80 -  84" ~ "Age Group: 80-84",
+      analysisId == 3 & name == "age group:  85 -  89" ~ "Age Group: 85-89",
+      analysisId == 3 & name == "age group:  90 -  94" ~ "Age Group: 90-94",
+      analysisId == 3 & name == "age group:  95 -  99" ~ "Age Group: 95-99",
+      analysisId == 3 & name == "age group: 100 - 104" ~ "Age Group: 100-104",
+      analysisId == 3 & name == "age group: 105 - 109" ~ "Age Group: 105-109",
+      analysisId == 3 & name == "age group: 110 - 114" ~ "Age Group: 110-114",
+      # index year
+      analysisId == 6 & name == "index year: 2000" ~ "Year: 2000",
+      analysisId == 6 & name == "index year: 2001" ~ "Year: 2001",
+      analysisId == 6 & name == "index year: 2002" ~ "Year: 2002",
+      analysisId == 6 & name == "index year: 2003" ~ "Year: 2003",
+      analysisId == 6 & name == "index year: 2004" ~ "Year: 2004",
+      analysisId == 6 & name == "index year: 2005" ~ "Year: 2005",
+      analysisId == 6 & name == "index year: 2006" ~ "Year: 2006",
+      analysisId == 6 & name == "index year: 2007" ~ "Year: 2007",
+      analysisId == 6 & name == "index year: 2008" ~ "Year: 2008",
+      analysisId == 6 & name == "index year: 2009" ~ "Year: 2009",
+      analysisId == 6 & name == "index year: 2010" ~ "Year: 2010",
+      analysisId == 6 & name == "index year: 2011" ~ "Year: 2011",
+      analysisId == 6 & name == "index year: 2012" ~ "Year: 2012",
+      analysisId == 6 & name == "index year: 2013" ~ "Year: 2013",
+      analysisId == 6 & name == "index year: 2014" ~ "Year: 2014",
+      analysisId == 6 & name == "index year: 2015" ~ "Year: 2015",
+      analysisId == 6 & name == "index year: 2016" ~ "Year: 2016",
+      analysisId == 6 & name == "index year: 2017" ~ "Year: 2017",
+      analysisId == 6 & name == "index year: 2018" ~ "Year: 2018",
+      analysisId == 6 & name == "index year: 2019" ~ "Year: 2019",
+      analysisId == 6 & name == "index year: 2020" ~ "Year: 2020",
+      analysisId == 6 & name == "index year: 2021" ~ "Year: 2021",
+      analysisId == 6 & name == "index year: 2022" ~ "Year: 2022"
+    ),
+    id = dplyr::case_when(
+      # age group 5 yr
+      analysisId == 3 & name == "age group: -10 -  -6" ~ 301,
+      analysisId == 3 & name == "age group:  -5 -  -1" ~ 302,
+      analysisId == 3 & name == "age group:   0 -   4" ~ 303,
+      analysisId == 3 & name == "age group:   5 -   9" ~ 304,
+      analysisId == 3 & name == "age group:  10 -  14" ~ 305,
+      analysisId == 3 & name == "age group:  15 -  19" ~ 306,
+      analysisId == 3 & name == "age group:  20 -  24" ~ 307,
+      analysisId == 3 & name == "age group:  25 -  29" ~ 308,
+      analysisId == 3 & name == "age group:  30 -  34" ~ 309,
+      analysisId == 3 & name == "age group:  35 -  39" ~ 310,
+      analysisId == 3 & name == "age group:  40 -  44" ~ 311,
+      analysisId == 3 & name == "age group:  45 -  49" ~ 312,
+      analysisId == 3 & name == "age group:  50 -  54" ~ 313,
+      analysisId == 3 & name == "age group:  55 -  59" ~ 314,
+      analysisId == 3 & name == "age group:  60 -  64" ~ 315,
+      analysisId == 3 & name == "age group:  65 -  69" ~ 316,
+      analysisId == 3 & name == "age group:  70 -  74" ~ 317,
+      analysisId == 3 & name == "age group:  75 -  79" ~ 318,
+      analysisId == 3 & name == "age group:  80 -  84" ~ 319,
+      analysisId == 3 & name == "age group:  85 -  89" ~ 320,
+      analysisId == 3 & name == "age group:  90 -  94" ~ 321,
+      analysisId == 3 & name == "age group:  95 -  99" ~ 322,
+      analysisId == 3 & name == "age group: 100 - 104" ~ 323,
+      analysisId == 3 & name == "age group: 105 - 109" ~ 324,
+      analysisId == 3 & name == "age group: 110 - 114" ~ 325,
+      # index year
+      analysisId == 6 & name == "index year: 2000" ~ 62000,
+      analysisId == 6 & name == "index year: 2001" ~ 62001,
+      analysisId == 6 & name == "index year: 2002" ~ 62002,
+      analysisId == 6 & name == "index year: 2003" ~ 62003,
+      analysisId == 6 & name == "index year: 2004" ~ 62004,
+      analysisId == 6 & name == "index year: 2005" ~ 62005,
+      analysisId == 6 & name == "index year: 2006" ~ 62006,
+      analysisId == 6 & name == "index year: 2007" ~ 62007,
+      analysisId == 6 & name == "index year: 2008" ~ 62008,
+      analysisId == 6 & name == "index year: 2009" ~ 62009,
+      analysisId == 6 & name == "index year: 2010" ~ 62010,
+      analysisId == 6 & name == "index year: 2011" ~ 62011,
+      analysisId == 6 & name == "index year: 2012" ~ 62012,
+      analysisId == 6 & name == "index year: 2013" ~ 62013,
+      analysisId == 6 & name == "index year: 2014" ~ 62014,
+      analysisId == 6 & name == "index year: 2015" ~ 62015,
+      analysisId == 6 & name == "index year: 2016" ~ 62016,
+      analysisId == 6 & name == "index year: 2017" ~ 62017,
+      analysisId == 6 & name == "index year: 2018" ~ 62018,
+      analysisId == 6 & name == "index year: 2019" ~ 62019,
+      analysisId == 6 & name == "index year: 2020" ~ 62020,
+      analysisId == 6 & name == "index year: 2021" ~ 62021,
+      analysisId == 6 & name == "index year: 2022" ~ 62022,
+      TRUE ~ conceptId
+    )
+  )  %>%
+  dplyr::left_join(
+    allCohorts, by = c("cohortDefinitionId" ="id"), relationship = "many-to-many"
+  ) %>%
+  dplyr::select(
+    databaseId, cohortDefinitionId, cohortName, id, Covariate,  n, pct
+  ) %>%
+  dplyr::arrange(
+    databaseId, cohortDefinitionId, id
+  )
+
+# ) %>%
+# dplyr::rename(
+#   Database = databaseId,
+#   `Cohort Id` = cohortDefinitionId,
+#   `Cohort Name` = cohortName,
+#   `Covariate Id` = id,
+#   `Covariate Name` = Covariate,
+#   `Count` = n,
+#   `Percentage` = pct
+# )
+readr::write_csv(demo2, file = fs::path(appDataPath, "baselineDemographics.csv"))
+
+
+## 4) Baseline Continuous ------------
+
+cts <- bindCsv(allPaths = allPaths,
+               task = listOfTasks[5], # baseline char
+               file = "continuous_baseline.csv")
+
+
+cts2 <- cts %>%
+  dplyr::left_join(
+    allCohorts, by = c("cohortDefinitionId" ="id"), relationship = "many-to-many"
+  ) %>%
+  dplyr::mutate(
+    #iqr = glue::glue("({p25Value}, {p75Value})")
+    iqr = p75Value - p25Value,
+    name = stringr::str_to_title(name)
+  ) %>%
+  dplyr::select(
+    databaseId, cohortDefinitionId, cohortName, covariateId, name, medianValue, iqr
+  ) %>%
+  dplyr::arrange(
+    databaseId, cohortDefinitionId, covariateId
+  ) #%>%
+# dplyr::rename(
+#   Database = databaseId,
+#   `Cohort Id` = cohortDefinitionId,
+#   `Cohort Name` = cohortName,
+#   `Covariate Id` = covariateId,
+#   `Covariate Name` = name,
+#   `Median` = medianValue,
+#   `IQR` = iqr
+# )
+readr::write_csv(cts2, file = fs::path(appDataPath, "baselineContinuous.csv"))
+
+## 5) Baseline Concepts ------------
+
+# extract drug concepts
+drug <- bindCsv(allPaths = allPaths,
+                task = listOfTasks[5], # baseline char
+                file = "drugs_baseline.csv") %>%
+  dplyr::filter(pct >= 0.02) %>%
+  dplyr::left_join(
+    allCohorts, by = c("cohortDefinitionId" ="id"), relationship = "many-to-many"
+  )  %>%
+  dplyr::mutate(
+    domain = "Drugs"
+  ) %>%
+  dplyr::select(
+    databaseId, domain, cohortDefinitionId, cohortName, conceptId, name,  n, pct
+  )
+
+# extract condition concepts
+cond <- bindCsv(allPaths = allPaths,
+                task = listOfTasks[5], # baseline char
+                file = "conditions_baseline.csv") %>%
+  dplyr::filter(pct >= 0.02) %>%
+  dplyr::left_join(
+    allCohorts, by = c("cohortDefinitionId" ="id"), relationship = "many-to-many"
+  )  %>%
+  dplyr::mutate(
+    domain = "Conditions"
+  ) %>%
+  dplyr::select(
+    databaseId, domain, cohortDefinitionId, cohortName, conceptId, name,  n, pct
+  )
+
+# extract procedure concepts
+#skip THIN no procedures
+procPaths <- allPaths %>%
+  dplyr::filter(listOfDatabase != "THINBE")
+
+
+proc <- bindCsv(allPaths = procPaths,
+                task = listOfTasks[5], # baseline char
+                file = "procedures_baseline.csv") %>%
+  dplyr::filter(pct >= 0.02) %>%
+  dplyr::left_join(
+    allCohorts, by = c("cohortDefinitionId" ="id"), relationship = "many-to-many"
+  ) %>%
+  dplyr::mutate(
+    domain = "Procedures"
+  ) %>%
+  dplyr::select(
+    databaseId, domain, cohortDefinitionId, cohortName, conceptId, name,  n, pct
+  )
+
+conceptTab <- dplyr::bind_rows(
+  drug, cond, proc
+) %>%
+  # dplyr::mutate(
+  #   pct = scales::label_percent(accuracy = 0.01, suffix = "")(pct)
+  # ) %>%
+  dplyr::arrange(databaseId, cohortDefinitionId, domain, conceptId)
+
+#%>%
+# dplyr::rename(
+#   Database = databaseId,
+#   `Cohort Id` = cohortDefinitionId,
+#   `Cohort Name` = cohortName,
+#   `Covariate Id` = conceptId,
+#   `Covariate Name` = name,
+#   `Count` = n,
+#   `Percentage` = pct
+# )
+readr::write_csv(conceptTab, file = fs::path(appDataPath, "baselineConcepts.csv"))
+
+
+## 6) Baseline Cohorts ------------
+cohort365 <- bindCsv(allPaths = allPaths,
+                     task = listOfTasks[5], # baseline char
+                     file = "cohort_covariates_365_0.csv") %>%
+  dplyr::mutate(
+    #pct = scales::label_percent(accuracy = 0.01, suffix = "")(pct),
+    timeWindow = "-365d - 0d",
+    covariateName = dplyr::case_when(
+      covariateId == 3 ~ "PID",
+      covariateId == 4 ~ "STD",
+      covariateId == 5 ~ "adenomyosis",
+      covariateId == 6 ~ "anemia",
+      covariateId == 7 ~ "antidepressants",
+      covariateId == 8 ~ "antipsychotics",
+      covariateId == 9 ~ "antithrombotics",
+      covariateId == 10 ~ "coagulopathy",
+      covariateId == 11 ~ "copperIUDdrug",
+      covariateId == 12 ~ "covid19",
+      covariateId == 13 ~ "diabetes",
+      covariateId == 14 ~ "disorderOfOvary",
+      covariateId == 15 ~ "dysmenorrhea",
+      covariateId == 16 ~ "endoHyperplasia",
+      covariateId == 17 ~ "endoPolyp",
+      covariateId == 18 ~ "endometriosis",
+      covariateId == 19 ~ "gonadalSteroids",
+      covariateId == 20 ~ "ironDefAnemia",
+      covariateId == 21 ~ "obesity",
+      covariateId == 22 ~ "ovulatoryDysfunction",
+      covariateId == 23 ~ "pain",
+      covariateId == 24 ~ "pcos",
+      covariateId == 25 ~ "tamoxifen",
+      covariateId == 26 ~ "uterineLeiomyoma"
+    )
+  ) %>%
+  dplyr::select(databaseId, timeWindow, cohortId, cohortName,
+                covariateId, covariateName, count, pct) %>%
+  dplyr::arrange(databaseId, cohortId, covariateId) #%>%
+# dplyr::rename(
+#   Database = databaseId,
+#   `Time Window` = timeWindow,
+#   `Cohort Id` = cohortId,
+#   `Cohort Name` = cohortName,
+#   `Covariate Id` = covariateId,
+#   `Covariate Name` = covariateName,
+#   `Count` = count,
+#   `Percentage` = pct
+# )
+
+# cohort9999 <- bindCsv(allPaths = allPaths,
+#                       task = listOfTasks[3],
+#                       file = "cohort_covariates_9999_1.csv") %>%
+#   dplyr::mutate(
+#     pct = scales::label_percent(accuracy = 0.01, suffix = "")(pct),
+#     timeWindow = "anyt time prior"
+#   ) %>%
+#   dplyr::select(databaseId, timeWindow, cohortId, cohortName,
+#                 covariateId, covariateName, count, pct) %>%
+#   dplyr::arrange(databaseId, cohortId, covariateId) %>%
+#   dplyr::mutate(
+#     timeWindow = "any time prior"
+#   ) %>%
+#   dplyr::rename(
+#     Database = databaseId,
+#     `Time Window` = timeWindow,
+#     `Cohort Id` = cohortId,
+#     `Cohort Name` = cohortName,
+#     `Covariate Id` = covariateId,
+#     `Covariate Name` = covariateName,
+#     `Count` = count,
+#     `Percentage` = pct
+#   )
+# cohortCovariates <- dplyr::bind_rows(
+#   cohort365, cohort9999
+# )
+readr::write_csv(cohort365, file = fs::path(appDataPath, "baselineCohorts.csv"))
+
+
+## 7) ICD 10 Chapters Baseline ----------------
+allCohorts2 <- dplyr::bind_rows(
+  cohortManifest %>%
+    dplyr::rename(cohortName = name,
+                  n = subjects) %>%
+    dplyr::select(databaseId, id, cohortName, n) %>%
+    dplyr::distinct(),
+  strataManifest %>%
+    dplyr::rename(id = cohort_definition_id,
+                  cohortName = name) %>%
+    dplyr::select(databaseId, id, cohortName, n) %>%
+    dplyr::distinct()
+)
+
+# extract icd10
+icd10 <- bindCsv(allPaths = allPaths,
+                 task = listOfTasks[5], # baseline char
+                 file = "condition_chapters.csv") %>%
+  dplyr::filter(CATEGORY_CODE != 0) %>%
+  dplyr::left_join(
+    allCohorts2, by = c("COHORT_ID" ="id", "databaseId"), relationship = "many-to-many"
+  ) %>%
+  dplyr::mutate(
+    categoryName = dplyr::case_when(
+      CATEGORY_CODE == 443723 ~ glue::glue("{CATEGORY_NAME}: Disorder of cellular component of blood"),
+      CATEGORY_CODE == 440371 ~ glue::glue("{CATEGORY_NAME}: Disorder of immune function"),
+      CATEGORY_CODE == 432795 ~ glue::glue("{CATEGORY_NAME}: Traumatic or non-traumatic injury"),
+      CATEGORY_CODE == 444363 ~ glue::glue("{CATEGORY_NAME}: Drug-related disorder"),
+      CATEGORY_CODE == 442562 ~ glue::glue("{CATEGORY_NAME}: Poisoning"),
+      CATEGORY_CODE == 4088927 ~ glue::glue("{CATEGORY_NAME}: Pregnancy, childbirth and puerperium finding"),
+      CATEGORY_CODE == 4154314 ~ glue::glue("{CATEGORY_NAME}: Finding of arrangement of fetus"),
+      CATEGORY_CODE == 435875 ~ glue::glue("{CATEGORY_NAME}: Complication of pregnancy, childbirth and/or puerperium"),
+      CATEGORY_CODE == 4136529 ~ glue::glue("{CATEGORY_NAME}: Fetal movement finding"),
+      CATEGORY_CODE == 441406 ~ glue::glue("{CATEGORY_NAME}: Disorder of fetus or newborn"),
+      CATEGORY_CODE == 432250 ~ glue::glue("{CATEGORY_NAME}: Disorder due to infection"),
+      CATEGORY_CODE == 438112 ~ glue::glue("{CATEGORY_NAME}: Neoplastic disease"),
+      CATEGORY_CODE == 31821 ~ glue::glue("{CATEGORY_NAME}: Disorder of endocrine system"),
+      CATEGORY_CODE == 436670 ~ glue::glue("{CATEGORY_NAME}: Metabolic disease"),
+      CATEGORY_CODE == 4090739 ~ glue::glue("{CATEGORY_NAME}: Nutritional disorder"),
+      CATEGORY_CODE == 432586 ~ glue::glue("{CATEGORY_NAME}: Mental disorder"),
+      CATEGORY_CODE == 4011630 ~ glue::glue("{CATEGORY_NAME}: Neurological finding"),
+      CATEGORY_CODE == 376337 ~ glue::glue("{CATEGORY_NAME}: Disorder of nervous system"),
+      CATEGORY_CODE == 4038502 ~ glue::glue("{CATEGORY_NAME}: Eye / vision finding"),
+      CATEGORY_CODE == 4042836 ~ glue::glue("{CATEGORY_NAME}: Disorder of head"),
+      CATEGORY_CODE == 134057 ~ glue::glue("{CATEGORY_NAME}: Disorder of cardiovascular system"),
+      CATEGORY_CODE == 320136 ~ glue::glue("{CATEGORY_NAME}: Disorder of respiratory system"),
+      CATEGORY_CODE == 4302537 ~ glue::glue("{CATEGORY_NAME}: Digestive system finding"),
+      CATEGORY_CODE == 4028387 ~ glue::glue("{CATEGORY_NAME}: Disorder of integument"),
+      CATEGORY_CODE == 4244662 ~ glue::glue("{CATEGORY_NAME}: Disorder of musculoskeletal system"),
+      CATEGORY_CODE == 4027384 ~ glue::glue("{CATEGORY_NAME}: Inflammatory disorder"),
+      CATEGORY_CODE == 40482430 ~ glue::glue("{CATEGORY_NAME}: Deformity of limb"),
+      CATEGORY_CODE == 4344497 ~ glue::glue("{CATEGORY_NAME}: Soft tissue lesion"),
+      CATEGORY_CODE == 433595 ~ glue::glue("{CATEGORY_NAME}: Edema"),
+      CATEGORY_CODE == 4041285 ~ glue::glue("{CATEGORY_NAME}: Urogenital finding"),
+      CATEGORY_CODE == 4053838 ~ glue::glue("{CATEGORY_NAME}: Foreign body"),
+      CATEGORY_CODE == 4105886 ~ glue::glue("{CATEGORY_NAME}: Adverse reaction"),
+      CATEGORY_CODE == 440508 ~ glue::glue("{CATEGORY_NAME}: Congenital disease")
+    )
+  ) %>%
+  dplyr::arrange(databaseId, COHORT_ID, CATEGORY_ID) %>%
+  dplyr::mutate(
+    pct = COUNTVALUE / n#,
+    #pct = scales::label_percent(accuracy = 0.01, suffix = "")(pct)
+  ) %>%
+  dplyr::select(databaseId, COHORT_ID, cohortName,
+                CATEGORY_CODE, categoryName, COUNTVALUE, pct) #%>%
+# dplyr::rename(
+#   Database = databaseId,
+#   `Cohort Id` = COHORT_ID,
+#   `Cohort Name` = cohortName,
+#   `Concept Id` = CATEGORY_CODE,
+#   `Category Name` = categoryName,
+#   `Count` = COUNTVALUE,
+#   `Percentage` = pct
+# )
+
+readr::write_csv(icd10, file = fs::path(appDataPath, "baselineChapters.csv"))
+
+## 8) Post Index Prevalence ---------------------
+
+### A) Post Index Conditions --------------------
+
+piPrevFilesCond <- c("cohort_covariates_1_365.csv",
+                     "cohort_covariates_366_730.csv",
+                     "cohort_covariates_731_1825.csv")
+piPrevTimeFrameCond <- c("1d - 365d",
+                         "366d - 730d",
+                         "731d - 1825d")
+piPrevCond <- purrr::map2_dfr(piPrevFilesCond, # files to use
+                              piPrevTimeFrameCond, # time frame column to add
+                              ~bindCsv(# bind csv
+                                allPaths = allPaths,
+                                task = listOfTasks[6], # postindex Conditions
+                                file = .x) %>%
+                                dplyr::mutate( # add timeWindow Column
+                                  timeWindow = .y
+                                ))  %>%
+  dplyr::mutate(
+    type = "conditions"
+  ) %>%
+  dplyr::select(-covariateName) %>%
+  dplyr::left_join(
+    cohortManifest %>% dplyr::select(databaseId, id, name),
+    by = c("databaseId" = "databaseId", "covariateId" = "id")
+  ) %>%
+  dplyr::filter(
+    name %in% c("adenomyosis", "coagulopathy", "disorderOfOvary",
+                "dysmenorrhea", "endoHyperplasia", "endoPolyp", "endometriosis",
+                "ovulatoryDysfunction", "pcos", "uterineLeiomyoma")
+  ) %>%
+  dplyr::rename(covariateName = name)
+
+
+
+
+### B) Post Index Drugs --------------------
+
+piPrevFilesDrugs <- c(
+  "cohort_covariates_1_183.csv",
+  "cohort_covariates_184_365.csv",
+  "cohort_covariates_1_365.csv",
+  "cohort_covariates_366_730.csv",
+  "cohort_covariates_731_1825.csv"
+)
+piPrevTimeFrameDrugs <- c(
+  "1d - 183d",
+  "184d - 365d",
+  "1d - 365d",
+  "366d - 730d",
+  "731d - 1825d"
+)
+piPrevDrugs <- purrr::map2_dfr(piPrevFilesDrugs, # files to use
+                               piPrevTimeFrameDrugs, # time frame column to add
+                               ~bindCsv(# bind csv
+                                 allPaths = allPaths,
+                                 task = listOfTasks[7], # postindex Drugs
+                                 file = .x) %>%
+                                 dplyr::mutate( # add timeWindow Column
+                                   timeWindow = .y
+                                 )) %>%
+  dplyr::mutate(
+    type = "drugs"
+  )
+
+
+### C) Post Index Procedures --------------------
+#skip THIN no procedures
+piPrevFilesProc <- c("procedure_prevalence_1.csv",
+                     "procedure_prevalence_1001.csv",
+                     "procedure_prevalence_1002.csv",
+                     "procedure_prevalence_1003.csv")
+
+piPrevProcCohorts <- c(1L, 1001L, 1002L, 1003L)
+
+piPrevProc <- purrr::map2_dfr(piPrevFilesProc, # files to use
+                              piPrevProcCohorts, # cohorts
+                              ~bindCsv(# bind csv
+                                allPaths = procPaths,#skip THIN no procedures
+                                task = listOfTasks[11], # postindex Proc
+                                file = .x) %>%
+                                dplyr::mutate(
+                                  cohortId = .y
+                                )) %>%
+  dplyr::mutate(
+    cohortName = dplyr::case_when(
+      cohortId == 1 ~ "hmb",
+      cohortId == 1001L ~ "hmb_age_lt_30",
+      cohortId == 1002L ~ "hmb_age_30_45",
+      cohortId == 1003L ~ "hmb_age_45_55"
+    ),
+    covariateId = cohortDefinitionId,
+    covariateName = dplyr::case_when(
+      covariateId == 36 ~ "bloodTransfusion",
+      covariateId == 37 ~ "copperIUDprocedure",
+      covariateId == 38 ~ "endometrialAblation",
+      covariateId == 39 ~ "hormonalIUD",
+      covariateId == 40 ~ "hysterectomy",
+      covariateId == 41 ~ "myomectomy",
+      covariateId == 42 ~ "uae",
+      covariateId == 43 ~ "undefinedIUD"
+    ),
+    count = numEvents,
+    timeWindow = dplyr::case_when(
+      window == "All" ~ "1d - 9999d",
+      window == "1 - 183" ~ "1d - 183d",
+      window == "1 - 365" ~ "1d - 365d",
+      window == "184 - 365" ~ "184d - 365d",
+      window == "366 - 730" ~ "366d - 730d",
+      window == "731 - 1825" ~ "731d - 1825d"
+    ),
+    type = "procedures"
+  ) %>%
+  dplyr::select(databaseId, cohortId, cohortName, covariateId, covariateName,
+                count, pct, timeWindow, type)
+
+postIndexPrev <- dplyr::bind_rows(
+  piPrevCond, piPrevDrugs, piPrevProc
+)
+
+readr::write_csv(postIndexPrev, file = fs::path(appDataPath, "postIndexPrevalence.csv"))
+
+## 9) Incidence -----------------------
+
+inicFiles <- glue::glue("incidence_analysis_ref_{1:4}.csv")
+
+
+inic <- purrr::map_dfr(inicFiles, # files to use
+                       ~bindCsv(# bind csv
+                         allPaths = allPaths,
+                         task = listOfTasks[4],
+                         file = .x))
+
+fctOrder <- c("All", as.character(2000:2022))
+
+inic2 <- inic %>%
+  dplyr::mutate(
+    # timeWindow = glue::glue("{TAR_START_OFFSET}d - {TAR_END_OFFSET}d"),
+    START_YEAR = ifelse(is.na(START_YEAR), "All", as.character(START_YEAR)),
+    START_YEAR = factor(START_YEAR, levels = fctOrder),
+    #INCIDENCE_PROPORTION_P100P = scales::label_number(accuracy = 0.01)(INCIDENCE_PROPORTION_P100P),
+    INCIDENCE_RATE_P1000PY = INCIDENCE_RATE_P100PY * 10
+  ) %>%
+  dplyr::select(databaseId,
+                #TARGET_COHORT_DEFINITION_ID, TARGET_NAME,
+                START_YEAR,
+                OUTCOME_COHORT_DEFINITION_ID, OUTCOME_NAME,
+                PERSONS_AT_RISK, PERSON_DAYS, OUTCOMES,
+                INCIDENCE_PROPORTION_P100P,
+                INCIDENCE_RATE_P1000PY) %>%
+  dplyr::arrange(databaseId, OUTCOME_COHORT_DEFINITION_ID, START_YEAR) #%>%
+# dplyr::rename(
+#   Database = databaseId,
+#   `Year` = START_YEAR,
+#   # `Cohort Id` = TARGET_COHORT_DEFINITION_ID,
+#   # `Cohort Name` = TARGET_NAME,
+#   `Outcome Id` = OUTCOME_COHORT_DEFINITION_ID,
+#   `Outcome Name` = OUTCOME_NAME,
+#   `Persons at risk` = PERSONS_AT_RISK,
+#   `Person Days` = PERSON_DAYS,
+#   `Outcomes` = OUTCOMES,
+#   `Incidence Proportion` = INCIDENCE_PROPORTION_P100P,
+#   `Incidence Rate (per 1000 person-years)` = INCIDENCE_RATE_P1000PY
+# )
+readr::write_csv(inic2, file = fs::path(appDataPath, "incidence.csv"))
+
+
+## 11) Treatment Patterns --------------
+### HMB normal----------------------
+
+txPath <- allPaths %>%
+  dplyr::filter(listOfTasks == listOfTasks[9])
+
+## get treatment patterns table
+txPathDat <- purrr::pmap_dfr(
+  txPath,
+  ~bindTxPathTab(path = ..3, database = ..1)
+) %>%
+  tidyr::separate_wider_delim(
+    cols = cohortName,
+    delim = "_",
+    names = c("type", "cohortId")
+  ) %>%
+  dplyr::mutate(
+    cohortName = dplyr::case_when(
+      cohortId == 1 ~ "hmb",
+      cohortId == 1001L ~ "hmb_age_lt_30",
+      cohortId == 1002L ~ "hmb_age_30_45",
+      cohortId == 1003L ~ "hmb_age_45_55"
+    )
+  ) %>%
+  dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>%
+  dplyr::arrange(databaseId, cohortId, desc(n))
+# write to app
+readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns.csv"))
+
+### HMB 2 --------------------------
+txPath2 <- allPaths %>%
+  dplyr::filter(listOfTasks == listOfTasks[14])
+
+## get treatment patterns table
+txPathDat2 <- purrr::pmap_dfr(
+  txPath2,
+  ~bindTxPathTab(path = ..3, database = ..1)
+) %>%
+  tidyr::separate_wider_delim(
+    cols = cohortName,
+    delim = "_",
+    names = c("type", "cohortId")
+  ) %>%
+  dplyr::mutate(
+    cohortName = dplyr::case_when(
+      cohortId == 44 ~ "hmb2",
+      cohortId == 44001L ~ "hmb2_age_lt_30",
+      cohortId == 44002L ~ "hmb2_age_30_45",
+      cohortId == 44003L ~ "hmb2_age_45_55"
+    )
+  ) %>%
+  dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>%
+  dplyr::arrange(databaseId, cohortId, desc(n))
+# write to app
+readr::write_csv(txPathDat2, file = fs::path(appDataPath, "treatmentPatterns2.csv"))
+
+
+## get sankey diagram
+
+# sankey <- purrr::pmap(
+#   txPath,
+#   ~groupSankey(path = ..3, database = ..1)
+# )
+# names(sankey) <- listOfDatabase
+# readr::write_rds(sankey, file = fs::path(appDataPath, "sankey.rds"))
+
+
+## 12) Time to event ----------------
+
+## Time to discontinuation
+
+### list files to extract
+ttdFiles <- c("tte_1.csv",
+              "tte_1001.csv",
+              "tte_1002.csv",
+              "tte_1003.csv")
+
+
+permutations <- tidyr::expand_grid(
+  ttdFiles,
+  listOfDatabase
+)
+
+#bind all in ttd
+ttd <- purrr::pmap_dfr(
+  permutations,
+  ~bindTteData(
+    path = resultsPath,
+    database = ..2,
+    task = listOfTasks[10],
+    file = ..1
+  )
+)
+arrow::write_parquet(
+  x = ttd,
+  sink = fs::path(appDataPath, "ttd.parquet")
+)
+# readr::write_csv(ttd, file = fs::path(appDataPath, "ttd.csv"))
+
+### Time to intervention
+
+ttiFiles <- c("procedure_survival_1.csv",
+              "procedure_survival_1001.csv",
+              "procedure_survival_1002.csv",
+              "procedure_survival_1003.csv")
+
+
+permutations <- tidyr::expand_grid(
+  ttiFiles,
+  listOfDatabase
+)
+
+
+#bind all in ttd
+tti <- purrr::pmap_dfr(
+  permutations,
+  ~bindTteData2(
+    path = resultsPath,
+    database = ..2,
+    task = listOfTasks[12],
+    file = ..1
+  )
+)
+arrow::write_parquet(
+  x = tti,
+  sink = fs::path(appDataPath, "tti.parquet")
+)
+
+#readr::write_csv(tti, file = fs::path(appDataPath, "tti.csv"))
diff --git a/extras/R/helpers.R b/extras/R/helpers.R
new file mode 100644
index 0000000..2fa210a
--- /dev/null
+++ b/extras/R/helpers.R
@@ -0,0 +1,472 @@
+
+# function to bind csvs from executions
+bindCsv <- function(allPaths, task, file) {
+
+  dat <- allPaths %>%
+    dplyr::filter(listOfTasks == !!task) %>%
+    dplyr::mutate(
+      filePath = fs::path(fullPath, !!file)
+    ) %>%
+    dplyr::select(filePath, listOfDatabase) %>%
+    purrr::pmap_dfr(~readr::read_csv(..1, show_col_types = FALSE) %>%
+                      dplyr::mutate(
+                        databaseId = ..2)) %>%
+    dplyr::relocate(databaseId, .before = 1)
+
+  return(dat)
+}
+
+# function to bind csv within a subfolder
+bindFolder <- function(path, folder) {
+  dat <- fs::path(path, folder) %>%
+    fs::dir_ls(type = "file") %>%
+    purrr::map_dfr(~readr::read_csv(.x, show_col_types = FALSE))
+  return(dat)
+}
+
+# Function to mask low counts. Default count is 5
+maskLowCount <- function(df, countLimit = 5L) {
+
+  dfLow <- df %>%
+    dplyr::mutate(
+      pct = dplyr::if_else(count <= countLimit, "-", scales::percent(pct, accuracy = 0.01), "-"),
+      count = dplyr::if_else(count <= countLimit, "<5", format(count, big.mark = ",", scientific = FALSE), "-")
+    )
+
+  return(dfLow)
+}
+
+# Treatment Patterns Functions -----------
+
+# bind txPath rds
+bindTxPathTab <- function(path, database) {
+
+  rdsFiles <- path %>%
+    fs::dir_ls(type = "file")
+
+  # use when corrected
+  # cohortId <- tools::file_path_sans_ext(basename(rdsFiles)) %>%
+  #   gsub("sankey_", "", .) %>%
+  #   as.integer()
+
+  cohortName <- tools::file_path_sans_ext(basename(rdsFiles)) %>%
+    gsub("_sankey", "", .)
+
+  dat <- purrr::map2_dfr(rdsFiles,
+                         cohortName,
+                         ~readr::read_rds(.x) %>%
+                           getElement("treatmentPatterns") %>%
+                           dplyr::mutate(databaseId = !!database,
+                                         cohortName = !!.y))
+
+  return(dat)
+
+}
+
+# function to plot treatment patterns
+plot_patterns <- function(sankey) {
+
+  links <- sankey$links
+  nodes <- sankey$nodes
+
+  label <- unique(links$type)
+  label2 <- paste0("'", paste(label, collapse = "','"), "',", "'end'")
+
+  martin_colors <- unname(colorBlindness::paletteMartin)[-1]
+
+  col <- martin_colors[seq_along(label)]
+  col2 <- paste0("'", paste(col, collapse = "','"), "',", "'#1B1919FF'")
+
+  myCol <- glue::glue('d3.scaleOrdinal() .domain([{label2}]) .range([{col2}])')
+
+  #plot sankeyNetwork
+  sankey <- networkD3::sankeyNetwork(
+    Links = links,
+    Nodes = nodes,
+    Source = 'source',
+    Target = 'target',
+    Value = 'value',
+    NodeID = 'name',
+    fontSize = 11,
+    sinksRight = FALSE,
+    colourScale = myCol
+  )
+
+  return(sankey)
+
+}
+
+## Group sankey data together to save
+groupSankey <- function(path, database) {
+
+  # get rds files
+  rdsFiles <- path %>%
+    fs::dir_ls(type = "file")
+
+  # retrieve dat to make sankey
+  dat <- purrr::map(rdsFiles, ~readr::read_rds(.x)[c(2,3)])
+
+  #find which links df that have data
+  jj <- purrr::map_lgl(dat, ~nrow(.x$links) > 0)
+
+  #subset list to keep only those with data
+  sankeyDat <- dat[jj]
+
+  sankey <- purrr::map(sankeyDat, ~plot_patterns(.x))
+
+  # rename to the cohort id
+  names(sankey) <- tools::file_path_sans_ext(basename(names(sankey))) %>%
+    gsub("sankey_", "", .)
+
+  return(sankey)
+}
+
+# Time to event functions -----
+
+bindTteData <- function(path,
+                        database,
+                        task,
+                        file,
+                        nYears = 3) {
+
+
+  # create path to file
+  pathToFile <- fs::path(path, database, task, file)
+
+  #create target id
+  targetId <- gsub(".*_", "", file) %>%
+    tools::file_path_sans_ext()
+
+  #read in data
+  tteData <- readr::read_csv(file = pathToFile, show_col_types = FALSE) %>%
+    dplyr::filter(
+      time <= 3
+    )
+
+  #remove singleLine Strata
+  singleLineStrata <- tteData %>%
+    dplyr::filter(
+      !grepl("\\+", strata)
+    ) %>%
+    dplyr::pull(strata) %>%
+    unique()
+
+  #get top 4 multi lines
+  top4MultiLineStrata <-tteData %>%
+    dplyr::filter(
+      grepl("\\+", strata)
+    ) %>%
+    count(strata) %>%
+    dplyr::arrange(desc(n)) %>%
+    dplyr::slice(1:4) %>%
+    dplyr::pull(strata) %>%
+    unique()
+
+  # combine specified strata lines
+  strataLines <- c(singleLineStrata, top4MultiLineStrata)
+
+
+  # subset tted Data to the specified strata lines
+  subsetTteData <- tteData %>%
+    dplyr::filter(
+      strata %in% strataLines
+    ) %>%
+    dplyr::mutate(
+      database = !!database,
+      targetId = !!targetId
+    ) %>%
+    dplyr::select(
+      database, targetId, strata, time, n.risk, n.event, estimate, std.error
+    )
+
+  return(subsetTteData)
+
+}
+
+bindTteData2 <- function(path,
+                         database,
+                         task,
+                         file,
+                         nYears = 3) {
+
+
+  # create path to file
+  pathToFile <- fs::path(path, database, task, file)
+
+  #create target id
+  targetId <- gsub(".*_", "", file) %>%
+    tools::file_path_sans_ext()
+
+  #read in data
+  tteData <- readr::read_csv(file = pathToFile, show_col_types = FALSE) %>%
+    dplyr::filter(
+      time <= 3
+    )
+
+
+  # subset tted Data to the specified strata lines
+  updateTteData <- tteData %>%
+    dplyr::mutate(
+      database = !!database,
+      targetId = !!targetId
+    ) %>%
+    dplyr::select(
+      database, targetId, outcomeCohortId, time, n.risk, n.event, estimate, std.error
+    )
+
+  return(updateTteData)
+
+}
+# create KM plot
+plotKM <- function(kmData, # input data
+                   targetCohortId, # select target cohort
+                   database, # select database
+                   nLines = 15L, # determine the number of lines to plot default is 8
+                   saveLocation = here::here("appData/www") # location to save plot
+) {
+
+  # create saveLocation if doesnt exist
+  saveLocation <- fs::dir_create(saveLocation)
+
+  #console prints
+  cli::cat_line(crayon::green("Create KM Plot"))
+  cli::cat_bullet(glue::glue("Database: {crayon::yellow(database)}"),
+                  bullet = "pointer", bullet_col = "yellow")
+  cli::cat_bullet(glue::glue("Cohort Id: {crayon::yellow(targetCohortId)}"),
+                  bullet = "pointer", bullet_col = "yellow")
+
+  # subset data based on targetId, database and era
+  dat <- kmData %>%
+    dplyr::filter(
+      databaseId == database,
+      targetCohortId == !!targetCohortId
+    ) %>%
+    dplyr::mutate(
+      strata = factor(strata)
+    )
+
+  # determine the top 8 treatment lines and subset dat to only show these
+  keep <- dat %>%
+    count(strata) %>%
+    dplyr::arrange(desc(n)) %>%
+    dplyr::slice(1:nLines) %>%
+    dplyr::pull(strata)
+
+  # filter data to only keep top 8 treatment lines
+  dat2 <- dat %>%
+    dplyr::filter(
+      strata %in% keep
+    )
+
+  # prep plot
+
+  cols <- unname(grafify::graf_palettes$kelly) # get plotting colors
+  plot_colors <- cols[1:nLines] # retrieve first nLines
+
+  # get km plot
+  p <- ggplot(dat2, aes(x = time, y = estimate, color = strata)) +
+    geom_step(linewidth = 1) +
+    scale_color_manual(values = plot_colors) + #scale colors to kelly
+    scale_y_continuous(labels = scales::percent_format()) + # convert y axis to percent
+    labs(x = 'Years', y = 'Probability of Survival') + # improve labels
+    theme_classic()
+
+  #create saving convention
+  plotName <- glue::glue("km_{database}_{targetCohortId}.png") %>%
+    as.character()
+
+  plotSave <- fs::path(saveLocation, plotName)
+  # save plot
+  ggplot2::ggsave(filename = plotSave,
+                  plot = p,
+                  width = 27, height = 24, units = "cm")
+
+  #console print of save location
+  cli::cat_line("Saved plot to: ")
+  cli::cat_bullet(crayon::cyan(plotSave),
+                  bullet = "pointer", bullet_col = "yellow")
+
+  invisible(p)
+
+}
+
+
+
+
+## Run the plotKm in for loop or purrr walk to get all the plots in the www folder
+
+# Get Probability Tables ------------------------
+
+findSurvProbAtTime <- function(dat, strata, t) {
+
+  label <- paste("yr", t,  sep = "_")
+
+  survProb <- dat %>%
+    dplyr::filter(strata == !!strata) %>% #filter on strata
+    dplyr::mutate(
+      tt = abs(time - !!t) # find absolute value of time column with t
+    ) %>% # this will find value closest to zero
+    dplyr::arrange(tt) %>% #sort from smallest to largest
+    dplyr::slice(1) %>%
+    dplyr::select(
+      strata, estimate, conf.low, conf.high
+    ) %>%
+    dplyr::mutate(
+      t = label, .before = 1
+    )
+
+  return(survProb)
+
+}
+
+findSurvProbAtTime2 <- function(dat, t) {
+
+  label <- paste("yr", t,  sep = "_")
+
+  survProb <- dat %>%
+    #dplyr::filter(strata == !!strata) %>% #filter on strata
+    dplyr::mutate(
+      tt = abs(time - !!t) # find absolute value of time column with t
+    ) %>% # this will find value closest to zero
+    group_by(outcomeName) %>%
+    dplyr::arrange(tt) %>% #sort from smallest to largest
+    dplyr::slice(1) %>%
+    dplyr::ungroup() %>%
+    dplyr::select(outcomeName, estimate, conf.low, conf.high) %>%
+    dplyr::mutate(
+      t = label, .before = 1
+    )
+
+  return(survProb)
+
+}
+
+
+findTimeAtSurvProb <- function(dat, strata, p) {
+
+  label <- paste("p", p,  sep = "_")
+  tmp <- dat %>%
+    dplyr::filter(strata == !!strata) #filter on strata
+
+  timeEst <- tmp %>%
+    dplyr::mutate(
+      pp = abs(estimate - !!p) # find absolute value of time column with p
+    ) %>% # this will find two values closest to zero
+    dplyr::arrange(pp) %>% #sort from smallest to largest
+    dplyr::slice(1) %>%
+    dplyr::select(strata, time)
+
+  ciBounds <- tibble::tibble(
+    'conf.low' = tmp$time[min(which(tmp$conf.low < p))],
+    'conf.high' = tmp$time[max(which(tmp$conf.high > p)) + 1]
+  )
+
+  timeEst <- dplyr::bind_cols(timeEst, ciBounds) %>%
+    dplyr::mutate(
+      p = label, .before = 1
+    )
+
+  return(timeEst)
+
+}
+
+getSurvProbTab <- function(kmData,
+                           targetCohortId, # select target cohort
+                           database, # select database
+                           nLines = 10L # determine the number of lines to plot default is 10
+) {
+
+  # subset data based on targetId, database and era
+  dat <- kmData %>%
+    dplyr::filter(
+      databaseId == !!database,
+      targetCohortId == !!targetCohortId
+    )
+
+  # determine the top 8 treatment lines and subset dat to only show these
+  keep <- dat %>%
+    count(strata) %>%
+    dplyr::arrange(desc(n)) %>%
+    dplyr::slice(1:nLines) %>%
+    dplyr::pull(strata)
+
+  # filter data to only keep top 8 treatment lines
+  dat2 <- dat %>%
+    dplyr::filter(
+      strata %in% keep
+    )
+
+  #list times 0.5 yrs (6months), 1 year and 2 years
+  t <- c(0.5, 1, 2)
+  # expand grid to make tabular structure of permutations. strata per t
+  mm <- tidyr::expand_grid(keep, t)
+  #function factory of survProb at a specified time
+  survProbTab <- purrr::pmap_dfr(
+    mm,
+    ~findSurvProbAtTime(
+      dat = dat2,
+      strata = ..1, # column for strata
+      t = ..2 # column for time
+    )
+  )
+
+  return(survProbTab)
+
+}
+
+#test
+# getSurvProbTab(kmData = kmData,
+#                targetCohortId = 1L,
+#                database = "cprdAurum",
+#                era = "era_30")
+
+getTimeTab <- function(kmData,
+                       targetCohortId, # select target cohort
+                       database, # select database
+                       nLines = 10L # determine the number of lines to plot default is 8
+) {
+
+
+  # subset data based on targetId, database and era
+  dat <- kmData %>%
+    dplyr::filter(
+      database == !!database,
+      targetCohortId == !!targetCohortId
+    )
+
+  # determine the top 8 treatment lines and subset dat to only show these
+  keep <- dat %>%
+    count(strata) %>%
+    dplyr::arrange(desc(n)) %>%
+    dplyr::slice(1:nLines) %>%
+    dplyr::pull(strata)
+
+  # filter data to only keep top 8 treatment lines
+  dat2 <- dat %>%
+    dplyr::filter(
+      strata %in% keep
+    )
+  #list probs 0.25, 0,5 and 0.75
+  p <- c(0.25, 0.5, 0.75)
+  # expand grid to make tabular structure of permutations. strata per p
+  mm <- tidyr::expand_grid(keep, p)
+  #function factory of time at a specified survProb
+
+  timeTab <- purrr::pmap_dfr(
+    mm,
+    ~findTimeAtSurvProb(
+      dat = dat2,
+      strata = ..1,# column for strata
+      p = ..2 # column for time
+    )
+  )
+
+  return(timeTab)
+
+}
+
+
+# getTimeTab(kmData = kmData,
+#            targetCohortId = 1L,
+#            database = "cprdAurum",
+#            era = "era_30")
diff --git a/extras/shiny/R/fn.R b/extras/shiny/R/fn.R
new file mode 100644
index 0000000..1e48590
--- /dev/null
+++ b/extras/shiny/R/fn.R
@@ -0,0 +1,319 @@
+buildSankeyData <- function(dt) {
+
+  # build links
+  links <- dt %>%
+    dplyr::select(event_cohort_name1:n) %>%
+    dplyr::mutate(row = dplyr::row_number()) %>%
+    tidyr::pivot_longer(cols = c(-row, -n),
+                        names_to = 'column', values_to = 'source') %>%
+    dplyr::mutate(column = match(column, names(dt))) %>%
+    tidyr::drop_na(source) %>%
+    dplyr::mutate(source = paste0(source, '__', column)) %>%
+    dplyr::group_by(row) %>%
+    dplyr::mutate(target = dplyr::lead(source, order_by = column)) %>%
+    tidyr::drop_na(target, source) %>%
+    dplyr::group_by(source, target) %>%
+    dplyr::summarise(value = sum(n), .groups = 'drop') %>%
+    dplyr::arrange(desc(value))
+
+
+  # build nodes
+  nodes <- data.frame(name = unique(c(links$source, links$target)))
+  nodes <- data.table::data.table(nodes)
+  links <- data.table::data.table(links)
+  links$source <- match(links$source, nodes$name) - 1
+  links$target <- match(links$target, nodes$name) - 1
+  nodes$name <- sub('__[0-9]+$', '', nodes$name)
+  links$type <- sub(' .*', '',
+                    as.data.frame(nodes)[links$source + 1, 'name'])
+  data.table::setkey(links, type)
+  data.table::setorder(links, cols = - "value")
+
+  res <- list(
+    'links' = links,
+    'nodes' = nodes
+  )
+  return(res)
+
+}
+
+
+plotSankey <- function(sankey) {
+
+  links <- sankey$links
+  nodes <- sankey$nodes
+
+  label <- unique(links$type)
+  label2 <- paste0("'", paste(label, collapse = "','"), "',", "'end'")
+
+  kelly_colors <- unname(grafify::graf_palettes$kelly)[-1]
+
+  col <- kelly_colors[seq_along(label)]
+  col2 <- paste0("'", paste(col, collapse = "','"), "',", "'#1B1919FF'")
+
+  myCol <- glue::glue('d3.scaleOrdinal() .domain([{label2}]) .range([{col2}])')
+
+  #plot sankeyNetwork
+  sankey <- networkD3::sankeyNetwork(
+    Links = links,
+    Nodes = nodes,
+    Source = 'source',
+    Target = 'target',
+    Value = 'value',
+    NodeID = 'name',
+    fontSize = 11,
+    sinksRight = FALSE,
+    colourScale = myCol
+  )
+
+  return(sankey)
+
+}
+
+
+plotYearlyIncidence <- function(dat) {
+
+  plot_colors <- unname(grafify::graf_palettes$kelly)
+
+  p <- dat %>%
+    dplyr::mutate(
+      grp = paste(databaseId, OUTCOME_NAME, sep = "_"),
+      START_YEAR = lubridate::ymd(START_YEAR, truncated = 2L),
+      `Cohort Name` = OUTCOME_NAME
+    ) %>%
+    ggplot(aes(x = START_YEAR, y = INCIDENCE_RATE_P1000PY, color = `Cohort Name`)) +
+    geom_point() +
+    geom_line(aes(group = grp)) +
+    scale_x_date(name = "Year", date_labels = "%Y") +
+    scale_color_manual(values = plot_colors) + #scale colors to kelly
+    facet_grid(rows = vars(databaseId), cols = vars(`Cohort Name`)) +
+    labs(
+      y = "Incidence Rate (per 1000 person-years)"
+    ) +
+    theme_bw() +
+    theme(
+      axis.text.x = element_text(angle = 60, hjust = 1)
+    )
+
+  return(p)
+}
+
+relabelStrata <- function(dat, oldLabels, newLabels) {
+
+  for (i in seq_along(oldLabels)){
+    dat$strata <- gsub(oldLabels[i], newLabels[i], dat$strata)
+  }
+  return(dat)
+
+}
+
+relabelOutcome <- function(dat, oldLabels, newLabels) {
+
+  for (i in seq_along(oldLabels)){
+    dat$outcomeCohortId <- gsub(oldLabels[i], newLabels[i], dat$outcomeCohortId)
+  }
+  return(dat)
+
+}
+
+# For strata ----------------------
+
+plotKM <- function(dat) {
+
+
+  plot_colors <- unname(grafify::graf_palettes$kelly)
+
+  p <- ggplot(dat, aes(x = time, y = estimate, color = strata)) +
+    geom_step(linewidth = 1.5) +
+    scale_color_manual(values = plot_colors) + #scale colors to kelly
+    scale_y_continuous(labels = scales::percent_format()) + # convert y axis to percent
+    labs(x = 'Years', y = 'Probability of Survival') + # improve labels
+    theme_classic() +
+    theme(
+      legend.text = element_text(size = 12)
+    )
+
+  return(p)
+
+}
+
+findSurvProbAtTime <- function(dat, strata, t) {
+
+
+  label <- dplyr::case_when(
+    t == 0.5 ~ "6 month",
+    t == 1 ~ "1 year",
+    t == 2 ~ "2 year"
+  )
+
+  survProb <- dat %>%
+    dplyr::filter(
+      strata == !!strata
+    ) %>% #filter on strata
+    dplyr::mutate(
+      tt = abs(time - !!t) # find absolute value of time column with t
+    ) %>% # this will find value closest to zero
+    dplyr::arrange(tt) %>% #sort from smallest to largest
+    dplyr::slice(1)  %>%
+    dplyr::mutate(
+      survivalTime = label
+    ) %>%
+    dplyr::select(
+      database, targetId, strata, survivalTime, estimate
+    )
+
+  return(survProb)
+
+}
+
+
+makeSurvProbTab <- function(dat) {
+
+  # subset data
+  # dat <- dat %>%
+  #   dplyr::filter(
+  #     database == !!database,
+  #     targetId == !!targetId
+  #   )
+
+  # determine unique strata
+  strataLines <- unique(dat$strata)
+
+  # list time points
+  t <- c(0.5, 1, 2)
+
+  # make permutations
+  permutations <- tidyr::expand_grid(
+    strataLines,
+    t
+  )
+
+  # create survProbTab
+  survProbTab <- purrr::pmap_dfr(
+    permutations,
+    ~findSurvProbAtTime(
+      dat = dat,
+      strata = ..1,
+      t = ..2
+    )
+  )
+
+  # survProbTab <- relabelStrata(
+  #   survProbTab,
+  #   oldLabels = as.character(c(27:29, 31:35)),
+  #   newLabels = c("oc", "danazol", "grha", "lglIUD",
+  #                 "nsaids", "progestin", "tranexamicAcid", "ulipristalAcetate")
+  # )
+
+
+  survProbTab2 <- survProbTab %>%
+    tidyr::pivot_wider(
+      id_cols = c(database, targetId, strata),
+      names_from = survivalTime,
+      values_from = estimate
+    )
+
+  return(survProbTab2)
+
+}
+
+# For outcome ----------------------------
+#
+plotKM2 <- function(dat) {
+
+
+  plot_colors <- unname(grafify::graf_palettes$kelly)
+
+  p <- ggplot(dat, aes(x = time, y = estimate, color = outcomeCohortId)) +
+    geom_step(linewidth = 1.5) +
+    scale_color_manual(values = plot_colors) + #scale colors to kelly
+    scale_y_continuous(labels = scales::percent_format()) + # convert y axis to percent
+    labs(x = 'Years', y = 'Probability of Survival') + # improve labels
+    theme_classic() +
+    theme(
+      legend.text = element_text(size = 12)
+    )
+
+  return(p)
+
+}
+
+findSurvProbAtTime2 <- function(dat, outcome, t) {
+
+
+  label <- dplyr::case_when(
+    t == 0.5 ~ "6 month",
+    t == 1 ~ "1 year",
+    t == 2 ~ "2 year"
+  )
+
+  survProb <- dat %>%
+    dplyr::filter(
+      outcomeCohortId == !!outcome
+    ) %>% #filter on strata
+    dplyr::mutate(
+      tt = abs(time - !!t) # find absolute value of time column with t
+    ) %>% # this will find value closest to zero
+    dplyr::arrange(tt) %>% #sort from smallest to largest
+    dplyr::slice(1)  %>%
+    dplyr::mutate(
+      survivalTime = label
+    ) %>%
+    dplyr::select(
+      database, targetId, outcomeCohortId, survivalTime, estimate
+    )
+
+  return(survProb)
+
+}
+
+
+makeSurvProbTab2 <- function(dat) {
+
+  # subset data
+  # dat <- dat %>%
+  #   dplyr::filter(
+  #     database == !!database,
+  #     targetId == !!targetId
+  #   )
+
+  # determine unique strata
+  outcomeLines <- unique(dat$outcomeCohortId)
+
+  # list time points
+  t <- c(0.5, 1, 2)
+
+  # make permutations
+  permutations <- tidyr::expand_grid(
+    outcomeLines,
+    t
+  )
+
+  # create survProbTab
+  survProbTab <- purrr::pmap_dfr(
+    permutations,
+    ~findSurvProbAtTime2(
+      dat = dat,
+      outcome = ..1,
+      t = ..2
+    )
+  )
+
+  # survProbTab <- relabelStrata(
+  #   survProbTab,
+  #   oldLabels = as.character(c(27:29, 31:35)),
+  #   newLabels = c("oc", "danazol", "grha", "lglIUD",
+  #                 "nsaids", "progestin", "tranexamicAcid", "ulipristalAcetate")
+  # )
+
+
+  survProbTab2 <- survProbTab %>%
+    tidyr::pivot_wider(
+      id_cols = c(database, targetId, outcomeCohortId),
+      names_from = survivalTime,
+      values_from = estimate
+    )
+
+  return(survProbTab2)
+
+}
diff --git a/extras/shiny/R/loadData.R b/extras/shiny/R/loadData.R
new file mode 100644
index 0000000..b0c0394
--- /dev/null
+++ b/extras/shiny/R/loadData.R
@@ -0,0 +1,198 @@
+# Load data into environment for app
+
+
+# Dependencies -------------------
+
+library(dplyr)
+library(readr)
+
+source(here::here("shiny", "R", "fn.R"))
+source(here::here("extras/R/helpers.R"))
+
+dataPath <- here::here("shiny", "data")
+
+
+# About  ------------------------
+## Load database meta
+# databaseMeta <- readr::read_csv(fs::path(dataPath, "databaseMeta.csv"),
+#                                 show_col_types = FALSE)
+
+
+# Cohorts  ------------------------
+## Load cohort Counts
+cohortCounts <- readr::read_csv(fs::path(dataPath, "cohortCounts.csv"),
+                                show_col_types = FALSE)
+
+## Load strata counts
+strataCounts <- readr::read_csv(fs::path(dataPath, "strataCounts.csv"),
+                                show_col_types = FALSE)
+
+## Global Pickers ---------------------------
+databaseName <- unique(cohortCounts$Database)
+cohortName <- c("hmb", "hmb age_lt_30", "hmb age_30_45", "hmb age_45_55")
+
+
+
+# Clinical Characteristics  ------------------------
+## Load demographics baseline
+
+demoChar <- readr::read_csv(fs::path(dataPath, "baselineDemographics.csv"),
+                            show_col_types = FALSE) %>%
+  dplyr::rename(count = n) %>%
+  maskLowCount()
+
+## Load continuous baseline
+ctsChar <- readr::read_csv(fs::path(dataPath, "baselineContinuous.csv"),
+                           show_col_types = FALSE)
+
+## Load concept baseline
+conceptChar <- readr::read_csv(fs::path(dataPath, "baselineConcepts.csv"),
+                               show_col_types = FALSE) %>%
+  dplyr::rename(count = n) %>%
+  maskLowCount()
+
+## Load cohort baseline
+cohortChar <- readr::read_csv(fs::path(dataPath, "baselineCohorts.csv"),
+                              show_col_types = FALSE) %>%
+  dplyr::mutate(
+    domain = dplyr::case_when(
+      covariateName %in% c("antidepressants", "antipsychotics", "antithrombotics",
+                           "tamoxifen", "gonadalSteroids", "copperIUDdrug") ~ "Drugs",
+      TRUE ~ "Conditions"
+    )
+  ) %>%
+  dplyr::select(
+    databaseId, timeWindow, cohortId, cohortName,
+    domain, covariateId, covariateName, count, pct
+  ) %>%
+  maskLowCount()
+
+## Load chapters baseline
+icdChar <- readr::read_csv(fs::path(dataPath, "baselineChapters.csv"),
+                           show_col_types = FALSE) %>%
+  dplyr::rename(count = COUNTVALUE) %>%
+  maskLowCount()
+
+
+## Baseline Pickers
+domainConceptChar   <- sort(unique(conceptChar$domain))
+
+
+# Incidence -----------------------
+
+## Incidence
+incTab <- readr::read_csv(fs::path(dataPath, "incidence.csv"),
+                          show_col_types = FALSE)
+
+### Incidence Pickers
+yearInci <- c("All", as.character(2000:2022))
+
+
+# PostIndex Prevalence  ------------------------------------
+
+postIndex <- readr::read_csv(fs::path(dataPath, "postIndexPrevalence.csv"),
+                             show_col_types = FALSE) %>%
+  maskLowCount()
+
+## Underlying conditions--------------------
+
+condPi <- postIndex %>%
+  dplyr::filter(type == "conditions") %>%
+  dplyr::select(databaseId, timeWindow, cohortId, cohortName, covariateId, covariateName,
+                count, pct) %>%
+  dplyr::arrange(databaseId, cohortId, timeWindow, covariateId)
+
+### Pickers
+condCohorts <- unique(condPi$covariateName)
+condTimeWindow <- unique(condPi$timeWindow)
+
+## Drug Utilization------------------
+
+drugPi <- postIndex %>%
+  dplyr::filter(type == "drugs") %>%
+  dplyr::mutate(
+    timeWindow = factor(timeWindow, levels = c("1d - 183d", "184d - 365d", "1d - 365d", "366d - 730d", "731d - 1825d"))
+  ) %>%
+  dplyr::select(databaseId, timeWindow, cohortId, cohortName, covariateId, covariateName,
+                count, pct) %>%
+  dplyr::arrange(databaseId, cohortId, timeWindow, covariateId) %>%
+  dplyr::mutate(
+    timeWindow = as.character(timeWindow)
+  )
+
+### Pickers
+drugCohorts <- unique(drugPi$covariateName)
+drugTimeWindow <- unique(drugPi$timeWindow)
+
+## Procedures--------------------
+
+procPi <- postIndex %>%
+  dplyr::filter(type == "procedures") %>%
+  dplyr::select(databaseId, timeWindow, cohortId, cohortName, covariateId, covariateName,
+                count, pct)
+
+
+### Pickers
+procCohorts <- unique(procPi$covariateName)
+procTimeWindow <- unique(procPi$timeWindow)
+
+
+# Treatment Patterns ----------------------
+
+## Load treatment patterns table
+txPatDat <- readr::read_csv(fs::path(dataPath, "treatmentPatterns.csv"),
+                            show_col_types = FALSE)
+
+
+txPatDat2 <- readr::read_csv(fs::path(dataPath, "treatmentPatterns2.csv"),
+                             show_col_types = FALSE)
+
+txPatDatAll <- dplyr::bind_rows(
+  txPatDat,
+  txPatDat2
+)
+
+## sankey pickers
+cohortName2 <- c(
+  cohortName,
+  "hmb2", "hmb2 age_lt_30", "hmb2 age_30_45", "hmb2 age_45_55"
+)
+
+sankeyCohorts <- tibble::tibble(
+  id = c(1L, 1001L, 1002L, 1003L, 44L, 44001L, 44002L, 44003L),
+  name = cohortName2
+)
+
+
+# Time to Event -----------------------
+
+## Time to Discontinuation -----------------
+
+ttd <- arrow::read_parquet(file = fs::path(here::here(dataPath ,"ttd.parquet")))
+
+# relabel strata
+ttd <- relabelStrata(
+  ttd,
+  oldLabels = as.character(c(27:29, 31:35)),
+  newLabels = c("oc", "danazol", "grha", "lglIUD",
+                "nsaids", "progestin", "tranexamicAcid", "ulipristalAcetate")
+)
+
+## TTE pickers
+tteCohorts <- tibble::tibble(
+  id = c(1, 1001L, 1002L, 1003L),
+  name = cohortName
+)
+
+## Time to Intervention -----------------
+
+tti <- arrow::read_parquet(file = fs::path(here::here(dataPath ,"tti.parquet")))
+
+
+# relabel strata
+tti <- relabelOutcome(
+  tti,
+  oldLabels = as.character(36:43),
+  newLabels = c("bloodTransfusion", "copperIUD", "endoemtrialAblation", "hormonalIUD",
+                "hysterectomy", "myomectomy", "uae", "undefinedIUD")
+)
diff --git a/extras/shiny/app.R b/extras/shiny/app.R
new file mode 100644
index 0000000..6cb7bc7
--- /dev/null
+++ b/extras/shiny/app.R
@@ -0,0 +1,1375 @@
+# Client Shiny App Script
+
+# Author: George Argyriou + Martin Lavallee
+# Date 08/17/2023
+
+
+# Dependencies -------------------
+library(shiny)
+library(htmltools)
+library(shinyWidgets)
+library(shinydashboard)
+library(dplyr)
+library(networkD3)
+library(reactable)
+library(ggplot2)
+library(grafify)
+options(shiny.fullstacktrace = FALSE)
+
+
+# Variables ---------------------
+title <- "EHDEN HMB"
+
+description <- "The EHDEN HMB study......"
+incidenceDescription <- "Incidence rate is calculated by 'Outcome Count'/'Person Days' * 100."
+underlyingDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'."
+drugUtilizationDescription <- "Drug utilization counts equal to 5 and below have been masked and replaced with '<5."
+treatmentPatternsDescription <- "Treatment Patterns counts (Sequences) are restricted to 30"
+clinicalCharacteristicsDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'."
+procedureAnalysisDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'."
+
+dashboardVersion <- "0.0.4"
+dashboardDate <- Sys.Date()
+# Functions ---------------
+csvDownloadButton <- function(id, filename = "data.csv", label = "Download as CSV") {
+  tags$button(
+    tagList(icon("download"), label),
+    onclick = sprintf("Reactable.downloadDataCSV('%s', '%s')", id, filename)
+  )
+}
+
+# App Ui ---------------------
+## Header ------------
+header <- dashboardHeader(title = title,
+                          tags$li(
+                            div(
+                              #add odysseus logo
+                              img(
+                                src = 'odysseus_logo.png',
+                                title = "title",
+                                height = "50px",
+                                width = "50px"),
+                              style = "padding-top:0px; padding-bottom:0px;"),
+                            class = "dropdown")
+)
+
+## Sidebar  -----------
+sidebar <- dashboardSidebar(
+  sidebarMenu(
+    id = "tabs",
+    menuItem("About", tabName = "about", icon = shiny::icon("book", lib = "font-awesome")),
+    menuItem("Cohorts", tabName = "cohorts", icon = shiny::icon("circle-user", lib = "font-awesome")),
+    menuItem("Clinical Characteristics", tabName = "clinChar", icon = shiny::icon("clipboard", lib = "font-awesome")),
+    menuItem("Incidence", tabName = "inci", icon = shiny::icon("vial", lib = "font-awesome")),
+    menuItem("Underlying Conditions", tabName = "cond", icon = shiny::icon("disease", lib = "font-awesome")),
+    menuItem("Treatment Patterns",  tabName = "txPath", icon = shiny::icon("worm", lib = "font-awesome")),
+    menuItem("Procedure Analysis",  tabName = "proc", icon = shiny::icon("x-ray", lib = "font-awesome"))
+  ),
+  h6(style = "position: absolute; bottom: 60px; left: 10px", glue::glue("Dashboard Created by:")),
+  h6(style = "position: absolute; bottom: 45px; left: 15px", glue::glue("Odysseus Data Services")),
+  h6(style = "position: absolute; bottom: 30px; left: 15px", glue::glue("Version {dashboardVersion}")),
+  h6(style = "position: absolute; bottom: 15px; left: 15px", glue::glue("Date {dashboardDate}"))
+)
+
+## Body  -----------
+body <- dashboardBody(
+  tabItems(
+
+    ### About Tab------
+    tabItem(
+      tabName = "about",
+      #### Study description
+      fluidRow(
+        box(
+          title = "Study Description",
+          width = 12,
+          status = "success",
+          textOutput("studyDescription")
+        )
+      ),
+      #### Study Information
+      fluidRow(
+        box(
+          title = "Study Information",
+          width = 12,
+          status = "success",
+          "Github Page:",a(href= "https://github.com/OdyOSG/ehden_hmb" ,"https://github.com/OdyOSG/ehden_hmb"),
+          br(),
+          "SAP Link:",a(href= "https://odyosg.github.io/ehden_hmb/sap.html", "https://odyosg.github.io/ehden_hmb/sap.html")
+        )
+      ),
+      #### Database Information
+      fluidRow(
+        box(
+          title = "Database Information",
+          status = "success",
+          width = 12,
+          reactableOutput("databaseInformation")
+        )
+      )
+    ),
+
+    ### Cohorts Tab -----------------
+    tabItem(
+      tabName = "cohorts",
+      fluidRow(
+        box(
+          collapsible = T,
+          collapsed = F,
+          title = "Cohort & Strata Counts",
+          width = 12,
+          background = "light-blue"
+        )
+      ),
+
+      fluidRow(
+        tabBox(
+          #title = "Cohort Counts",
+          id = "cohortCounts",
+          width = 12,
+          tabPanel("Cohort Counts",
+                   fluidRow(
+                     box(
+                       status = "success",
+                       column(width = 12,
+                              pickerInput(
+                                inputId = "databaseNameCohort",
+                                label = "Database",
+                                choices = databaseName,
+                                selected = databaseName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       )
+                     )
+                   ),
+                   fluidRow(
+                     box(width = 12,
+                         reactableOutput("cohortCountsTab"),
+                         csvDownloadButton("cohortCountsTab", filename = "cohortCounts.csv")
+                     )
+                   )
+          ),
+          tabPanel("Strata Counts",
+                   fluidRow(
+                     box(
+                       status = "success",
+                       column(width = 12,
+                              pickerInput(
+                                inputId = "databaseNameStrata",
+                                label = "Database",
+                                choices = databaseName,
+                                selected = databaseName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       )
+                     )
+                   ),
+                   fluidRow(
+                     box(width = 12,
+                         reactableOutput("strataCountsTab"),
+                         csvDownloadButton("strataCountsTab", filename = "strataCounts.csv"))
+                   )
+          )
+        )
+      )
+    ),
+
+    ### Clinical Characteristics Tab -----------------
+    tabItem(
+      tabName = "clinChar",
+      fluidRow(
+        box(
+          collapsible = T,
+          collapsed = F,
+          title = "Clinical Characteristics",
+          width = 12,
+          background = "light-blue",
+          textOutput("clinicalCharacteristicsDescription")
+        )
+      ),
+      fluidRow(
+        tabBox(
+          id = "baselineChar",
+          width = 12,
+          tabPanel("Demographics",
+                   fluidRow(
+                     box(
+                       status = "success",
+                       column(width = 6,
+                              pickerInput(
+                                inputId = "databaseNameDemo",
+                                label = "Database",
+                                choices = databaseName,
+                                selected = databaseName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              ),
+
+                              pickerInput(
+                                inputId = "cohortNameDemo",
+                                label = "Cohort Name",
+                                choices = cohortName,
+                                selected = cohortName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       )
+                     ),
+                     fluidRow(
+                       box(
+                         width = 12,
+                         reactableOutput("demoCharTab"),
+                         csvDownloadButton("demoCharTab", filename = "baselineDemographics.csv")
+                       )
+                     )
+                   )
+
+          ),
+          tabPanel("Continuous",
+                   fluidRow(
+                     box(
+                       status = "success",
+                       column(width = 6,
+                              pickerInput(
+                                inputId = "databaseNameCts",
+                                label = "Database",
+                                choices = databaseName,
+                                selected = databaseName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              ),
+
+                              pickerInput(
+                                inputId = "cohortNameCts",
+                                label = "Cohort Name",
+                                choices = cohortName,
+                                selected = cohortName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       )
+                     ),
+                     fluidRow(
+                       box(
+                         width = 12,
+                         reactableOutput("ctsCharTab"),
+                         csvDownloadButton("ctsCharTab", filename = "baselineContinuous.csv")
+                       )
+                     )
+                   )
+
+          ),
+          tabPanel("Concept",
+                   fluidRow(
+                     box(
+                       status = "success",
+                       column(width = 6,
+                              pickerInput(
+                                inputId = "databaseNameConcept",
+                                label = "Database",
+                                choices = databaseName,
+                                selected = databaseName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              ),
+
+                              pickerInput(
+                                inputId = "cohortNameConcept",
+                                label = "Cohort Name",
+                                choices = cohortName,
+                                selected = cohortName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              ),
+                       ),
+                       column(width = 6,
+                              pickerInput(
+                                inputId = "domainConcept",
+                                label = "Domain",
+                                choices = domainConceptChar,
+                                selected = domainConceptChar,
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       )
+                     ),
+                     fluidRow(
+                       box(
+                         width = 12,
+                         reactableOutput("conceptCharTab")
+                       )
+                     )
+                   )
+
+          ),
+          tabPanel("Cohorts",
+                   fluidRow(
+                     box(
+                       status = "success",
+                       column(width = 6,
+                              pickerInput(
+                                inputId = "databaseNameCohortCov",
+                                label = "Database",
+                                choices = databaseName,
+                                selected = databaseName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              ),
+
+                              pickerInput(
+                                inputId = "cohortNameCohortCov",
+                                label = "Cohort Name",
+                                choices = cohortName,
+                                selected = cohortName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       ),
+                       column(width = 6,
+                              pickerInput(
+                                inputId = "domainCohortCov",
+                                label = "Domain",
+                                choices = domainConceptChar[1:2],
+                                selected = domainConceptChar[1:2],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       )
+                     ),
+                     fluidRow(
+                       box(
+                         width = 12,
+                         reactableOutput("cohortCharTab"),
+                         csvDownloadButton("cohortCharTab", filename = "baselineCohorts.csv")
+                       )
+                     )
+                   )
+          ),
+
+          tabPanel("ICD10Chapters",
+                   fluidRow(
+                     box(
+                       status = "success",
+                       column(width = 6,
+                              pickerInput(
+                                inputId = "databaseNameIcd",
+                                label = "Database",
+                                choices = databaseName,
+                                selected = databaseName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              ),
+
+                              pickerInput(
+                                inputId = "cohortNameIcd",
+                                label = "Cohort Name",
+                                choices = cohortName,
+                                selected = cohortName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       )
+                     ),
+                     fluidRow(
+                       box(
+                         width = 12,
+                         reactableOutput("icdCharTab"),
+                         csvDownloadButton("icdCharTab", filename = "baselineChapters.csv"))
+                     )
+                   )
+          )
+
+        )
+      )
+    ),
+
+
+    ### Incidence Tab -----------------
+    tabItem(
+      tabName = "inci",
+
+      fluidRow(
+        box(
+          collapsible = T,
+          collapsed = F,
+          title = "Incidence",
+          width = 12,
+          background = "light-blue",
+          textOutput("clinicalOutcomesDescription")
+        )
+      ),
+
+      fluidRow(
+        tabBox(
+          id = "baselineChar",
+          width = 12,
+
+          tabPanel(
+            "Table",
+            fluidRow(
+              box(
+                status = "success",
+                column(width = 6,
+
+                       # pick database
+                       pickerInput(
+                         inputId = "databaseNameInci",
+                         label = "Database",
+                         choices = databaseName,
+                         selected = databaseName,
+                         options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                         multiple = TRUE
+                       ),
+
+                       # pick cohortName
+                       pickerInput(
+                         inputId = "cohortNameInci",
+                         label = "Cohort Name",
+                         choices = cohortName,
+                         selected = cohortName[1],
+                         options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                         multiple = TRUE
+                       )
+                       #,
+
+
+                ),
+                column(width = 6,
+
+                       # pick year
+                       pickerInput(
+                         inputId = "yearInci",
+                         label = "Year",
+                         choices = yearInci,
+                         selected = yearInci[1],
+                         options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                         multiple = TRUE
+                       )
+                )
+              )
+            ),
+            fluidRow(
+              box(
+                width = 12,
+                reactableOutput("inciTab"),
+                csvDownloadButton("inciTab", filename = "incidence.csv")
+              )
+            )
+          ),
+          tabPanel("Yearly Trend",
+
+                   fluidRow(
+                     column(width = 6,
+
+                            box(
+                              #width = 12,
+                              status = "success",
+
+                              pickerInput(
+                                inputId = "databaseNameYrInci",
+                                label = "Database",
+                                choices = databaseName,
+                                selected = databaseName,
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              ),
+
+                              pickerInput(
+                                inputId = "cohortNameYrInci",
+                                label = "Cohort Name",
+                                choices = cohortName,
+                                selected = cohortName,
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                            )
+                     )
+                   ),
+
+                   fluidRow(
+
+                     box(
+                       width = 12,
+                       plotOutput("inciYearPlot")
+                     )
+                   )
+          )
+        )
+      )
+    ),
+
+
+
+    ### Underlying Conditions Tab -----------------
+    tabItem(
+      tabName = "cond",
+      fluidRow(
+        box(
+          collapsible = T,
+          collapsed = F,
+          title = "Underlying Conditions",
+          width = 12,
+          background = "light-blue",
+          textOutput("underlyingDescription")
+        )
+      ),
+      fluidRow(
+        box(
+          status = "success",
+          column(width = 6,
+                 # pick database
+                 pickerInput(
+                   inputId = "databaseNameCondPi",
+                   label = "Database",
+                   choices = databaseName,
+                   selected = databaseName,
+                   options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                   multiple = TRUE
+                 ),
+                 # pick cohort
+                 pickerInput(
+                   inputId = "cohortNameCondPi",
+                   label = "Cohort Name",
+                   choices = cohortName,
+                   selected = cohortName[1],
+                   options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                   multiple = TRUE
+                 )
+
+          ),
+          column(width = 6,
+                 # pick outcome
+                 pickerInput(
+                   inputId = "conditionNameCondPi",
+                   label = "Condition Name",
+                   choices = condCohorts,
+                   selected = condCohorts,
+                   options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                   multiple = TRUE
+                 ),
+
+                 # pick year
+                 pickerInput(
+                   inputId = "timeWindowCondPi",
+                   label = "Time Window",
+                   choices = condTimeWindow,
+                   selected = condTimeWindow,
+                   options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                   multiple = TRUE
+                 )
+          )
+        )
+      ),
+      fluidRow(
+        box(
+          width = 12,
+          reactableOutput("condPiTab"),
+          csvDownloadButton("condPiTab", filename = "underlyingConditions.csv")
+        )
+      )
+    ),
+
+
+    ### Treatment patterns Tab -----------------
+    tabItem(
+      tabName = "txPath",
+      fluidRow(
+        box(
+          collapsible = T,
+          collapsed = F,
+          title = "Treatment Patterns",
+          width = 12,
+          background = "light-blue",
+          textOutput("treatmentPatternsDescription"),
+          textOutput("drugUtilizationDescription")
+        )
+      ),
+      fluidRow(
+        tabBox(
+          id = "txPathTab",
+          width = 12,
+          ### Utilization Panel
+          tabPanel("Drug Utilization",
+                   fluidRow(
+                     box(
+                       status = "success",
+                       column(width = 6,
+                              pickerInput(
+                                inputId = "databaseNameDuPi",
+                                label = "Database",
+                                choices = databaseName,
+                                selected = databaseName,
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              ),
+                              pickerInput(
+                                inputId = "cohortNameDuPi",
+                                label = "Cohort Name",
+                                choices = cohortName,
+                                selected = cohortName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       ),
+                       column(width = 6,
+                              pickerInput(
+                                inputId = "drugNameDuPi",
+                                label = "Drug Name",
+                                choices = drugCohorts,
+                                selected = drugCohorts,
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              ),
+                              pickerInput(
+                                inputId = "timeWindowDuPi",
+                                label = "Time Window",
+                                choices = drugTimeWindow,
+                                selected = drugTimeWindow,
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       )
+                     ),
+                     fluidRow(
+                       box(
+                         width = 12,
+                         reactableOutput("duTab"),
+                         csvDownloadButton("duTab", filename = "postIndexDrugUtilization.csv")
+                       )
+                     )
+                   )
+          ),
+          ### Sequences Panel
+          tabPanel("Sequences",
+                   fluidRow(
+                     box(
+                       status = "success",
+                       width = 4,
+                       pickerInput(
+                         inputId = "databaseNameSankey",
+                         label = "Database",
+                         choices = databaseName
+                       ),
+                       pickerInput(
+                         inputId = "cohortNameSankey",
+                         label = "Cohort Name",
+                         choices = cohortName2
+                       )
+                     )
+                   ),
+                   fluidRow(
+                     box(width = 12 ,
+                         sankeyNetworkOutput("txSankey",
+                                             width = "auto",
+                                             height = "500px")
+                     )
+                   ),
+                   fluidRow(
+                     box(
+                       width = 12,
+                       reactableOutput("txPatDat"))
+                   )
+          ),
+          ### TTD Panel
+          tabPanel("Time to Discontinuation",
+                   fluidRow(
+                     column(width = 9,
+                            #box(
+                            status = "success",
+                            plotOutput("ttdKmPlot")
+                            #),
+                     ),
+                     column(width = 3,
+                            box(
+                              width = 9,
+                              height = "230px",
+                              background = "light-blue",
+                              pickerInput(
+                                inputId = "databaseNameTtd",
+                                label = "Database",
+                                choices = databaseName
+                              ),
+                              pickerInput(
+                                inputId = "cohortNameTtd",
+                                label = "Cohort Name",
+                                choices = cohortName
+                              ),
+                              pickerInput(
+                                inputId = "strataTtd",
+                                label = "Drugs",
+                                choices = c("Single", "All")
+                              )
+                            )
+                     ),
+                   ),
+                   fluidRow(
+                     box(
+                       width = 12,
+                       reactableOutput("ttdSurvTab")
+                     )
+                   )
+          )
+        )
+      )
+    ),
+
+
+    ### Procedure Analysis Tab -----------------
+    tabItem(
+      tabName = "proc",
+      fluidRow(
+
+        box(
+          collapsible = T,
+          collapsed = F,
+          title = "Procedure Analysis",
+          width = 12,
+          background = "light-blue",
+          textOutput("procedureAnalysisDescription")
+        ),
+
+        tabBox(
+          id = "procAnalysis",
+          width = 12,
+          tabPanel("Procedure Prevalence",
+                   fluidRow(
+                     box(
+                       status = "success",
+                       column(width = 6,
+                              pickerInput(
+                                inputId = "databaseNameProcPi",
+                                label = "Database",
+                                choices = databaseName,
+                                selected = databaseName,
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              ),
+                              pickerInput(
+                                inputId = "cohortNameProcPi",
+                                label = "Cohort Name",
+                                choices = cohortName,
+                                selected = cohortName[1],
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       ),
+                       column(width = 6,
+                              pickerInput(
+                                inputId = "procNameProcPi",
+                                label = "Procedure Name",
+                                choices = procCohorts,
+                                selected = procCohorts,
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              ),
+                              pickerInput(
+                                inputId = "timeWindowProcPi",
+                                label = "Time Window",
+                                choices = procTimeWindow,
+                                selected = procTimeWindow,
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
+                       )
+                     )
+                   ),
+                   fluidRow(
+                     box(
+                       width = 12,
+                       reactableOutput("procTab"),
+                       csvDownloadButton("procTab", filename = "postIndexProcedures.csv")
+                     )
+                   )
+          ),
+          tabPanel("Time to Intervention",
+                   fluidRow(
+                     column(width = 9,
+                            #box(
+                            status = "success",
+                            plotOutput("ttiKmPlot")
+                            #)
+                     ),
+                     column(width = 3,
+                            box(
+                              width = 9,
+                              height = "180px",
+                              background = "light-blue",
+                              pickerInput(
+                                inputId = "databaseNameTti",
+                                label = "Database",
+                                choices = databaseName
+                              ),
+                              pickerInput(
+                                inputId = "cohortNameTti",
+                                label = "Cohort Name",
+                                choices = cohortName
+                              )
+                            )
+                     ),
+                   ),
+                   fluidRow(
+                     box(
+                       width = 12,
+                       reactableOutput("ttiSurvTab")
+                     )
+                   )
+          )
+        )
+      )
+    )
+  )
+)
+
+
+
+# Bind ui elements
+ui <- dashboardPage(
+  header,
+  sidebar,
+  body
+)
+
+# App Server ------------------
+server <- function(input, output, session){
+
+
+  # About --------------
+
+  ## Study Description
+  output$studyDescription <- renderText({
+    description
+  })
+
+  ## Database Information
+  # output$databaseInformation <- renderReactable(
+  #   databaseMeta %>% reactable()
+  # )
+
+
+  # Cohorts ----------------
+
+  ## Cohort Counts
+  output$cohortCountsTab <- renderReactable(
+    cohortCounts %>%
+      dplyr::filter(Database %in% input$databaseNameCohort) %>%
+      reactable(
+        columns = list(Subjects = colDef(name = "Subjects", format = colFormat(separators = TRUE)),
+                       Entries = colDef(name = "Entries", format = colFormat(separators = TRUE))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
+  ## Strata Counts
+  output$strataCountsTab <- renderReactable(
+    strataCounts %>%
+      dplyr::filter(Database %in% input$databaseNameStrata) %>%
+      reactable(
+        columns = list(Subjects = colDef(name = "Subjects", format = colFormat(separators = TRUE))),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
+
+  # Clinical Characteristics -------------
+
+  ## Box text
+  output$clinicalCharacteristicsDescription <- renderText({
+    clinicalCharacteristicsDescription
+  })
+
+
+  ## Demographics
+  output$demoCharTab <- renderReactable(
+    demoChar %>%
+      dplyr::filter(databaseId %in% input$databaseNameDemo,
+                    cohortName %in% input$cohortNameDemo
+      ) %>%
+      reactable(
+        columns = list(
+          databaseId = colDef(name = "Database"),
+          cohortDefinitionId = colDef(name = "Cohort Id"),
+          cohortName = colDef(name = "Cohort Name"),
+          id = colDef(name = "Covariate Id"),
+          Covariate = colDef(name = "Covariate Name"),
+          count = colDef(name = "Count"),
+          pct = colDef(name = "Percentage")
+          # n = colDef(name = "Count", format = colFormat(separators = TRUE)),
+          # pct = colDef(name = "Percentage", format = colFormat(percent = TRUE, digits = 2))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
+  ## Continuous
+  output$ctsCharTab <- renderReactable(
+    ctsChar %>%
+      dplyr::filter(databaseId %in% input$databaseNameCts,
+                    cohortName %in% input$cohortNameCts
+      ) %>%
+      reactable(
+        columns = list(
+          databaseId = colDef(name = "Database"),
+          cohortDefinitionId = colDef(name = "Cohort Id"),
+          cohortName = colDef(name = "Cohort Name"),
+          covariateId = colDef(name = "Covariate Id"),
+          name = colDef(name = "Covariate Name"),
+          medianValue = colDef(name = "Median", format = colFormat(separators = TRUE)),
+          iqr = colDef(name = "IQR", format = colFormat(separators = TRUE))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
+  ## Concepts
+  output$conceptCharTab <- renderReactable(
+    conceptChar  %>%
+      dplyr::filter(databaseId %in% input$databaseNameConcept,
+                    cohortName %in% input$cohortNameConcept,
+                    domain %in% input$domainConcept
+      ) %>%
+      reactable(
+        columns = list(
+          databaseId = colDef(name = "Database"),
+          domain = colDef(name = "Domain"),
+          cohortDefinitionId = colDef(name = "Cohort Id"),
+          cohortName = colDef(name = "Cohort Name"),
+          conceptId = colDef(name = "Concept Id"),
+          name = colDef(name = "Concept Name"),
+          count = colDef(name = "Count"),
+          pct = colDef(name = "Percentage")
+          # n = colDef(name = "Count", format = colFormat(separators = TRUE)),
+          # pct = colDef(name = "Percentage", format = colFormat(percent = TRUE, digits = 2))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
+  ## Cohorts
+  output$cohortCharTab <- renderReactable(
+    cohortChar %>%
+      dplyr::filter(databaseId %in% input$databaseNameCohortCov,
+                    cohortName %in% input$cohortNameCohortCov,
+                    domain %in% input$domainCohortCov
+      ) %>%
+      reactable(
+        columns = list(
+          databaseId = colDef(name = "Database"),
+          timeWindow = colDef(name = "Time Window"),
+          cohortId = colDef(name = "Cohort Id"),
+          cohortName = colDef(name = "Cohort Name"),
+          domain = colDef(name = "Domain"),
+          covariateId = colDef(name = "Covariate Id"),
+          covariateName = colDef(name = "Covariate Name"),
+          count = colDef(name = "Count"),
+          pct = colDef(name = "Percentage")
+          # count = colDef(name = "Count", format = colFormat(separators = TRUE)),
+          # pct = colDef(name = "Percentage", format = colFormat(percent = TRUE, digits = 2))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
+  ## ICD10 Chapters
+  output$icdCharTab <- renderReactable(
+    icdChar %>%
+      dplyr::filter(databaseId %in% input$databaseNameIcd,
+                    cohortName %in% input$cohortNameIcd
+      ) %>%
+      reactable(
+        columns = list(
+          databaseId = colDef(name = "Database"),
+          COHORT_ID = colDef(name = "Cohort Id"),
+          cohortName = colDef(name = "Cohort Name"),
+          CATEGORY_CODE = colDef(name = "Concept Id"),
+          categoryName = colDef(name = "ICD10 Chapter"),
+          count = colDef(name = "Count"),
+          pct = colDef(name = "Percentage")
+          # count = colDef(name = "Count", format = colFormat(separators = TRUE)),
+          # pct = colDef(name = "Percentage", format = colFormat(percent = TRUE, digits = 2))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
+
+  # Incidence -----------
+
+  ## Box text
+  output$clinicalOutcomesDescription <- renderText({
+    incidenceDescription
+  })
+
+
+
+  ## Incidence
+  output$inciTab <- renderReactable(
+    incTab %>%
+      dplyr::filter(databaseId %in% input$databaseNameInci,
+                    START_YEAR %in% input$yearInci,
+                    OUTCOME_NAME %in% snakecase::to_snake_case(input$cohortNameInci)) %>%
+      reactable(
+        columns = list(
+          databaseId = colDef(name = "Database"),
+          START_YEAR = colDef(name = "Year"),
+          OUTCOME_COHORT_DEFINITION_ID = colDef(name = "Cohort Id"),
+          OUTCOME_NAME = colDef(name = "Cohort Name"),
+          PERSONS_AT_RISK = colDef(name = "Persons at Risk", format = colFormat(separators = TRUE)),
+          PERSON_DAYS = colDef(name = "Person Days", format = colFormat(separators = TRUE)),
+          OUTCOMES = colDef(name = "Outcome Count", format = colFormat(separators = TRUE)),
+          INCIDENCE_PROPORTION_P100P = colDef(name = "Incidence Proportion (per 100p)", format = colFormat(digits = 2)),
+          INCIDENCE_RATE_P1000PY = colDef(name = "Incidence Rate (per 1000yrs)", format = colFormat(digits = 2))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
+  ## subset inci
+  subsetInci <- reactive({
+    incTab %>%
+      dplyr::filter(
+        START_YEAR != "All",
+        databaseId %in% input$databaseNameYrInci,
+        OUTCOME_NAME %in% snakecase::to_snake_case(input$cohortNameYrInci)
+      )
+  })
+
+  ### Make yearly incidence plot
+  output$inciYearPlot <- renderPlot({
+    subsetInci() %>%
+      plotYearlyIncidence()
+  })
+
+
+  # Underlying Conditions -------------------
+
+  ## Box text
+  output$underlyingDescription <- renderText({
+    underlyingDescription
+  })
+
+  ## Prevalence
+  output$condPiTab <- renderReactable(
+    condPi %>%
+      dplyr::filter(databaseId %in% input$databaseNameCondPi,
+                    cohortName %in% input$cohortNameCondPi,
+                    covariateName %in% input$conditionNameCondPi,
+                    timeWindow %in% input$timeWindowCondPi) %>%
+      reactable(
+        columns = list(
+          databaseId = colDef(name = "Database"),
+          timeWindow = colDef(name = "Time Window"),
+          cohortId = colDef(name = "Cohort Id"),
+          cohortName = colDef(name = "Cohort Name"),
+          covariateId = colDef(name = "Condition Id"),
+          covariateName = colDef(name = "Condition Name"),
+          count = colDef(name = "Count"),
+          pct = colDef(name = "Percentage")
+          # count = colDef(name = "Count", format = colFormat(separators = TRUE)),
+          # pct = colDef(name = "Percentage", format = colFormat(percent = TRUE, digits = 2))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
+  # Treatment Patterns -------------
+
+  ## Box text
+  output$treatmentPatternsDescription <- renderText({
+    treatmentPatternsDescription
+  })
+
+  output$drugUtilizationDescription <- renderText({
+    drugUtilizationDescription
+  })
+
+
+  ## Utilization
+  duPiRe <- reactive({
+    drugPi %>%
+      dplyr::filter(
+        databaseId %in% input$databaseNameDuPi,
+        cohortName %in% input$cohortNameDuPi,
+        covariateName %in% input$drugNameDuPi,
+        timeWindow %in% input$timeWindowDuPi
+      )
+  })
+
+  output$duTab <- renderReactable(
+    duPiRe() %>% reactable(
+      columns = list(
+        databaseId = colDef(name = "Database"),
+        timeWindow = colDef(name = "Time Window"),
+        cohortId = colDef(name = "Cohort Id"),
+        cohortName = colDef(name = "Cohort Name"),
+        covariateId = colDef(name = "Drug Id"),
+        covariateName = colDef(name = "Drug Name"),
+        count = colDef(name = "Count", format = colFormat(separators = TRUE)),
+        pct = colDef(name = "Percentage", format = colFormat(percent = TRUE, digits = 2))
+      ),
+      filterable = TRUE,
+      searchable = TRUE,
+      outlined = TRUE,
+      bordered = TRUE,
+      striped = TRUE,
+      defaultPageSize = 20
+    )
+  )
+
+  ## Sequences
+  ### Reactive pickers
+
+  sankeyPick <- reactive({
+    sankeyCohorts %>%
+      dplyr::filter(name == input$cohortNameSankey) %>%
+      dplyr::pull(id)
+  })
+
+  trtSankeyPlot <- reactive({
+    txPatDatAll %>%
+      dplyr::filter(
+        databaseId == input$databaseNameSankey,
+        cohortId == sankeyPick()) %>%
+      dplyr::slice(1:20) %>%
+      buildSankeyData() %>%
+      plotSankey()
+
+  })
+
+  trtSankeyTab <- reactive({
+    txPatDatAll %>%
+      dplyr::filter(
+        databaseId == input$databaseNameSankey,
+        cohortId == sankeyPick()
+      ) %>%
+      dplyr::arrange(desc(n)) %>%
+      tidyr::unite(
+        col = "sequence",
+        event_cohort_name1:event_cohort_name5,
+        sep = " | ",
+        na.rm = TRUE
+      ) %>%
+      dplyr::select(
+        databaseId, cohortName, sequence, n
+      )
+  })
+
+
+  ### Treatment Patterns sankey
+  output$txSankey <- networkD3::renderSankeyNetwork({
+    trtSankeyPlot()
+  })
+
+
+  ### Treatment Patterns table
+  output$txPatDat <- renderReactable(
+    trtSankeyTab() %>%
+      reactable(
+        columns = list(
+          databaseId = colDef(name = "Database"),
+          cohortName = colDef(name = "Cohort Name"),
+          sequence = colDef(name = "Sequence"),
+          n = colDef(name = "Count", format = colFormat(separators = TRUE))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 10
+      )
+  )
+
+  ## Time To discontinuation
+
+  # convert cohort name to an id
+  ttdPick <- reactive({
+    tteCohorts %>%
+      dplyr::filter(name == input$cohortNameTtd) %>%
+      dplyr::pull(id)
+  })
+
+  # subset to those with strata
+  ttdSubset <- reactive({
+
+    dt1 <- ttd %>%
+      dplyr::filter(
+        database == input$databaseNameTtd,
+        targetId == ttdPick()
+      )
+    if (input$strataTtd == "Single") {
+      dt1 <- dt1 %>%
+        dplyr::filter(
+          !grepl("\\+", strata)
+        )
+    }
+
+    dt1
+
+  })
+  # make km plot
+  output$ttdKmPlot <- renderPlot({
+    plotKM(ttdSubset())
+  })
+
+  ## get survProb table
+  output$ttdSurvTab <- renderReactable(
+    makeSurvProbTab(ttdSubset()) %>%
+      reactable(
+        columns = list(
+          database = colDef(name = "Database"),
+          targetId = colDef(name = "Target Cohort"),
+          strata = colDef(name = "Drug Cohort"),
+          `6 month` = colDef(name = "6 month Survival", format = colFormat(digits = 1, percent = TRUE)),
+          `1 year` = colDef(name = "1 year Survival", format = colFormat(digits = 1, percent = TRUE)),
+          `2 year` = colDef(name = "2 year Survival", format = colFormat(digits = 1, percent = TRUE))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
+
+
+  # Procedure Analysis ---------------------------------
+
+  ## Box text
+  output$procedureAnalysisDescription <- renderText({
+    procedureAnalysisDescription
+  })
+
+  ## Procedure Prevalence
+  procPiRe <- reactive({
+    procPi %>%
+      dplyr::filter(
+        databaseId %in% input$databaseNameProcPi,
+        cohortName %in% input$cohortNameProcPi,
+        covariateName %in% input$procNameProcPi,
+        timeWindow %in% input$timeWindowProcPi
+      )
+  })
+
+  output$procTab <- renderReactable(
+    procPiRe() %>% reactable(
+      columns = list(
+        databaseId = colDef(name = "Database"),
+        timeWindow = colDef(name = "Time Window"),
+        cohortId = colDef(name = "Target Cohort Id"),
+        cohortName = colDef(name = "Target Cohort Name"),
+        covariateId = colDef(name = "Procedure Id"),
+        covariateName = colDef(name = "Procedure Name"),
+        count = colDef(name = "Count", format = colFormat(separators = TRUE)),
+        pct = colDef(name = "Percentage", format = colFormat(digits = 1, percent = TRUE))
+      ),
+      filterable = TRUE,
+      searchable = TRUE,
+      outlined = TRUE,
+      bordered = TRUE,
+      striped = TRUE,
+      defaultPageSize = 20
+    )
+  )
+
+
+  ## Procedure Time To intervention
+
+  # convert cohort name to an id
+  ttiPick <- reactive({
+    tteCohorts %>%
+      dplyr::filter(name == input$cohortNameTti) %>%
+      dplyr::pull(id)
+  })
+
+  # subset to those with strata
+  ttiSubset <- reactive({
+
+    tti %>%
+      dplyr::filter(
+        database == input$databaseNameTti,
+        targetId == ttiPick()
+      )
+  })
+  # make km plot
+  output$ttiKmPlot <- renderPlot({
+    plotKM2(ttiSubset())
+  })
+
+  ## get survProb table
+  output$ttiSurvTab <- renderReactable(
+    makeSurvProbTab2(ttiSubset()) %>%
+      reactable(
+        columns = list(
+          database = colDef(name = "Database"),
+          targetId = colDef(name = "Target Cohort"),
+          outcomeCohortId = colDef(name = "Procedure Cohort"),
+          `6 month` = colDef(name = "6 month Survival", format = colFormat(digits = 1, percent = TRUE)),
+          `1 year` = colDef(name = "1 year Survival", format = colFormat(digits = 1, percent = TRUE)),
+          `2 year` = colDef(name = "2 year Survival", format = colFormat(digits = 1, percent = TRUE))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
+
+  session$onSessionEnded(stopApp)  # Kills the session when browser tab is closed
+
+}
+
+
+shinyApp(ui = ui, server = server)

From c49fd7781bb65a3e0c7b0e688edac221e5122b35 Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 11:33:46 -0500
Subject: [PATCH 03/12] move shiny ignore

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 5e7c13e..04ed06f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,4 +21,4 @@ hidden/
 results.zip
 
 scratchDiagnostics
-extras/shiny/data/
+shiny/data/

From 8949d306522db357d3e5f009530b303442c0adaf Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 11:34:02 -0500
Subject: [PATCH 04/12] separate shiny into own folder

---
 {extras/shiny => shiny}/R/fn.R       |  0
 {extras/shiny => shiny}/R/loadData.R |  2 +-
 {extras/shiny => shiny}/app.R        | 28 +++++++++++++++++++++++++++-
 3 files changed, 28 insertions(+), 2 deletions(-)
 rename {extras/shiny => shiny}/R/fn.R (100%)
 rename {extras/shiny => shiny}/R/loadData.R (99%)
 rename {extras/shiny => shiny}/app.R (97%)

diff --git a/extras/shiny/R/fn.R b/shiny/R/fn.R
similarity index 100%
rename from extras/shiny/R/fn.R
rename to shiny/R/fn.R
diff --git a/extras/shiny/R/loadData.R b/shiny/R/loadData.R
similarity index 99%
rename from extras/shiny/R/loadData.R
rename to shiny/R/loadData.R
index b0c0394..0714e1f 100644
--- a/extras/shiny/R/loadData.R
+++ b/shiny/R/loadData.R
@@ -6,7 +6,7 @@
 library(dplyr)
 library(readr)
 
-source(here::here("shiny", "R", "fn.R"))
+source(here::here("extras/shiny", "R", "fn.R"))
 source(here::here("extras/R/helpers.R"))
 
 dataPath <- here::here("shiny", "data")
diff --git a/extras/shiny/app.R b/shiny/app.R
similarity index 97%
rename from extras/shiny/app.R
rename to shiny/app.R
index 6cb7bc7..bc64247 100644
--- a/extras/shiny/app.R
+++ b/shiny/app.R
@@ -20,7 +20,33 @@ options(shiny.fullstacktrace = FALSE)
 # Variables ---------------------
 title <- "EHDEN HMB"
 
-description <- "The EHDEN HMB study......"
+description <- "
+
+The EHDEN HMB study aims to answer the following questions:
+
+- What are the characteristics of women diagnosed with HMB in terms of demographics, comorbidities, procedures, and comedication?
+
+- What are the treatment pathways of women diagnosed with HMB?
+
+- What is the incidence of HMB across different countries and data sources?
+
+
+
+The primary objectives to answer them are:
+
+1. To characterise women of reproductive age diagnosed with a diagnosis of HMB (demographics, parity and reproductive history, comorbidities, comedications and procedures). [see Clinical Characteristics and Underlying Conditions tabs]
+
+2. To describe treatment pathways of women of reproductive age diagnosed with HMB across different countries and data sources (duration of use, switch to other/s treatment). [see Treatment Patterns tab]
+
+
+
+The exploratory objectives of this study are to:
+
+3. To describe the incidence and proportion of women diagnosed with HMB (calendar year over study period: 2000-2020). [see Incidence tab]
+
+4. To describe country-specific guideline compliant treatment use where national treatment guidelines are available for HMB. [see Treatment Patterns tab]
+
+"
 incidenceDescription <- "Incidence rate is calculated by 'Outcome Count'/'Person Days' * 100."
 underlyingDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'."
 drugUtilizationDescription <- "Drug utilization counts equal to 5 and below have been masked and replaced with '<5."

From 325a698c14b4f49c7ef31425f4d95a347c4cddad Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 11:40:05 -0500
Subject: [PATCH 05/12] add desc md

---
 extras/DataMigration.R    |  2 +-
 shiny/R/loadData.R        |  2 +-
 shiny/StudyDescription.md | 19 +++++++++++++++++++
 shiny/app.R               | 30 ++----------------------------
 4 files changed, 23 insertions(+), 30 deletions(-)
 create mode 100644 shiny/StudyDescription.md

diff --git a/extras/DataMigration.R b/extras/DataMigration.R
index 74bc455..3048461 100644
--- a/extras/DataMigration.R
+++ b/extras/DataMigration.R
@@ -7,7 +7,7 @@ library(readr)
 source("extras/R/helpers.R")
 
 #path to place app data
-appDataPath <- here::here("extras/shiny", "data")
+appDataPath <- here::here("shiny", "data")
 
 # make new directory
 appDataPath %>% fs::dir_create()
diff --git a/shiny/R/loadData.R b/shiny/R/loadData.R
index 0714e1f..b0c0394 100644
--- a/shiny/R/loadData.R
+++ b/shiny/R/loadData.R
@@ -6,7 +6,7 @@
 library(dplyr)
 library(readr)
 
-source(here::here("extras/shiny", "R", "fn.R"))
+source(here::here("shiny", "R", "fn.R"))
 source(here::here("extras/R/helpers.R"))
 
 dataPath <- here::here("shiny", "data")
diff --git a/shiny/StudyDescription.md b/shiny/StudyDescription.md
new file mode 100644
index 0000000..feb31d6
--- /dev/null
+++ b/shiny/StudyDescription.md
@@ -0,0 +1,19 @@
+The EHDEN HMB study aims to answer the following questions:
+
+-   What are the characteristics of women diagnosed with HMB in terms of demographics, comorbidities, procedures, and comedication?
+
+-   What are the treatment pathways of women diagnosed with HMB?
+
+-   What is the incidence of HMB across different countries and data sources?
+
+The primary objectives to answer them are:
+
+1.  To characterise women of reproductive age diagnosed with a diagnosis of HMB (demographics, parity and reproductive history, comorbidities, comedications and procedures). [see Clinical Characteristics and Underlying Conditions tabs]
+
+2.  To describe treatment pathways of women of reproductive age diagnosed with HMB across different countries and data sources (duration of use, switch to other/s treatment). [see Treatment Patterns tab]
+
+The exploratory objectives of this study are to:
+
+3.  To describe the incidence and proportion of women diagnosed with HMB (calendar year over study period: 2000-2020). [see Incidence tab]
+
+4.  To describe country-specific guideline compliant treatment use where national treatment guidelines are available for HMB. [see Treatment Patterns tab]
diff --git a/shiny/app.R b/shiny/app.R
index bc64247..5589db7 100644
--- a/shiny/app.R
+++ b/shiny/app.R
@@ -14,39 +14,13 @@ library(networkD3)
 library(reactable)
 library(ggplot2)
 library(grafify)
+library(markdown)
 options(shiny.fullstacktrace = FALSE)
 
 
 # Variables ---------------------
 title <- "EHDEN HMB"
 
-description <- "
-
-The EHDEN HMB study aims to answer the following questions:
-
-- What are the characteristics of women diagnosed with HMB in terms of demographics, comorbidities, procedures, and comedication?
-
-- What are the treatment pathways of women diagnosed with HMB?
-
-- What is the incidence of HMB across different countries and data sources?
-
-
-
-The primary objectives to answer them are:
-
-1. To characterise women of reproductive age diagnosed with a diagnosis of HMB (demographics, parity and reproductive history, comorbidities, comedications and procedures). [see Clinical Characteristics and Underlying Conditions tabs]
-
-2. To describe treatment pathways of women of reproductive age diagnosed with HMB across different countries and data sources (duration of use, switch to other/s treatment). [see Treatment Patterns tab]
-
-
-
-The exploratory objectives of this study are to:
-
-3. To describe the incidence and proportion of women diagnosed with HMB (calendar year over study period: 2000-2020). [see Incidence tab]
-
-4. To describe country-specific guideline compliant treatment use where national treatment guidelines are available for HMB. [see Treatment Patterns tab]
-
-"
 incidenceDescription <- "Incidence rate is calculated by 'Outcome Count'/'Person Days' * 100."
 underlyingDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'."
 drugUtilizationDescription <- "Drug utilization counts equal to 5 and below have been masked and replaced with '<5."
@@ -110,7 +84,7 @@ body <- dashboardBody(
           title = "Study Description",
           width = 12,
           status = "success",
-          textOutput("studyDescription")
+          includeMarkdown("StudyDescription.md")
         )
       ),
       #### Study Information

From ca522a4c7b4e5929fc862866921b95adb2634100 Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 11:48:35 -0500
Subject: [PATCH 06/12] move migration to shiny folder

---
 extras/PreviewResults.R                           | 15 +++++++++++++++
 .../migration/dataMigration.R                     |  0
 {extras/R => shiny/migration}/helpers.R           |  0
 3 files changed, 15 insertions(+)
 create mode 100644 extras/PreviewResults.R
 rename extras/DataMigration.R => shiny/migration/dataMigration.R (100%)
 rename {extras/R => shiny/migration}/helpers.R (100%)

diff --git a/extras/PreviewResults.R b/extras/PreviewResults.R
new file mode 100644
index 0000000..ae4a071
--- /dev/null
+++ b/extras/PreviewResults.R
@@ -0,0 +1,15 @@
+# Preview results -------------
+# The purpose of this file is to launch a local shiny app to review the results
+# of the analysis conducted for ehden_hmb
+
+# Once the study has been executed results will be saved to a folder separated
+# by database name.
+
+
+
+### Step 1: Run migration script to populate 'shiny/data' folder with files
+source(here::here("extras", "dataMigration.R"))
+
+
+### Step 2: Once migration script is ran successfully, run the app in a browser
+shiny::runApp(appDir = here::here("shiny"))
diff --git a/extras/DataMigration.R b/shiny/migration/dataMigration.R
similarity index 100%
rename from extras/DataMigration.R
rename to shiny/migration/dataMigration.R
diff --git a/extras/R/helpers.R b/shiny/migration/helpers.R
similarity index 100%
rename from extras/R/helpers.R
rename to shiny/migration/helpers.R

From 259706ecd2eba81f41069784901d5f7cfd33ea5b Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 11:51:13 -0500
Subject: [PATCH 07/12] fix bugs in migration

---
 shiny/R/loadData.R              | 2 +-
 shiny/migration/dataMigration.R | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/shiny/R/loadData.R b/shiny/R/loadData.R
index b0c0394..142a6f2 100644
--- a/shiny/R/loadData.R
+++ b/shiny/R/loadData.R
@@ -7,7 +7,7 @@ library(dplyr)
 library(readr)
 
 source(here::here("shiny", "R", "fn.R"))
-source(here::here("extras/R/helpers.R"))
+source(here::here("shiny/migration/helpers.R"))
 
 dataPath <- here::here("shiny", "data")
 
diff --git a/shiny/migration/dataMigration.R b/shiny/migration/dataMigration.R
index 3048461..08159d5 100644
--- a/shiny/migration/dataMigration.R
+++ b/shiny/migration/dataMigration.R
@@ -4,7 +4,7 @@ library(dplyr)
 library(readr)
 
 #source helper functions
-source("extras/R/helpers.R")
+source("shiny/migration/helpers.R")
 
 #path to place app data
 appDataPath <- here::here("shiny", "data")

From 4e14b48f70e07b78b7fe23f8627431cb3944f639 Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 11:51:25 -0500
Subject: [PATCH 08/12] add file to preview results

---
 extras/PreviewResults.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extras/PreviewResults.R b/extras/PreviewResults.R
index ae4a071..0ef2f9d 100644
--- a/extras/PreviewResults.R
+++ b/extras/PreviewResults.R
@@ -8,7 +8,7 @@
 
 
 ### Step 1: Run migration script to populate 'shiny/data' folder with files
-source(here::here("extras", "dataMigration.R"))
+source(here::here("shiny/migration", "dataMigration.R"))
 
 
 ### Step 2: Once migration script is ran successfully, run the app in a browser

From 71f2ebbece987aa843fcb32375d5b4ec23644d5f Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 14:34:00 -0500
Subject: [PATCH 09/12] remove hard code of cohort baseline

---
 shiny/migration/dataMigration.R | 74 ++++-----------------------------
 1 file changed, 8 insertions(+), 66 deletions(-)

diff --git a/shiny/migration/dataMigration.R b/shiny/migration/dataMigration.R
index 08159d5..9cf3f7b 100644
--- a/shiny/migration/dataMigration.R
+++ b/shiny/migration/dataMigration.R
@@ -358,76 +358,18 @@ readr::write_csv(conceptTab, file = fs::path(appDataPath, "baselineConcepts.csv"
 cohort365 <- bindCsv(allPaths = allPaths,
                      task = listOfTasks[5], # baseline char
                      file = "cohort_covariates_365_0.csv") %>%
-  dplyr::mutate(
-    #pct = scales::label_percent(accuracy = 0.01, suffix = "")(pct),
-    timeWindow = "-365d - 0d",
-    covariateName = dplyr::case_when(
-      covariateId == 3 ~ "PID",
-      covariateId == 4 ~ "STD",
-      covariateId == 5 ~ "adenomyosis",
-      covariateId == 6 ~ "anemia",
-      covariateId == 7 ~ "antidepressants",
-      covariateId == 8 ~ "antipsychotics",
-      covariateId == 9 ~ "antithrombotics",
-      covariateId == 10 ~ "coagulopathy",
-      covariateId == 11 ~ "copperIUDdrug",
-      covariateId == 12 ~ "covid19",
-      covariateId == 13 ~ "diabetes",
-      covariateId == 14 ~ "disorderOfOvary",
-      covariateId == 15 ~ "dysmenorrhea",
-      covariateId == 16 ~ "endoHyperplasia",
-      covariateId == 17 ~ "endoPolyp",
-      covariateId == 18 ~ "endometriosis",
-      covariateId == 19 ~ "gonadalSteroids",
-      covariateId == 20 ~ "ironDefAnemia",
-      covariateId == 21 ~ "obesity",
-      covariateId == 22 ~ "ovulatoryDysfunction",
-      covariateId == 23 ~ "pain",
-      covariateId == 24 ~ "pcos",
-      covariateId == 25 ~ "tamoxifen",
-      covariateId == 26 ~ "uterineLeiomyoma"
-    )
+  dplyr::left_join(
+    cohortManifest %>% dplyr::select(databaseId, id, name),
+    by = c("databaseId" = "databaseId", "covariateId" = "id")
   ) %>%
+  dplyr::mutate(
+    covariateName = name,
+    timeWindow = "[-365 : 0]"
+    ) %>%
   dplyr::select(databaseId, timeWindow, cohortId, cohortName,
                 covariateId, covariateName, count, pct) %>%
-  dplyr::arrange(databaseId, cohortId, covariateId) #%>%
-# dplyr::rename(
-#   Database = databaseId,
-#   `Time Window` = timeWindow,
-#   `Cohort Id` = cohortId,
-#   `Cohort Name` = cohortName,
-#   `Covariate Id` = covariateId,
-#   `Covariate Name` = covariateName,
-#   `Count` = count,
-#   `Percentage` = pct
-# )
+  dplyr::arrange(databaseId, cohortId, covariateId)
 
-# cohort9999 <- bindCsv(allPaths = allPaths,
-#                       task = listOfTasks[3],
-#                       file = "cohort_covariates_9999_1.csv") %>%
-#   dplyr::mutate(
-#     pct = scales::label_percent(accuracy = 0.01, suffix = "")(pct),
-#     timeWindow = "anyt time prior"
-#   ) %>%
-#   dplyr::select(databaseId, timeWindow, cohortId, cohortName,
-#                 covariateId, covariateName, count, pct) %>%
-#   dplyr::arrange(databaseId, cohortId, covariateId) %>%
-#   dplyr::mutate(
-#     timeWindow = "any time prior"
-#   ) %>%
-#   dplyr::rename(
-#     Database = databaseId,
-#     `Time Window` = timeWindow,
-#     `Cohort Id` = cohortId,
-#     `Cohort Name` = cohortName,
-#     `Covariate Id` = covariateId,
-#     `Covariate Name` = covariateName,
-#     `Count` = count,
-#     `Percentage` = pct
-#   )
-# cohortCovariates <- dplyr::bind_rows(
-#   cohort365, cohort9999
-# )
 readr::write_csv(cohort365, file = fs::path(appDataPath, "baselineCohorts.csv"))
 
 

From 6623294c722e23d81b86f5f2deeb785cacbe0909 Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 14:39:06 -0500
Subject: [PATCH 10/12] add descriptive text

---
 shiny/app.R | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/shiny/app.R b/shiny/app.R
index 5589db7..89981b9 100644
--- a/shiny/app.R
+++ b/shiny/app.R
@@ -24,9 +24,11 @@ title <- "EHDEN HMB"
 incidenceDescription <- "Incidence rate is calculated by 'Outcome Count'/'Person Days' * 100."
 underlyingDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'."
 drugUtilizationDescription <- "Drug utilization counts equal to 5 and below have been masked and replaced with '<5."
-treatmentPatternsDescription <- "Treatment Patterns counts (Sequences) are restricted to 30"
-clinicalCharacteristicsDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'."
-procedureAnalysisDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'."
+treatmentPatternsDescription <- "Treatment Patterns counts (Sequences) are restricted to 30. HMB cohort is the population whose index event is HMB and are censored by hysterectomy. Thus we consider only drugs.
+HMB2 cohort are those whose index event is HMB where there is no censoring for hysterectomy. Therefore we consider both drugs and procedures."
+clinicalCharacteristicsDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'. ICD10 Chapters are the rollup of conditions based on condition chapters. This is used to get a snapshot of comorbidities."
+procedureAnalysisDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'.
+In the time to intervention tab, consider the time for only those in the population who experienced the event not the entire population."
 
 dashboardVersion <- "0.0.4"
 dashboardDate <- Sys.Date()

From 99d12f50c85149ba325ade4fd76033b83154133e Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 14:39:59 -0500
Subject: [PATCH 11/12] update release

---
 NEWS.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 3739d96..578a50d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+# EHDEN HMB v0.2.0
+
+* Add shiny app to preview results
+
 # EHDEN HMB v0.1.8
 
 * Fix pid and std file names in cohort definitions, changes anticipated build order

From 03b5cffbcc1c6bff9ea0c8d95e233bf0d4b7ba62 Mon Sep 17 00:00:00 2001
From: Martin Lavallee <44976592+mdlavallee92@users.noreply.github.com>
Date: Tue, 7 Nov 2023 14:47:50 -0500
Subject: [PATCH 12/12] update documentation

---
 docs/howToRun.html         | 5 +++++
 docs/news.html             | 9 ++++++++-
 docs/search.json           | 9 ++++++++-
 documentation/howToRun.qmd | 5 +++++
 documentation/news.qmd     | 4 ++++
 5 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/docs/howToRun.html b/docs/howToRun.html
index 4b00197..68bbba2 100644
--- a/docs/howToRun.html
+++ b/docs/howToRun.html
@@ -215,6 +215,7 @@ <h2 id="toc-title">On this page</h2>
   <li><a href="#troubleshooting-1" id="toc-troubleshooting-1" class="nav-link" data-scroll-target="#troubleshooting-1"><span class="header-section-number">2.3.1</span> Troubleshooting</a></li>
   </ul></li>
   <li><a href="#results-folder" id="toc-results-folder" class="nav-link" data-scroll-target="#results-folder"><span class="header-section-number">2.4</span> Results Folder</a></li>
+  <li><a href="#review-results" id="toc-review-results" class="nav-link" data-scroll-target="#review-results"><span class="header-section-number">2.5</span> Review Results</a></li>
   </ul></li>
   </ul>
 </nav>
@@ -391,6 +392,10 @@ <h3 data-number="2.3.1" class="anchored" data-anchor-id="troubleshooting-1"><spa
 <section id="results-folder" class="level2" data-number="2.4">
 <h2 data-number="2.4" class="anchored" data-anchor-id="results-folder"><span class="header-section-number">2.4</span> Results Folder</h2>
 <p>Following successful execution of the study, each database will have its own sub-folder within results. There will be a further 14 subfolders containing results from the execution underneath the database. Within each of these folders there will be a combination of <code>csv</code>, <code>rds</code> and <code>parquet</code> (only for treatmentHistory) that contain the results. You may review these files individually if you wish. Instructions on how to share the results are maintained in the contribution tab of the website.</p>
+</section>
+<section id="review-results" class="level2" data-number="2.5">
+<h2 data-number="2.5" class="anchored" data-anchor-id="review-results"><span class="header-section-number">2.5</span> Review Results</h2>
+<p>In version 0.2 of the study code, we have added code to launch a shiny app to locally review results prior to distributing them to the study host. In order to run the shiny app the user needs to prep the data for the app. Navigate to <code>extras/PreviewResults.R</code> and run the first step. This will take all databases run in the results section and build a data folder for the shiny app. This data folder is ignored to prevent accidental commits back to github. Next launch the shiny app.</p>
 
 
 </section>
diff --git a/docs/news.html b/docs/news.html
index b024762..a0b476c 100644
--- a/docs/news.html
+++ b/docs/news.html
@@ -156,7 +156,8 @@
     <h2 id="toc-title">On this page</h2>
    
   <ul>
-  <li><a href="#ehden-hmb-v0.1.8" id="toc-ehden-hmb-v0.1.8" class="nav-link active" data-scroll-target="#ehden-hmb-v0.1.8">EHDEN HMB v0.1.8</a></li>
+  <li><a href="#ehden-hmb-v0.2.0" id="toc-ehden-hmb-v0.2.0" class="nav-link active" data-scroll-target="#ehden-hmb-v0.2.0">EHDEN HMB v0.2.0</a></li>
+  <li><a href="#ehden-hmb-v0.1.8" id="toc-ehden-hmb-v0.1.8" class="nav-link" data-scroll-target="#ehden-hmb-v0.1.8">EHDEN HMB v0.1.8</a></li>
   <li><a href="#ehden-hmb-v0.1.7" id="toc-ehden-hmb-v0.1.7" class="nav-link" data-scroll-target="#ehden-hmb-v0.1.7">EHDEN HMB v0.1.7</a></li>
   <li><a href="#ehden-hmb-v0.1.6" id="toc-ehden-hmb-v0.1.6" class="nav-link" data-scroll-target="#ehden-hmb-v0.1.6">EHDEN HMB v0.1.6</a></li>
   <li><a href="#ehden-hmb-v0.1.5" id="toc-ehden-hmb-v0.1.5" class="nav-link" data-scroll-target="#ehden-hmb-v0.1.5">EHDEN HMB v0.1.5</a></li>
@@ -194,6 +195,12 @@ <h1 class="title">News</h1>
 
 </header>
 
+<section id="ehden-hmb-v0.2.0" class="level1">
+<h1>EHDEN HMB v0.2.0</h1>
+<ul>
+<li>Add shiny app to preview results</li>
+</ul>
+</section>
 <section id="ehden-hmb-v0.1.8" class="level1">
 <h1>EHDEN HMB v0.1.8</h1>
 <ul>
diff --git a/docs/search.json b/docs/search.json
index cd857f5..7101559 100644
--- a/docs/search.json
+++ b/docs/search.json
@@ -46,7 +46,7 @@
     "href": "news.html",
     "title": "News",
     "section": "",
-    "text": "EHDEN HMB v0.1.8\n\nFix pid and std file names in cohort definitions, changes anticipated build order\n\n\n\nEHDEN HMB v0.1.7\n\nCorrect event in time to procedure intervention\n\n\n\nEHDEN HMB v0.1.6\n\nMinor bug fix to treatment patterns analysis\nCorrect typo in functions\n\n\n\nEHDEN HMB v0.1.5\n\nCorrect bug in analysis settings (Issue #12)\nUpdate package website\nAdd yearly incidence to Incidence Analysis (Issue #13)\nAdd R scripts for data exchange and CD preview (Issue #14)\nupdate cohort definitions from Siir\n\ncopperIUDproc\ncopperIUDdrug\ndisorderOfOvary\n\n\n\n\nEHDEN HMB v0.1.4\n\nFix bug in baseline characteristics; typo in analysis settings (Issue #10)\nCorrections to the study website\n\n\n\nEHDEN HMB v0.1.3\n\nUpdate cohort definitions\n\nadd baseline procedures\nadd baseline drugs\n\nInclude procedure prevalence at baseline [-365,0]\n\n\n\nEHDEN HMB v0.1.2\n\nUpdate cohort definitions\n\nrerun drug exposure with Capr\nrerun procedures with Capr\nadd time restraints to nsaids\nadd hmb cohort without hysterectomy censor\n\nAdd secondary treatment sequence including procedures\n\n\n\nEHDEN HMB v0.1.1\n\nAdd study website\nUpdate cohort definitions for denominator\nAddition of executeStudy.R file to automate execution\nadd age strata [under 30, 30-45, 45-55]\n\n\n\nEHDEN HMB v0.1.0\n\nFirst release of study code for EHDEN HMB\n\nAdd Incidence Analysis\nAdd Baseline Characteristics\nAdd Treatment Patterns\nAdd Procedure Analysis\n\n\n\n\nEHDEN HMB v0.0.6\n\nAdd file StoreResults to upload cohort diagnostics zip to aws s3 bucket\nUpdate KeyringSetup to match Ulysses\nUpdate renv.lock to add aws.s3 and Ulysses v0.0.2\nminor correction to study task files\n\n\n\nEHDEN HMB v0.0.5\n\nAdd in renv\nminor correction to HMB cohort for cohort diagnostics\n\n\n\nEHDEN HMB v0.0.4\n\nFix errors in KeyringSetup.R file\nFix cohort diagnostics to use integer64 with id in cohortDefinitionSet\n\n\n\nEHDEN HMB v0.0.3\n\nUpdate How to run file with installation instructions for usethis and Ulysses\nFix extras/KeyringSetup.R file to correctly set up credentials and check them\nEdit startSnowflakeSession function to handle string split of workDatabaseSchema\nEdit study tasks to order snowflake start correctly and close connection at end of file\n\n\n\nEHDEN HMB v0.0.2\n\nUpdate cohort definition for HMB to fix error\nAdd cohort diagnostics script\nAdd How to Run file and key ring file\n\n\n\nEHDEN HMB v0.0.1\n\nAdd HMB cohort definition to repo\nStart preparing documentation about study\nInitialize OHDSI study\nAdd NEWS.md to"
+    "text": "EHDEN HMB v0.2.0\n\nAdd shiny app to preview results\n\n\n\nEHDEN HMB v0.1.8\n\nFix pid and std file names in cohort definitions, changes anticipated build order\n\n\n\nEHDEN HMB v0.1.7\n\nCorrect event in time to procedure intervention\n\n\n\nEHDEN HMB v0.1.6\n\nMinor bug fix to treatment patterns analysis\nCorrect typo in functions\n\n\n\nEHDEN HMB v0.1.5\n\nCorrect bug in analysis settings (Issue #12)\nUpdate package website\nAdd yearly incidence to Incidence Analysis (Issue #13)\nAdd R scripts for data exchange and CD preview (Issue #14)\nupdate cohort definitions from Siir\n\ncopperIUDproc\ncopperIUDdrug\ndisorderOfOvary\n\n\n\n\nEHDEN HMB v0.1.4\n\nFix bug in baseline characteristics; typo in analysis settings (Issue #10)\nCorrections to the study website\n\n\n\nEHDEN HMB v0.1.3\n\nUpdate cohort definitions\n\nadd baseline procedures\nadd baseline drugs\n\nInclude procedure prevalence at baseline [-365,0]\n\n\n\nEHDEN HMB v0.1.2\n\nUpdate cohort definitions\n\nrerun drug exposure with Capr\nrerun procedures with Capr\nadd time restraints to nsaids\nadd hmb cohort without hysterectomy censor\n\nAdd secondary treatment sequence including procedures\n\n\n\nEHDEN HMB v0.1.1\n\nAdd study website\nUpdate cohort definitions for denominator\nAddition of executeStudy.R file to automate execution\nadd age strata [under 30, 30-45, 45-55]\n\n\n\nEHDEN HMB v0.1.0\n\nFirst release of study code for EHDEN HMB\n\nAdd Incidence Analysis\nAdd Baseline Characteristics\nAdd Treatment Patterns\nAdd Procedure Analysis\n\n\n\n\nEHDEN HMB v0.0.6\n\nAdd file StoreResults to upload cohort diagnostics zip to aws s3 bucket\nUpdate KeyringSetup to match Ulysses\nUpdate renv.lock to add aws.s3 and Ulysses v0.0.2\nminor correction to study task files\n\n\n\nEHDEN HMB v0.0.5\n\nAdd in renv\nminor correction to HMB cohort for cohort diagnostics\n\n\n\nEHDEN HMB v0.0.4\n\nFix errors in KeyringSetup.R file\nFix cohort diagnostics to use integer64 with id in cohortDefinitionSet\n\n\n\nEHDEN HMB v0.0.3\n\nUpdate How to run file with installation instructions for usethis and Ulysses\nFix extras/KeyringSetup.R file to correctly set up credentials and check them\nEdit startSnowflakeSession function to handle string split of workDatabaseSchema\nEdit study tasks to order snowflake start correctly and close connection at end of file\n\n\n\nEHDEN HMB v0.0.2\n\nUpdate cohort definition for HMB to fix error\nAdd cohort diagnostics script\nAdd How to Run file and key ring file\n\n\n\nEHDEN HMB v0.0.1\n\nAdd HMB cohort definition to repo\nStart preparing documentation about study\nInitialize OHDSI study\nAdd NEWS.md to"
   },
   {
     "objectID": "contribution.html",
@@ -215,5 +215,12 @@
     "title": "How-to-Run Study",
     "section": "2.4 Results Folder",
     "text": "2.4 Results Folder\nFollowing successful execution of the study, each database will have its own sub-folder within results. There will be a further 14 subfolders containing results from the execution underneath the database. Within each of these folders there will be a combination of csv, rds and parquet (only for treatmentHistory) that contain the results. You may review these files individually if you wish. Instructions on how to share the results are maintained in the contribution tab of the website."
+  },
+  {
+    "objectID": "howToRun.html#review-results",
+    "href": "howToRun.html#review-results",
+    "title": "How-to-Run Study",
+    "section": "2.5 Review Results",
+    "text": "2.5 Review Results\nIn version 0.2 of the study code, we have added code to launch a shiny app to locally review results prior to distributing them to the study host. In order to run the shiny app the user needs to prep the data for the app. Navigate to extras/PreviewResults.R and run the first step. This will take all databases run in the results section and build a data folder for the shiny app. This data folder is ignored to prevent accidental commits back to github. Next launch the shiny app."
   }
 ]
\ No newline at end of file
diff --git a/documentation/howToRun.qmd b/documentation/howToRun.qmd
index 880865d..4775673 100644
--- a/documentation/howToRun.qmd
+++ b/documentation/howToRun.qmd
@@ -174,3 +174,8 @@ The shiny app rendered from `CohortDiagnostics` relies on the OHDSI package `Ohd
 ## Results Folder
 
 Following successful execution of the study, each database will have its own sub-folder within results. There will be a further 14 subfolders containing results from the execution underneath the database. Within each of these folders there will be a combination of `csv`, `rds` and `parquet` (only for treatmentHistory) that contain the results. You may review these files individually if you wish. Instructions on how to share the results are maintained in the contribution tab of the website. 
+
+
+## Review Results
+
+In version 0.2 of the study code, we have added code to launch a shiny app to locally review results prior to distributing them to the study host. In order to run the shiny app the user needs to prep the data for the app. Navigate to `extras/PreviewResults.R` and run the first step. This will take all databases run in the results section and build a data folder for the shiny app. This data folder is ignored to prevent accidental commits back to github. Next launch the shiny app. 
diff --git a/documentation/news.qmd b/documentation/news.qmd
index 3d15f7e..df288a2 100644
--- a/documentation/news.qmd
+++ b/documentation/news.qmd
@@ -2,6 +2,10 @@
 title: "News"
 ---
 
+# EHDEN HMB v0.2.0
+
+* Add shiny app to preview results
+
 # EHDEN HMB v0.1.8
 
 * Fix pid and std file names in cohort definitions, changes anticipated build order