commit 01/18

OdyOSG · Jan 18, 2024 · d02e664 · d02e664
1 parent 63d073c
commit d02e664
Show file tree

Hide file tree

Showing 38 changed files with 701 additions and 710 deletions.
diff --git a/.Rprofile b/.Rprofile
diff --git a/.gitignore b/.gitignore
@@ -1,24 +1,78 @@
-.Rproj.user
+# Project specific files and folders
 results/
+results_old/
 logs/
 config.yml
-NUL
 scratch/incPrevTest.R
 .httr-oauth
 documentation/minutes/custom-reference-doc.docx
 documentation/custom-reference-doc.docx
 documentation/StudySAP.qmd
 documentation/minutes612023.qmd
-errorReportSql.txt
-
-# ignore minutes documents
 documentation/minutes/img
 documentation/minutes/*.qmd
-
-# ignore problem incidence Denom
+errorReportSql.txt
 cohortsToCreate/ignore/
 hidden/
 results.zip
-
 scratchDiagnostics
 shiny/data/
+shiny/data_old/
+bindResults.R
+bindResults_Public.R
+bindResults_Private.R
+shiny/data_public
+shiny/data_private
+
+# Mac files
+.DS_Store
+
+# History files
+.Rhistory
+.Rapp.history
+
+# Session Data files
+.RData
+.RDataTmp
+
+# User-specific files
+.Ruserdata
+
+# Example code in package build process
+*-Ex.R
+
+# Output files from R CMD build
+/*.tar.gz
+
+# Output files from R CMD check
+/*.Rcheck/
+
+# RStudio files
+.Rproj.user/
+
+# produced vignettes
+vignettes/*.html
+vignettes/*.pdf
+
+# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
+.httr-oauth
+
+# knitr and R markdown default cache directories
+*_cache/
+/cache/
+
+# Temporary files created by R markdown
+*.utf8.md
+*.knit.md
+
+# R Environment Variables
+.Renviron
+
+# pkgdown site
+docs/
+
+# translation temp files
+po/*~
+
+# RStudio Connect folder
+rsconnect/
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,15 @@
+# EHDEN HMB v0.9.0
+
+* Updated `.gitignore` with GitHub R template
+* Corrected links in `README.md` file
+* Corrected database names in `sap.qmd` and `index.qmd` files
+* Tidied up R scripts
+* Replaced SQL function `EXTRACT` with `YEAR` in the `_buildStrata.R` script to translate SQL code to Azure Synapse dialect
+* Converted `dbplyr` function to SQL code
+* Reworked `initializeCohortTables` function (Added `drop table` sql code)
+* Added database information Markdown in shiny app
+
+
 # EHDEN HMB v0.2.0
 
 * Add shiny app to preview results
@@ -21,7 +33,7 @@
 * Update package website
 * Add yearly incidence to `Incidence Analysis` (Issue #13)
 * Add R scripts for data exchange and CD preview (Issue #14)
-* update cohort definitions from Siir
+* Update cohort definitions from Siir
     - copperIUDproc
     - copperIUDdrug
     - disorderOfOvary
@@ -53,7 +65,7 @@
 * Add study website
 * Update cohort definitions for denominator
 * Addition of executeStudy.R file to automate execution
-* add age strata [under 30, 30-45, 45-55]
+* Add age strata [under 30, 30-45, 45-55]
 
 # EHDEN HMB v0.1.0
 
@@ -68,12 +80,12 @@
 * Add file `StoreResults` to upload cohort diagnostics zip to aws s3 bucket
 * Update `KeyringSetup` to match `Ulysses`
 * Update renv.lock to add `aws.s3` and `Ulysses` v0.0.2
-* minor correction to study task files
+* Minor correction to study task files
 
 # EHDEN HMB v0.0.5
 
-* Add in renv
-* minor correction to HMB cohort for cohort diagnostics
+* Added in `renv`
+* Minor correction to HMB cohort for cohort diagnostics
 
 # EHDEN HMB v0.0.4
 
@@ -91,11 +103,11 @@
 
 * Update cohort definition for HMB to fix error
 * Add cohort diagnostics script
-* Add How to Run file and key ring file
+* Add `HowToRun` file and `keyring` file
 
 # EHDEN HMB v0.0.1
 
 * Add HMB cohort definition to repo
 * Start preparing documentation about study
 * Initialize OHDSI study
-* Add `NEWS.md` to track changes to OHDSI study
+* Add `NEWS.md` to track changes
diff --git a/README.md b/README.md
@@ -33,6 +33,7 @@
 -   **Odysseus Data Services**
     -   Asieh Golozar
     -   Martin Lavallee
+    -   George Argyriou
 -   **Boehringer Ingelheim**
     -   Ron Herrera
 
@@ -48,10 +49,16 @@ This study will be run on OMOP databases in the EHDEN Network. The table below l
 |-------------------------------------|--------------------|--------------------------|
 | CPRD GOLD                           | UK                 | Bayer                    |
 | CPRD AURUM                          | UK                 | Bayer                    |
+| Optum Clinformatics                 | US                 | Bayer                    |
+| MarketScan IBM                      | US                 | Bayer                    |
+| Cegedim THIN                        | Belgium            | Boehringer Ingelheim     |
+| Disease Analyzer                    | Germany            | Boehringer Ingelheim     |
 | Hospital del Mar (Barcelona)        | Spain              | Hospital del Mar         |
+| Semmelweis University               | Hungary            | Semmelweis University    |
+
 
 ## Study Documentation
 
--   [**Study SAP**](https://github.com/OdyOSG/ehden_hmb/blob/develop/documentation/StudySAP.pdf)
--   [**How To Run**](https://github.com/OdyOSG/ehden_hmb/blob/main/documentation/HowToRun.md)
--   **Contribution Guideline**
+-   [**Study SAP**](https://odyosg.github.io/ehden_hmb/sap.html)
+-   [**How To Run**](https://odyosg.github.io/ehden_hmb/howToRun.html)
+-   [**Contribution Guidelines**](https://odyosg.github.io/ehden_hmb/contribution.html)
diff --git a/analysis/private/_buildCohorts.R b/analysis/private/_buildCohorts.R
@@ -1,21 +1,14 @@
-# A. Meta Info -----------------------
+# A. File Info -----------------------
 
 # Task: Build Cohorts
-# Author: [Add Name of Author]
-# Date: 2023-04-12
-# Description: The purpose of the _buildCohorts.R script is to
-# build cohort functions
+# Description: The purpose of the _buildCohorts.R script is to build cohort functions
 
-# B. Functions ------------------------
 
-initializeCohortTables <- function(executionSettings, con) {
+# B. Functions ------------------------
 
-  # if (con@dbms == "snowflake") {
-  #   workSchema <- paste(executionSettings$workDatabase, executionSettings$workSchema, sep = ".")
-  # } else {
-  #   workSchema <- executionSettings$workSchema
-  # }
-  #
+initializeCohortTables <- function(executionSettings,
+                                   con,
+                                   dropTables = FALSE) {
 
   name <- executionSettings$cohortTable
 
@@ -27,14 +20,42 @@ initializeCohortTables <- function(executionSettings, con) {
                            cohortCensorStatsTable = paste0(name, "_censor_stats"))
 
 
+  ## Drop cohort tables
+  if (dropTables == TRUE) {
+
+    ## Delete csv files from "01_buildCohorts" folder
+    manifestPath <- here::here("results", executionSettings$databaseName, "01_buildCohorts")
+    pathFiles <- list.files(manifestPath,  full.names = TRUE)
+    sapply(pathFiles, unlink)
+
+
+    ## Drop cohort tables
+    for (i in 1:length(cohortTableNames)) {
+
+      sql <- "DROP TABLE IF EXISTS @writeSchema.@tableName;"
+
+      dropSql <- SqlRender::render(
+        sql,
+        writeSchema = executionSettings$workDatabaseSchema,
+        tableName = cohortTableNames[i]
+      ) %>%
+        SqlRender::translate(targetDialect = "snowflake")
+
+      DatabaseConnector::executeSql(connection = con, dropSql, progressBar = FALSE)
+
+    }
+  }
+
+  ## Create cohort tables
   CohortGenerator::createCohortTables(connection = con,
                                       cohortDatabaseSchema = executionSettings$workDatabaseSchema,
                                       cohortTableNames = cohortTableNames,
                                       incremental = TRUE)
   invisible(cohortTableNames)
-
 }
 
+
+
 prepManifestForCohortGenerator <- function(cohortManifest) {
 
   cohortsToCreate <- cohortManifest %>%
@@ -48,8 +69,8 @@ prepManifestForCohortGenerator <- function(cohortManifest) {
     cohortsToCreate$json,
     ~CirceR::buildCohortQuery(CirceR::cohortExpressionFromJson(.x),
                               CirceR::createGenerateOptions(generateStats = TRUE)))
-  return(cohortsToCreate)
 
+  return(cohortsToCreate)
 }
 
 
@@ -114,7 +135,6 @@ generateCohorts <- function(executionSettings,
                   bullet = "tick", bullet_col = "green")
 
   return(cohortCounts)
-
 }
 
 # Run Cohort Diagnostics

diff --git a/analysis/private/_buildStrata.R b/analysis/private/_buildStrata.R
@@ -1,15 +1,13 @@
-# A. Meta Info -----------------------
+# A. File Info -----------------------
 
 # Task: Build Strata
-# Author: Martin Lavallee
-# Date: 2023-07-26
 # Description: The purpose of the _buildStrata.R script is to build strata for the analysis
 
+
 # B. Functions ------------------------
 
 ## Strata Functions-------------
 
-
 ageStrata <- function(con,
                       cohortDatabaseSchema,
                       cohortTable,
@@ -45,7 +43,7 @@ ageStrata <- function(con,
              c.cohort_start_date,
              c.cohort_end_date,
              p.year_of_birth,
-             abs(p.year_of_birth - EXTRACT(YEAR FROM c.cohort_start_date)) AS age
+             abs(p.year_of_birth - YEAR(c.cohort_start_date)) AS age
       FROM @cohortDatabaseSchema.@cohortTable c
       JOIN @cdmDatabaseSchema.person p
         ON p.person_id = c.subject_id
@@ -92,14 +90,11 @@ ageStrata <- function(con,
 }
 
 
-
-
 buildStrata <- function(con,
                         executionSettings,
                         analysisSettings) {
 
   # Step 0: Prep
-
   ## get schema vars
   cdmDatabaseSchema <- executionSettings$cdmDatabaseSchema
   workDatabaseSchema <- executionSettings$workDatabaseSchema
@@ -153,10 +148,8 @@ buildStrata <- function(con,
             ageMin = 45,
             ageMax = 56)
 
-
   strataKey <- tb1
 
-
   strataSummary <- dplyr::tbl(con, dbplyr::in_schema(workDatabaseSchema, cohortTable)) %>%
     dplyr::count(cohort_definition_id) %>%
     dplyr::collect() %>%
@@ -176,7 +169,5 @@ buildStrata <- function(con,
   )
 
   invisible(dt)
-
-
 }
 
diff --git a/analysis/private/_cohortPrevalence.R b/analysis/private/_cohortPrevalence.R
@@ -9,9 +9,11 @@ verboseSave <- function(object, saveName, saveLocation) {
                   bullet = "info", bullet_col = "blue")
   cli::cat_bullet(crayon::cyan(saveLocation), bullet = "pointer", bullet_col = "yellow")
   cli::cat_line()
+
   invisible(savePath)
 }
 
+
 cohortCovariates <- function(con,
                              cohortDatabaseSchema,
                              cohortTable,
@@ -25,7 +27,8 @@ cohortCovariates <- function(con,
 
   targetId <- cohortKey$id
   eventId <- covariateKey$id
-  #sql to get cohort covariates - period prevalence change
+
+  # sql to get cohort covariates - period prevalence change
   sql <- "
     SELECT
       t.cohort_definition_id AS target_cohort_id,
@@ -47,8 +50,8 @@ cohortCovariates <- function(con,
           DATEADD(day, @timeA, t.cohort_start_date) AND
           DATEADD(day, @timeB, t.cohort_start_date)
           )
-    GROUP BY t.cohort_definition_id, e.cohort_definition_id
-"
+    GROUP BY t.cohort_definition_id, e.cohort_definition_id;"
+
   # Render and translate sql
   cohortCovariateSql <- SqlRender::render(
     sql,
@@ -102,8 +105,6 @@ cohortCovariates <- function(con,
 
 
   invisible(cohortCovTbl)
-
-
 }
 
 
@@ -154,7 +155,7 @@ executeCohortPrevalence <- function(con,
     cli::cat_bullet("Using cohorts ids:\n   ", crayon::green(cat_cohortId),
                     bullet = "info", bullet_col = "blue")
 
-    # Run post-index s
+    # Run post-index
     cohortCovariates(con = con,
                      cohortDatabaseSchema = workDatabaseSchema,
                      cohortTable = cohortTable,
@@ -166,13 +167,14 @@ executeCohortPrevalence <- function(con,
 
 
   }
+
   tok <- Sys.time()
   cli::cat_bullet("Execution Completed at: ", crayon::red(tok),
                   bullet = "info", bullet_col = "blue")
   tdif <- tok - tik
   tok_format <- paste(scales::label_number(0.01)(as.numeric(tdif)), attr(tdif, "units"))
   cli::cat_bullet("Execution took: ", crayon::red(tok_format),
                   bullet = "info", bullet_col = "blue")
-  invisible(tok)
 
+  invisible(tok)
 }