Skip to content

Commit

Permalink
Rerunning vignettes on DataBricks
Browse files Browse the repository at this point in the history
  • Loading branch information
Admin_mschuemi authored and Admin_mschuemi committed Sep 3, 2024
1 parent c8ea064 commit 786d87a
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 39 deletions.
27 changes: 12 additions & 15 deletions extras/MultiAnalysesVignetteDataFetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,19 @@

# This code should be used to fetch the data that is used in the vignettes.
library(SelfControlledCaseSeries)
options(andromedaTempFolder = "d:/andromedaTemp")
options(andromedaTempFolder = "e:/andromedaTemp")

connectionDetails <- DatabaseConnector::createConnectionDetails(
dbms = "redshift",
connectionString = keyring::key_get("redShiftConnectionStringOhdaMdcd"),
user = keyring::key_get("redShiftUserName"),
password = keyring::key_get("redShiftPassword")
connectionDetails <- createConnectionDetails(
dbms = "spark",
connectionString = keyring::key_get("databricksConnectionString"),
user = "token",
password = keyring::key_get("databricksToken")
)
cdmDatabaseSchema <- "cdm_truven_mdcd_v3038"
cohortDatabaseSchema <- "scratch_mschuemi"
cohortTable <- "sccs_vignette"
options(sqlRenderTempEmulationSchema = NULL)
cohortTable <- "sccs_vignette"
cdmVersion <- "5"
outputFolder <- "d:/temp/sccsVignette2"
cdmDatabaseSchema <- "merative_mdcr.cdm_merative_mdcr_v3045"
cohortDatabaseSchema <- "scratch.scratch_mschuemi"
cohortTable <- "sccs_vignette"
options(sqlRenderTempEmulationSchema = "scratch.scratch_mschuemi")
outputFolder <- "e:/temp/sccsVignette2"


# Create cohorts ---------------------------------------------------------------
Expand Down Expand Up @@ -258,7 +256,6 @@ runSccsAnalyses(
exposureTable = "drug_era",
outcomeDatabaseSchema = cohortDatabaseSchema,
outcomeTable = cohortTable,
cdmVersion = cdmVersion,
outputFolder = outputFolder,
combineDataFetchAcrossOutcomes = TRUE,
exposuresOutcomeList = exposuresOutcomeList,
Expand Down Expand Up @@ -333,7 +330,7 @@ DatabaseConnector::disconnect(connection)

# Launch Shiny app -------------------------------------------------------------
library(dplyr)
outputFolder <- "d:/temp/sccsVignette2"
outputFolder <- "e:/temp/sccsVignette2"
databaseFile <- file.path(outputFolder, "export", "SccsResults.sqlite")
connectionDetails <- DatabaseConnector::createConnectionDetails(
dbms = "sqlite",
Expand Down
35 changes: 16 additions & 19 deletions extras/SingleStudyVignetteDataFetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,19 @@

# This code should be used to fetch the data that is used in the vignettes.
library(SelfControlledCaseSeries)
options(andromedaTempFolder = "d:/andromedaTemp")

folder <- "d:/temp/vignetteSccs"
connectionDetails <- DatabaseConnector::createConnectionDetails(
dbms = "redshift",
connectionString = keyring::key_get("redShiftConnectionStringOhdaMdcd"),
user = keyring::key_get("redShiftUserName"),
password = keyring::key_get("redShiftPassword")
options(andromedaTempFolder = "e:/andromedaTemp")

folder <- "e:/temp/vignetteSccs"
connectionDetails <- createConnectionDetails(
dbms = "spark",
connectionString = keyring::key_get("databricksConnectionString"),
user = "token",
password = keyring::key_get("databricksToken")
)
cdmDatabaseSchema <- "cdm_truven_mdcd_v2565"
cohortDatabaseSchema <- "scratch_mschuemi"
cohortTable <- "sccs_epistaxis"
cdmVersion <- "5"
options(sqlRenderTempEmulationSchema = NULL)
cdmDatabaseSchema <- "merative_mdcr.cdm_merative_mdcr_v3045"
cohortDatabaseSchema <- "scratch.scratch_mschuemi"
cohortTable <- "sccs_vignette"
options(sqlRenderTempEmulationSchema = "scratch.scratch_mschuemi")

# Create cohorts ---------------------------------------------------------------
connection <- DatabaseConnector::connect(connectionDetails)
Expand Down Expand Up @@ -73,8 +72,7 @@ sccsData <- getDbSccsData(connectionDetails = connectionDetails,
exposureIds = aspirin,
studyStartDates = "20100101",
studyEndDates = "21000101",
maxCasesPerOutcome = 100000,
cdmVersion = cdmVersion)
maxCasesPerOutcome = 100000)
saveSccsData(sccsData, file.path(folder, "data1.zip"))
sccsData <- loadSccsData(file.path(folder, "data1.zip"))
sccsData
Expand Down Expand Up @@ -191,8 +189,7 @@ sccsData <- getDbSccsData(connectionDetails = connectionDetails,
exposureIds = aspirin,
maxCasesPerOutcome = 100000,
studyStartDates = c("20100101", "20220101"),
studyEndDates = c("20191231", "21001231"),
cdmVersion = cdmVersion)
studyEndDates = c("20191231", "21001231"))
saveSccsData(sccsData, file.path(folder, "data2.zip"))
sccsData <- loadSccsData(file.path(folder, "data2.zip"))
studyPop <- createStudyPopulation(sccsData = sccsData,
Expand Down Expand Up @@ -300,8 +297,7 @@ sccsData <- getDbSccsData(connectionDetails = connectionDetails,
exposureIds = c(),
maxCasesPerOutcome = 100000,
studyStartDates = c("19000101", "20220101"),
studyEndDates = c("20191231", "21001231"),
cdmVersion = cdmVersion)
studyEndDates = c("20191231", "21001231"))
saveSccsData(sccsData, file.path(folder, "data3.zip"))
sccsData <- loadSccsData(file.path(folder, "data3.zip"))

Expand All @@ -313,6 +309,7 @@ studyPop <- createStudyPopulation(sccsData = sccsData,
naivePeriod = 180)

covarAllDrugs <- createEraCovariateSettings(label = "Other exposures",
includeEraIds = c(),
excludeEraIds = aspirin,
stratifyById = TRUE,
start = 1,
Expand Down
Binary file modified inst/doc/MultipleAnalyses.pdf
Binary file not shown.
Binary file modified inst/doc/SingleStudies.pdf
Binary file not shown.
4 changes: 1 addition & 3 deletions vignettes/MultipleAnalyses.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ vignette: >
```{r echo = FALSE, message = FALSE, warning = FALSE}
library(SelfControlledCaseSeries)
library(EmpiricalCalibration)
outputFolder <- "d:/temp/sccsVignette2"
outputFolder <- "e:/temp/sccsVignette2"
diclofenac <- 1124300
giBleed <- 77
```
Expand Down Expand Up @@ -66,7 +66,6 @@ outputFolder <- "s:/temp/sccsVignette2"
cdmDatabaseSchema <- "my_cdm_data"
cohortDatabaseSchema <- "my_cohorts"
options(sqlRenderTempEmulationSchema = NULL)
cdmVersion <- "5"
```

The last three lines define the `cdmDatabaseSchema` and `cohortDatabaseSchema` variables, as well as the CDM version. We'll use these later to tell R where the data in CDM format live, where we want to store the (outcome) cohorts, and which version of the CDM is used. Note that for Microsoft SQL Server, database schemas need to specify both the database and the schema, so for example `cdmDatabaseSchema <- "my_cdm_data.dbo"`.
Expand Down Expand Up @@ -320,7 +319,6 @@ referenceTable <- runSccsAnalyses(
exposureTable = "drug_era",
outcomeDatabaseSchema = cohortDatabaseSchema,
outcomeTable = cohortTable,
cdmVersion = cdmVersion,
outputFolder = outputFolder,
combineDataFetchAcrossOutcomes = TRUE,
exposuresOutcomeList = exposuresOutcomeList,
Expand Down
Binary file added vignettes/MultipleAnalyses.pdf
Binary file not shown.
5 changes: 3 additions & 2 deletions vignettes/SingleStudies.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ vignette: >

```{r echo = FALSE, message = FALSE, warning = FALSE}
library(SelfControlledCaseSeries)
outputFolder <- "d:/temp/vignetteSccs"
outputFolder <- "e:/temp/vignetteSccs"
folderExists <- dir.exists(outputFolder)
```
# Introduction
Expand Down Expand Up @@ -529,6 +529,7 @@ studyPop <- createStudyPopulation(sccsData = sccsData,
naivePeriod = 180)
covarAllDrugs <- createEraCovariateSettings(label = "Other exposures",
includeEraIds = c(),
excludeEraIds = aspirin,
stratifyById = TRUE,
start = 1,
Expand Down Expand Up @@ -646,7 +647,7 @@ We can visualize which age ranges are covered by each subject's observation time
```{r eval=FALSE}
plotAgeSpans(studyPop)
```
```{r echo=FALSE,message=FALSE}
```{r echo=FALSE,message=FALSE,warning=FALSE}
if (folderExists) {
plotAgeSpans(studyPop)
}
Expand Down

0 comments on commit 786d87a

Please sign in to comment.