Skip to content

Commit

Permalink
Merge pull request #450 from OHDSI/develop
Browse files Browse the repository at this point in the history
v2.2.0 Release
  • Loading branch information
katy-sadowski authored May 5, 2023
2 parents 1197d5d + 8918738 commit 162e709
Show file tree
Hide file tree
Showing 78 changed files with 1,590 additions and 1,376 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: DataQualityDashboard
Type: Package
Title: Execute and View Data Quality Checks on OMOP CDM Database
Version: 2.1.2
Date: 2023-03-10
Version: 2.2.0
Date: 2023-05-05
Authors@R: c(
person("Katy", "Sadowski", email = "[email protected]", role = c("aut", "cre")),
person("Clair", "Blacketer", role = c("aut")),
Expand Down Expand Up @@ -31,7 +31,8 @@ Imports:
plyr,
stringr,
rlang,
tidyselect
tidyselect,
readr
Suggests:
testthat,
knitr,
Expand Down
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ export(writeJsonResultsToCsv)
export(writeJsonResultsToTable)
import(DatabaseConnector)
import(magrittr)
importFrom(dplyr,case_when)
importFrom(dplyr,mutate)
importFrom(magrittr,"%>%")
importFrom(readr,read_csv)
importFrom(rlang,.data)
importFrom(stats,na.omit)
importFrom(stats,setNames)
Expand All @@ -18,5 +21,4 @@ importFrom(tidyselect,all_of)
importFrom(utils,install.packages)
importFrom(utils,menu)
importFrom(utils,packageVersion)
importFrom(utils,read.csv)
importFrom(utils,write.table)
22 changes: 22 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
DataQualityDashboard 2.2.0
==========================
This release includes:

### New features

- `cohortTableName` parameter added to `executeDqChecks`. Allows user to specify the name of the cohort table when running DQD on a cohort. Defaults to `"cohort"`


### Bugfixes

- Fixed several bugs in the default threshold files:
  - Updated plausible low value for specimen quantity from 1 to 0
  - Removed foreign key domains for episode object concept ID (the multitude of plausible domains makes checking this field infeasible)
  - Updated date format for hard-coded dates to `YYYYMMDD` to conform to SqlRender standard
  - Added DEATH checks to v5.2 and v5.3
- Fixed field level checks to incorporate user-specified `vocabDatabaseSchema` and `cohortDatabaseSchema` where appropriate
- Removed `outputFile` parameter from DQD setup vignette (variable not set in script)
- Removed hidden BOM character from several threshold csv files, and updated csv read method to account for BOM character moving forward. This character caused an error on some operating systems

And some minor documentation updates for clarity/accuracy.

DataQualityDashboard 2.1.2
==========================

Expand Down
27 changes: 21 additions & 6 deletions R/executeDqChecks.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@
#' @param csvFile (OPTIONAL) CSV file to write results
#' @param checkLevels Choose which DQ check levels to execute. Default is all 3 (TABLE, FIELD, CONCEPT)
#' @param checkNames (OPTIONAL) Choose which check names to execute. Names can be found in inst/csv/OMOP_CDMv[cdmVersion]_Check_Descriptions.csv. Note that "cdmTable", "cdmField" and "measureValueCompleteness" are always executed.
#' @param cohortDefinitionId The cohort definition id for the cohort you wish to run the DQD on. The package assumes a standard OHDSI cohort table called 'Cohort'
#' @param cohortDefinitionId The cohort definition id for the cohort you wish to run the DQD on. The package assumes a standard OHDSI cohort table
#' with the fields cohort_definition_id and subject_id.
#' @param cohortDatabaseSchema The schema where the cohort table is located.
#' @param cohortTableName The name of the cohort table. Defaults to `cohort`.
#' @param tablesToExclude (OPTIONAL) Choose which CDM tables to exclude from the execution.
#' @param cdmVersion The CDM version to target for the data source. Options are "5.2", "5.3", or "5.4". By default, "5.3" is used.
#' @param tableCheckThresholdLoc The location of the threshold file for evaluating the table checks. If not specified the default thresholds will be applied.
Expand All @@ -48,9 +49,11 @@
#' @importFrom magrittr %>%
#' @import DatabaseConnector
#' @importFrom stringr str_detect regex
#' @importFrom utils packageVersion read.csv write.table
#' @importFrom utils packageVersion write.table
#' @importFrom rlang .data
#' @importFrom tidyselect all_of
#' @importFrom readr read_csv
#' @importFrom dplyr mutate case_when
#'
#' @export
#'
Expand All @@ -72,6 +75,7 @@ executeDqChecks <- function(connectionDetails,
checkNames = c(),
cohortDefinitionId = c(),
cohortDatabaseSchema = resultsDatabaseSchema,
cohortTableName = "cohort",
tablesToExclude = c("CONCEPT", "VOCABULARY", "CONCEPT_ANCESTOR", "CONCEPT_RELATIONSHIP", "CONCEPT_CLASS", "CONCEPT_SYNONYM", "RELATIONSHIP", "DOMAIN"),
cdmVersion = "5.3",
tableCheckThresholdLoc = "default",
Expand All @@ -89,6 +93,7 @@ executeDqChecks <- function(connectionDetails,
stopifnot(is.character(cdmDatabaseSchema), is.character(resultsDatabaseSchema), is.numeric(numThreads))
stopifnot(is.character(cdmSourceName), is.logical(sqlOnly), is.character(outputFolder), is.logical(verboseMode))
stopifnot(is.logical(writeToTable), is.character(checkLevels))
stopifnot(is.character(cohortDatabaseSchema), is.character(cohortTableName))

if (!all(checkLevels %in% c("TABLE", "FIELD", "CONCEPT"))) {
stop('checkLevels argument must be a subset of c("TABLE", "FIELD", "CONCEPT").
Expand Down Expand Up @@ -169,14 +174,14 @@ executeDqChecks <- function(connectionDetails,

startTime <- Sys.time()

checkDescriptionsDf <- read.csv(
checkDescriptionsDf <- read_csv(
file = system.file(
"csv",
sprintf("OMOP_CDMv%s_Check_Descriptions.csv", cdmVersion),
package = "DataQualityDashboard"
),
stringsAsFactors = FALSE
)
)
checkDescriptionsDf <- as.data.frame(checkDescriptionsDf)

tableChecks <- .readThresholdFile(
checkThresholdLoc = tableCheckThresholdLoc,
Expand All @@ -202,9 +207,18 @@ executeDqChecks <- function(connectionDetails,
conceptChecks <- conceptChecks[!conceptChecks$cdmTableName %in% tablesToExclude, ]
}

## remove offset from being checked
## remove offset from being checked as it is a reserved word in some databases
fieldChecks <- subset(fieldChecks, fieldChecks$cdmFieldName != "offset")

tableChecks <- dplyr::mutate(tableChecks, schema = dplyr::case_when(
schema == "CDM" ~ cdmDatabaseSchema,
schema == "VOCAB" ~ vocabDatabaseSchema,
schema == "COHORT" ~ cohortDatabaseSchema,
TRUE ~ cdmDatabaseSchema
))

fieldChecks <- merge(x = fieldChecks, y = tableChecks[, c("cdmTableName", "schema")], by = "cdmTableName", all.x = TRUE)

checksToInclude <- checkDescriptionsDf$checkName[sapply(checkDescriptionsDf$checkName, function(check) {
!is.null(eval(parse(text = sprintf("tableChecks$%s", check)))) |
!is.null(eval(parse(text = sprintf("fieldChecks$%s", check)))) |
Expand Down Expand Up @@ -246,6 +260,7 @@ executeDqChecks <- function(connectionDetails,
cdmDatabaseSchema,
vocabDatabaseSchema,
cohortDatabaseSchema,
cohortTableName,
cohortDefinitionId,
outputFolder,
sqlOnly,
Expand Down
44 changes: 22 additions & 22 deletions R/listChecks.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,75 +24,75 @@
#' @param fieldCheckThresholdLoc The location of the threshold file for evaluating the field checks. If not specified the default thresholds will be applied.
#' @param conceptCheckThresholdLoc The location of the threshold file for evaluating the concept checks. If not specified the default thresholds will be applied.
#'
#' @importFrom readr read_csv
#'
#' @export
listDqChecks <- function(cdmVersion = "5.3", tableCheckThresholdLoc = "default", fieldCheckThresholdLoc = "default", conceptCheckThresholdLoc = "default") {
dqChecks <- {}
dqChecks$checkDescriptions <-
read.csv(system.file(
read_csv(system.file(
"csv",
sprintf("OMOP_CDMv%s_Check_Descriptions.csv", cdmVersion),
package = "DataQualityDashboard"
),
stringsAsFactors = FALSE
)
))
dqChecks$checkDescriptions <- as.data.frame(dqChecks$checkDescriptions)


if (tableCheckThresholdLoc == "default") {
dqChecks$tableChecks <-
read.csv(
read_csv(
system.file(
"csv",
sprintf("OMOP_CDMv%s_Table_Level.csv", cdmVersion),
package = "DataQualityDashboard"
),
stringsAsFactors = FALSE,
na.strings = c(" ", "")
na = c(" ", "")
)
dqChecks$tableChecks <- as.data.frame(dqChecks$tableChecks)
} else {
dqChecks$tableChecks <- read.csv(
dqChecks$tableChecks <- read_csv(
tableCheckThresholdLoc,
stringsAsFactors = FALSE,
na.strings = c(" ", "")
na = c(" ", "")
)
dqChecks$tableChecks <- as.data.frame(dqChecks$tableChecks)
}

if (fieldCheckThresholdLoc == "default") {
dqChecks$fieldChecks <-
read.csv(
read_csv(
system.file(
"csv",
sprintf("OMOP_CDMv%s_Field_Level.csv", cdmVersion),
package = "DataQualityDashboard"
),
stringsAsFactors = FALSE,
na.strings = c(" ", "")
na = c(" ", "")
)
dqChecks$fieldChecks <- as.data.frame(dqChecks$fieldChecks)
} else {
dqChecks$fieldChecks <- read.csv(
dqChecks$fieldChecks <- read_csv(
fieldCheckThresholdLoc,
stringsAsFactors = FALSE,
na.strings = c(" ", "")
na = c(" ", "")
)
dqChecks$fieldChecks <- as.data.frame(dqChecks$fieldChecks)
}

if (conceptCheckThresholdLoc == "default") {
dqChecks$conceptChecks <-
read.csv(
read_csv(
system.file(
"csv",
sprintf("OMOP_CDMv%s_Concept_Level.csv", cdmVersion),
package = "DataQualityDashboard"
),
stringsAsFactors = FALSE,
na.strings = c(" ", "")
na = c(" ", "")
)
dqChecks$conceptChecks <- as.data.frame(dqChecks$conceptChecks)
} else {
dqChecks$conceptChecks <- read.csv(
dqChecks$conceptChecks <- read_csv(
conceptCheckThresholdLoc,
stringsAsFactors = FALSE,
na.strings = c(" ", "")
na = c(" ", "")
)
dqChecks$conceptChecks <- as.data.frame(dqChecks$conceptChecks)
}

return(dqChecks)
Expand Down
34 changes: 20 additions & 14 deletions R/readThresholdFile.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,28 @@
# limitations under the License.

.readThresholdFile <- function(checkThresholdLoc, defaultLoc) {
thresholdFile <- checkThresholdLoc

if (checkThresholdLoc == "default") {
result <- read.csv(
file = system.file(
"csv",
defaultLoc,
package = "DataQualityDashboard"
),
stringsAsFactors = FALSE,
na.strings = c(" ", "")
)
} else {
result <- read.csv(
file = checkThresholdLoc,
stringsAsFactors = FALSE,
na.strings = c(" ", "")
thresholdFile <- system.file(
"csv",
defaultLoc,
package = "DataQualityDashboard"
)
}

colspec <- readr::spec_csv(thresholdFile)

# plausibleUnitConceptIds is a comma-separated list of concept ids, but it is being interpreted as col_double()
if ("plausibleUnitConceptIds" %in% names(colspec$cols)) {
colspec$cols$plausibleUnitConceptIds <- readr::col_character()
}

result <- read_csv(
file = thresholdFile,
col_types = colspec,
na = c(" ", "")
)
result <- as.data.frame(result)
return(result)
}
3 changes: 3 additions & 0 deletions R/runCheck.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#' @param cdmDatabaseSchema The fully qualified database name of the CDM schema
#' @param vocabDatabaseSchema The fully qualified database name of the vocabulary schema (default is to set it as the cdmDatabaseSchema)
#' @param cohortDatabaseSchema The schema where the cohort table is located.
#' @param cohortTableName The name of the cohort table.
#' @param cohortDefinitionId The cohort definition id for the cohort you wish to run the DQD on. The package assumes a standard OHDSI cohort table (named by cohortTableName) with the fields cohort_definition_id and subject_id.
#' @param outputFolder The folder to output logs and SQL files to
#' @param sqlOnly Should the SQLs be executed (FALSE) or just returned (TRUE)?
Expand All @@ -42,6 +43,7 @@
cdmDatabaseSchema,
vocabDatabaseSchema,
cohortDatabaseSchema,
cohortTableName,
cohortDefinitionId,
outputFolder,
sqlOnly) {
Expand Down Expand Up @@ -77,6 +79,7 @@
list(warnOnMissingParameters = FALSE),
list(cdmDatabaseSchema = cdmDatabaseSchema),
list(cohortDatabaseSchema = cohortDatabaseSchema),
list(cohortTableName = cohortTableName),
list(cohortDefinitionId = cohortDefinitionId),
list(vocabDatabaseSchema = vocabDatabaseSchema),
list(cohort = cohort),
Expand Down
4 changes: 2 additions & 2 deletions docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/LICENSE-text.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions docs/articles/AddNewCheck.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 162e709

Please sign in to comment.