Skip to content

Commit

Permalink
Merge pull request #330 from OHDSI/bench
Browse files Browse the repository at this point in the history
Benchmark vignette updated
  • Loading branch information
edward-burn authored Sep 25, 2024
2 parents b5ebcf5 + 715a773 commit d13ea61
Show file tree
Hide file tree
Showing 16 changed files with 179 additions and 130 deletions.
Binary file modified R/sysdata.rda
Binary file not shown.
Binary file not shown.
Binary file added data-raw/data/Results_CORIVA-Estonia_20240913.zip
Binary file not shown.
Binary file added data-raw/data/Results_CPRD Gold 100k_20240909.zip
Binary file not shown.
Binary file not shown.
142 changes: 141 additions & 1 deletion data-raw/getBenchmarkResults.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ library(omopgenerics)
library(here)
library(dplyr)
library(tidyr)
library(visOmopResults)
library(stringr)

readData <- function(path) {
zipFiles <- list.files(path = path, pattern = ".zip")
Expand Down Expand Up @@ -50,5 +52,143 @@ mergeData <- function(data, patterns) {
return(x)
}

updateCDMname <- function(resultList, old, new) {
caseWhen <- "dplyr::case_when("
for (k in 1:length(old)) {
caseWhen <- glue::glue("{caseWhen} .data$cdm_name == '{old[k]}' ~ '{new[k]}', ")
}
caseWhen <- paste0(caseWhen, ".default = .data$cdm_name)") |>
rlang::parse_exprs() |> rlang::set_names("cdm_name")
for (res in names(resultList)) {
resultList[[res]] <- resultList[[res]] |>
dplyr::mutate(cdm_name = !!!caseWhen)
}
resultList
}

# Functions
niceNum <- function(x, dec = 0) {
trimws(format(round(as.numeric(x), dec), big.mark = ",", nsmall = dec, scientific = FALSE))
}

# get data ----
resultPatterns <- c("time", "comparison", "details", "omop", "index_counts", "sql_indexes")
benchmarkData <- readData(here::here("data-raw", "data")) %>% mergeData(resultPatterns)
benchmarkDataPre <- readData(here::here("data-raw", "data")) |>
mergeData(resultPatterns) |>
updateCDMname(old = "AurumCDM_202403", new = "CPRD Aurum")
benchmarkData <- list()

### omop
benchmarkData$omop <- benchmarkDataPre$omop |>
filter(table_name != "death") |>
select("cdm_name", "OMOP table" = "table_name", "number_records") |>
mutate(
number_records = niceNum(number_records, 0),
"OMOP table" = factor(
`OMOP table`,
levels = c("person", "observation_period", "drug_exposure", "condition_occurrence",
"procedure_occurrence", "visit_occurrence", "measurement", "observation")
)
) |>
arrange(.data$`OMOP table`) |>
pivot_wider(names_from = c("cdm_name"), values_from = c("number_records"), names_prefix = "[header]Database\n[header_level]")

### details
benchmarkData$details <- benchmarkDataPre$details |>
filterSettings(result_type == "cohort_count") |>
splitAll() |>
pivotEstimates() |>
select(-variable_level, - result_id) |>
distinct() |>
filter(grepl("cc_|atlas_", cohort_name)) |>
mutate(
Tool = case_when(grepl("cc", cohort_name) ~ "CohortConstructor", grepl("atlas", cohort_name) ~ "CIRCE", .default = NA),
"Cohort name" = str_to_sentence(gsub("_", " ", gsub("cc_|atlas_", "", cohort_name))),
variable_name = stringr::str_to_sentence(gsub("_", " ", .data$variable_name)),
"Cohort name" = case_when(
grepl("Asthma", .data[["Cohort name"]]) ~ "Asthma without COPD",
grepl("Covid", .data[["Cohort name"]]) ~ gsub("Covid|Covid", "COVID-19", `Cohort name`),
grepl("eutropenia", .data[["Cohort name"]]) ~ "Acquired neutropenia or unspecified leukopenia",
grepl("Hosp", .data[["Cohort name"]]) ~ "Inpatient hospitalisation",
grepl("First", .data[["Cohort name"]]) ~ "First major depression",
grepl("fluoro", .data[["Cohort name"]]) ~ "New fluoroquinolone users",
grepl("Beta", .data[["Cohort name"]]) ~ "New users of beta blockers nested in essential hypertension",
.default = .data[["Cohort name"]]
),
"Cohort name" = if_else(
grepl("COVID", .data[["Cohort name"]]),
gsub(" female", ": female", gsub(" male", ": male", .data[["Cohort name"]])),
.data[["Cohort name"]]
),
"Cohort name" = if_else(
grepl(" to ", .data[["Cohort name"]]),
gsub("male ", "male, ", .data[["Cohort name"]]),
.data[["Cohort name"]]
)
) |>
arrange(`Cohort name`) |>
select(-cohort_name) |>
pivot_wider(names_from = c("Tool", "variable_name"), values_from = c("count"), names_prefix = "[header]Tool\n[header_level]", names_sep = "\n[header_level]")

### time definition
benchmarkData$time_definition <- benchmarkDataPre$time |>
distinct() |>
filter(!grepl("male|set", msg)) |>
mutate(
time = (as.numeric(toc) - as.numeric(tic))/60,
Tool = if_else(grepl("cc", msg), "CohortConstructor", "CIRCE"),
"Cohort name" = str_to_sentence(gsub("_", " ", gsub("cc_|atlas_", "", msg)))
) |>
select(-c("tic", "toc", "msg", "callback_msg")) |>
mutate(
"Cohort name" = case_when(
grepl("Asthma", .data[["Cohort name"]]) ~ "Asthma without COPD",
grepl("Covid", .data[["Cohort name"]]) ~ "COVID-19",
grepl("eutropenia", .data[["Cohort name"]]) ~ "Acquired neutropenia or unspecified leukopenia",
grepl("Hosp", .data[["Cohort name"]]) ~ "Inpatient hospitalisation",
grepl("First", .data[["Cohort name"]]) ~ "First major depression",
grepl("fluoro", .data[["Cohort name"]]) ~ "New fluoroquinolone users",
grepl("Beta", .data[["Cohort name"]]) ~ "New users of beta blockers nested in essential hypertension",
.default = .data[["Cohort name"]]
)
) |>
arrange(`Cohort name`)

### time domain
header_prefix <- "[header]Time (minutes)\n[header_level]"
benchmarkData$time_domain <- benchmarkDataPre$time |>
distinct() |>
filter(grepl("atlas", msg)) |>
filter(!grepl("male", msg)) |>
group_by(cdm_name) |>
summarise(time = niceNum(sum(as.numeric(toc) - as.numeric(tic))/60, 2)) |>
mutate(Tool = "CIRCE") |>
union_all(
benchmarkDataPre$time |>
filter(msg == "cc_set_no_strata") |>
group_by(cdm_name) |>
summarise(time = niceNum(sum(as.numeric(toc) - as.numeric(tic))/60, 2)) |>
mutate(Tool = "CohortConstructor")
) |>
pivot_wider(names_from = "Tool", values_from = "time", names_prefix = header_prefix) |>
select(c("Database_name" = "cdm_name", starts_with(header_prefix)))

### time strata
benchmarkData$time_strata <- benchmarkDataPre$time |>
mutate(Tool = if_else(grepl("cc", msg), "CohortConstructor", "CIRCE")) |>
group_by(cdm_name, Tool) |>
summarise(time = niceNum(sum(as.numeric(toc) - as.numeric(tic))/60, 2), .groups = "drop") |>
pivot_wider(names_from = "Tool", values_from = "time", names_prefix = "[header]Time (minutes)\n[header_level]") |>
select("Database" = "cdm_name", starts_with("[header]Time"))

### time comparison
benchmarkData$comparison <- benchmarkDataPre$comparison |>
filterSettings(result_type == "cohort_overlap") |>
splitGroup() |>
filter(grepl("atlas_", cohort_name_comparator) & grepl("cc_", cohort_name_reference)) |>
filter(gsub("atlas_", "", cohort_name_comparator) == gsub("cc_", "", cohort_name_reference)) |>
uniteGroup(cols = c("cohort_name_reference", "cohort_name_comparator")) |>
newSummarisedResult()

### sql indexes
benchmarkData$sql_indexes <- benchmarkDataPre$sql_indexes
Binary file added pkgdown/favicon/apple-touch-icon-120x120.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkgdown/favicon/apple-touch-icon-152x152.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkgdown/favicon/apple-touch-icon-180x180.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkgdown/favicon/apple-touch-icon-60x60.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkgdown/favicon/apple-touch-icon-76x76.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkgdown/favicon/apple-touch-icon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkgdown/favicon/favicon-16x16.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkgdown/favicon/favicon-32x32.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkgdown/favicon/favicon.ico
Binary file not shown.
Loading

0 comments on commit d13ea61

Please sign in to comment.