-
Notifications
You must be signed in to change notification settings - Fork 0
Benchmarks
Aviezer Lifshitz edited this page May 9, 2022
·
16 revisions
# Load the package from the source tree (development build) instead of an
# installed copy via library().
devtools::load_all()
# Set the emr_max.data.size option to 1e9 for this session.
options(emr_max.data.size = 1e9)
# Connect to the mock EMR database.
emr_db.connect("/net/mraid14/export/tgdata/db/tgdb/emr/mock/")
# Names of the tracks whose extraction is timed.
ptrack <- "WZMN.dx.1.250.11"
ltrack <- "ltrack"
ltrack_values <- "dx.250.1.star"
# Time 100 repeated extractions of `ptrack`.
system.time(replicate(100, emr_extract(ptrack)))
- vanilla: 1.059
- dev: 1.096 (this is due to devtools::load_all() instead of library)
- logical tracks: 0.928
# Time 100 repeated extractions of `ltrack`.
system.time(replicate(100, emr_extract(ltrack)))
- logical tracks: 0.914
# Time 10 repeated extractions of `ltrack_values`.
system.time(replicate(10, emr_extract(ltrack_values)))
- logical tracks: 3.916
# Create the virtual track "vt" on "diagnosis.250" with params = c(11), then
# time 10 repeated extractions of it.
emr_vtrack.create("vt", "diagnosis.250", params = c(11))
system.time(replicate(10, emr_extract("vt")))
- vanilla: 13.729
- dev: 15.897
- logical tracks: 13.548
# Time 100 repeated extractions of `ptrack`, using `ptrack` itself as the iterator.
system.time(replicate(100, emr_extract(ptrack, iterator = ptrack)))
- vanilla: 0.955
- dev: 1.024
- logical tracks: 0.857
# Time 100 repeated extractions of `ltrack`, using `ltrack` itself as the iterator.
system.time(replicate(100, emr_extract(ltrack, iterator = ltrack)))
- logical tracks: 0.848
# Load the package from the source tree and attach glue, which is used to
# build the screening expressions.
devtools::load_all()
library(glue)
# Set the emr_max.data.size option to 1e9 for this session.
options(emr_max.data.size = 1e9)
# Connect to the mock EMR database.
emr_db.connect("/net/mraid14/export/tgdata/db/tgdb/emr/mock/")
# Track to extract, track to threshold on, and the threshold value.
dx_track <- "diagnosis.250"
lab_track <- "lab.103"
val <- 15 # around the 90th percentile
# Screen-then-filter strategy.
#
# Screens `lab_track` for points where its value is >= `val` (iterating over
# `lab_track`), registers the screened points as filter "f" with a time shift
# of c(-years(5), 0), and returns the extraction of `dx_track` under that
# filter. Reads `lab_track`, `val` and `dx_track` from the calling environment.
screen_and_extract <- function() {
  screen_expr <- glue("{lab_track} >= {val}")
  screened_points <- emr_screen(screen_expr, iterator = lab_track)
  emr_filter.create("f", screened_points, time.shift = c(-years(5), 0))
  emr_extract(dx_track, filter = "f")
}
# Value-filter strategy.
#
# Registers filter "f" directly on `lab_track` with the condition
# `>= val` (via the val / operator arguments) and a time shift of
# c(-years(5), 0), then returns the extraction of `dx_track` under that
# filter. Reads `lab_track`, `val` and `dx_track` from the calling environment.
value_filter <- function() {
  emr_filter.create(
    "f", lab_track,
    val = val,
    operator = ">=",
    time.shift = c(-years(5), 0)
  )
  emr_extract(dx_track, filter = "f")
}
# Time 10 runs of screen_and_extract().
system.time(replicate(10, screen_and_extract()))
- screen and extract: 35.871
# Time 10 runs of value_filter().
system.time(replicate(10, value_filter()))
- value filter: 11.725
This was our previous strategy, mostly for relative risk computations.
# Reconnect with two database directories: the mock database and the
# session's temporary directory.
# NOTE(review): presumably the temporary directory is where the cached track
# gets written - confirm against emr_track.import()'s `space` semantics.
tmp <- tempdir()
emr_db.connect(c("/net/mraid14/export/tgdata/db/tgdb/emr/mock/", tmp))
# Caching step.
#
# Screens `lab_track` for points where its value is >= `val`, sets the
# `value` column of the result to 1, and imports it as the categorical track
# "temp" in the "user" space. Reads `lab_track` and `val` from the calling
# environment.
cache_track <- function() {
  screen_expr <- glue("{lab_track} >= {val}")
  screened_points <- emr_screen(screen_expr, iterator = lab_track)
  screened_points$value <- 1
  emr_track.import(
    "temp", "user",
    categorical = TRUE,
    src = screened_points
  )
}
# Extraction step against the cached track.
#
# Registers filter "f" on the track "temp" with a time shift of
# c(-years(5), 0) and returns the extraction of `dx_track` under that filter.
# Reads `dx_track` from the calling environment.
extract_cache <- function() {
  emr_filter.create(
    "f", "temp",
    time.shift = c(-years(5), 0)
  )
  emr_extract(dx_track, filter = "f")
}
# Cache once, extract many times.
#
# Runs cache_track() a single time and then evaluates extract_cache() `n`
# times via replicate(), returning replicate()'s result.
#
# n: number of extractions to run after caching (default 10).
screen_and_extract_cache <- function(n = 10) {
  cache_track()
  replicate(n, extract_cache())
}
# Time one caching step followed by 10 extractions.
system.time(screen_and_extract_cache(10))
- screen, cache and extract: 11.649
Caching itself takes 2.496 seconds, and each subsequent extract takes 0.839 seconds, compared with 1.126 seconds for each extraction that uses a track value filter.
# Anemia track, "before" variant: abnormal hemoglobin points are screened per
# sex first, and the distinct (id, time) points are then used as filters.
withr::local_options(list(emr_max.data.size = 1e9))

# Sex filters, each with a time shift of c(-years(120), 0).
emr_filter.create(
  "female", "patients.female",
  time.shift = c(-years(120), 0)
)
emr_filter.create(
  "male", "patients.male",
  time.shift = c(-years(120), 0)
)

# Screen "lab.103" with a sex-specific cutoff (< 12 under "female",
# < 14 under "male").
hgb_female <- emr_screen("lab.103 < 12", filter = "female", keepref = TRUE)
hgb_male <- emr_screen("lab.103 < 14", filter = "male", keepref = TRUE)

# Register the distinct (id, time) points of each screen as a filter.
emr_filter.create(
  "abnormal_hgb_female",
  dplyr::distinct(hgb_female, id, time)
)
emr_filter.create(
  "abnormal_hgb_male",
  dplyr::distinct(hgb_male, id, time)
)

# Create the "anemia" track from "lab.103" under either filter.
emr_track.create(
  "anemia",
  categorical = FALSE,
  expr = "lab.103",
  filter = "abnormal_hgb_female | abnormal_hgb_male"
)
before: 14.642 seconds
# Anemia track, "after" variant: the hemoglobin cutoffs are expressed as
# value filters (val / operator) directly on "lab.103".
withr::local_options(list(emr_max.data.size = 1e9))

# Sex filters, each with a time shift of c(-years(120), 0).
emr_filter.create(
  "female", "patients.female",
  time.shift = c(-years(120), 0)
)
emr_filter.create(
  "male", "patients.male",
  time.shift = c(-years(120), 0)
)

# Value filters on "lab.103": < 12 and < 14.
emr_filter.create(
  "abnormal_hgb_female",
  src = "lab.103",
  val = 12,
  operator = "<"
)
emr_filter.create(
  "abnormal_hgb_male",
  src = "lab.103",
  val = 14,
  operator = "<"
)

# Create the "anemia1" track from "lab.103", pairing each sex filter with
# its own cutoff filter.
emr_track.create(
  "anemia1",
  categorical = FALSE,
  expr = "lab.103",
  filter = "(female & abnormal_hgb_female) | (male & abnormal_hgb_male)"
)
after: 5.504 seconds
# Repeated abnormal hematocrit, "before" variant: abnormal "lab.104" points
# are screened per sex first, and the distinct (id, time) points are used
# both as "past" filters (time shift c(-years(3), -1)) and as "current"
# filters (no time shift).
withr::local_options(list(emr_max.data.size = 1e9))

# Sex filters, each with a time shift of c(-years(120), 0).
emr_filter.create(
  "female", "patients.female",
  time.shift = c(-years(120), 0)
)
emr_filter.create(
  "male", "patients.male",
  time.shift = c(-years(120), 0)
)

# Screen "lab.104" with a sex-specific cutoff (> 48 under "female",
# > 49 under "male").
hct_female_48 <- emr_screen("lab.104 > 48", filter = "female", keepref = TRUE)
hct_male_49 <- emr_screen("lab.104 > 49", filter = "male", keepref = TRUE)

# "Past" filters: screened points shifted by c(-years(3), -1).
emr_filter.create(
  "abnormal_hct_female_past",
  dplyr::distinct(hct_female_48, id, time),
  time.shift = c(-years(3), -1)
)
emr_filter.create(
  "abnormal_hct_male_past",
  dplyr::distinct(hct_male_49, id, time),
  time.shift = c(-years(3), -1)
)

# "Current" filters: the same screened points without a time shift.
emr_filter.create(
  "abnormal_hct_female_current",
  dplyr::distinct(hct_female_48, id, time)
)
emr_filter.create(
  "abnormal_hct_male_current",
  dplyr::distinct(hct_male_49, id, time)
)

# Create the track from "lab.104" where, for the same sex, both the "past"
# and the "current" filter hold.
emr_track.create(
  "abnormal_hct_second_time",
  categorical = FALSE,
  expr = "lab.104",
  filter = "(abnormal_hct_female_past & abnormal_hct_female_current) | (abnormal_hct_male_past & abnormal_hct_male_current)"
)
before: 3.339 seconds
# Repeated abnormal hematocrit, "after" variant: the cutoffs are expressed as
# value filters on "lab.104", and the "past" condition is a virtual track
# built with those filters and a time shift of c(-years(3), -1).
emr_filter.clear()
withr::local_options(list(emr_max.data.size = 1e9))

# Sex filters, each with a time shift of c(-years(120), 0).
emr_filter.create(
  "female", "patients.female",
  time.shift = c(-years(120), 0)
)
emr_filter.create(
  "male", "patients.male",
  time.shift = c(-years(120), 0)
)

# Value filters on "lab.104": > 48 and > 49.
emr_filter.create(
  "abnormal_hct_female",
  src = "lab.104",
  val = 48,
  operator = ">"
)
emr_filter.create(
  "abnormal_hct_male",
  src = "lab.104",
  val = 49,
  operator = ">"
)

# Virtual track over "lab.104" restricted by the sex-specific cutoff filters
# and shifted by c(-years(3), -1).
emr_vtrack.create(
  "abnormal_hct_past",
  src = "lab.104",
  filter = "(female & abnormal_hct_female) | (male & abnormal_hct_male)",
  time.shift = c(-years(3), -1)
)

# Create the track from "lab.104", iterating over "lab.104", where both the
# "past" virtual track and the current sex-specific cutoff condition appear
# in the filter expression.
emr_track.create(
  "abnormal_hct_second_time1",
  categorical = FALSE,
  expr = "lab.104",
  filter = "abnormal_hct_past & ((female & abnormal_hct_female) | (male & abnormal_hct_male))",
  iterator = "lab.104"
)
after: 2.222 seconds