Benchmarks

# Session setup for the logical-track benchmarks.
# Load the development build of the package from the current directory.
devtools::load_all()
# Set the emr_max.data.size option to 1e9 for this session.
options(list(emr_max.data.size = 1e9))
# Connect to the database used for all timings in this section.
emr_db.connect(
    "/net/mraid14/export/tgdata/db/tgdb/emr/mock/"
)

Important note: this document is for benchmarking only, and it is not intended for learning naryn!

Logical tracks

Extract physical track

# Track names used by the extraction timings in this section.
ptrack        <- "WZMN.dx.1.250.11" # physical track
ltrack        <- "ltrack"           # logical track (no values)
ltrack_values <- "dx.250.1.star"    # logical track (with values)

# Time 100 repeated extractions of the physical track.
system.time(
    replicate(n = 100, expr = emr_extract(ptrack))
)

vanilla: 1.059
dev: 1.096 (the extra time is due to loading the package with devtools::load_all() instead of library())
logical tracks: 0.928

Extract logical track (no values)

# Time 100 repeated extractions of the logical track without values.
system.time(
    replicate(n = 100, expr = emr_extract(ltrack))
)

logical tracks: 0.914

Extract logical track (with values)

# Time 10 repeated extractions of the logical track with values.
system.time(
    replicate(n = 10, expr = emr_extract(ltrack_values))
)

logical tracks: 3.916

Extract virtual track with values

# Define virtual track "vt" over "diagnosis.250" with params = 11,
# then time 10 repeated extractions of it.
emr_vtrack.create(
    "vt",
    "diagnosis.250",
    params = 11
)
system.time(
    replicate(n = 10, expr = emr_extract("vt"))
)

vanilla: 13.729
dev: 15.897
logical tracks: 13.548

Extract tracks with explicit iterator

# Time 100 extractions of the physical track, passing the track itself
# as the explicit iterator.
system.time(
    replicate(
        n = 100,
        expr = emr_extract(ptrack, iterator = ptrack)
    )
)

vanilla: 0.955
dev: 1.024
logical tracks: 0.857

# Time 100 extractions of the logical track, passing the track itself
# as the explicit iterator.
system.time(
    replicate(
        n = 100,
        expr = emr_extract(ltrack, iterator = ltrack)
    )
)

logical tracks: 0.848

Value filters

# Session setup for the value-filter benchmarks.
library(glue)
devtools::load_all()
options(list(emr_max.data.size = 1e9))
emr_db.connect(
    "/net/mraid14/export/tgdata/db/tgdb/emr/mock/"
)

# Globals read by the benchmark functions in this section.
dx_track  <- "diagnosis.250" # track that gets extracted
lab_track <- "lab.103"       # track whose values are thresholded
val       <- 15              # around the 90th percentile

Compare with screen and then extract

# Baseline strategy: screen `lab_track` for points whose value is >= `val`,
# register the screened points as filter "f" with
# time.shift = c(-years(5), 0), and extract `dx_track` through that filter.
#
# Takes no arguments; reads the globals `lab_track`, `val` and `dx_track`.
# Returns the value of the final emr_extract() call.
screen_and_extract <- function() {
    screen_expr <- glue("{lab_track} >= {val}")
    screened_points <- emr_screen(screen_expr, iterator = lab_track)
    shift <- c(-years(5), 0)
    emr_filter.create("f", screened_points, time.shift = shift)
    emr_extract(dx_track, filter = "f")
}

# Value-filter strategy: define filter "f" directly on `lab_track` with
# val = `val` and operator ">=" (time.shift = c(-years(5), 0)), then
# extract `dx_track` through it.
#
# Takes no arguments; reads the globals `lab_track`, `val` and `dx_track`.
# Returns the value of the final emr_extract() call.
value_filter <- function() {
    emr_filter.create(
        "f",
        lab_track,
        val = val,
        operator = ">=",
        time.shift = c(-years(5), 0)
    )
    emr_extract(dx_track, filter = "f")
}

# Time 10 runs of the screen-then-extract strategy.
system.time(
    replicate(n = 10, expr = screen_and_extract())
)

screen and extract: 35.871

# Time 10 runs of the value-filter strategy.
system.time(
    replicate(n = 10, expr = value_filter())
)

value filter: 11.725

Compare with disk caching (for multiple extractions)

This was our previous strategy, mostly for relative risk computations.

# Reconnect with two directories: the mock database plus this session's
# temporary directory.
tmp <- tempdir()
emr_db.connect(
    c(
        "/net/mraid14/export/tgdata/db/tgdb/emr/mock/",
        tmp
    )
)

# Caching step: screen `lab_track` for points whose value is >= `val`,
# set every screened point's `value` column to 1, and import the result
# as the categorical track "temp" in the "user" space.
#
# Takes no arguments; reads the globals `lab_track` and `val`.
# NOTE(review): presumably fails if a track named "temp" already exists —
# confirm before calling more than once per session.
cache_track <- function() {
    screen_expr <- glue("{lab_track} >= {val}")
    screened_points <- emr_screen(screen_expr, iterator = lab_track)
    screened_points$value <- 1
    emr_track.import(
        "temp",
        "user",
        categorical = TRUE,
        src = screened_points
    )
}

# Extraction step of the caching strategy: define filter "f" on the
# track "temp" with time.shift = c(-years(5), 0) and extract `dx_track`
# through it.
#
# Takes no arguments; reads the global `dx_track`.
# Returns the value of the final emr_extract() call.
extract_cache <- function() {
    shift <- c(-years(5), 0)
    emr_filter.create("f", "temp", time.shift = shift)
    emr_extract(dx_track, filter = "f")
}

# Full caching strategy: build the cached track once, then run the
# filtered extraction `n` times.
#
# n: number of extractions to run after caching (default 10).
# Returns the value of replicate() over the `n` extract_cache() calls.
screen_and_extract_cache <- function(n = 10) {
    cache_track()
    replicate(n = n, expr = extract_cache())
}

# Time one caching pass followed by 10 extractions.
system.time(
    screen_and_extract_cache(n = 10)
)

screen, cache and extract: 11.649

The caching itself takes 2.496 seconds, and each subsequent extraction takes 0.839 seconds, compared with 1.126 seconds for each extraction that uses a track value filter.

Some examples (there are definitely better ways to do each example)

Anemia

# "Before" version: screen lab.103 per sex, turn the screened points into
# filters, and create the track from the union of those filters.
withr::local_options(list(emr_max.data.size = 1e9))

# Sex filters built on the patients.* tracks.
emr_filter.create(
    "female",
    src = "patients.female",
    time.shift = c(-years(120), 0)
)
emr_filter.create(
    "male",
    src = "patients.male",
    time.shift = c(-years(120), 0)
)

# Points where lab.103 is below the sex-specific threshold.
hgb_female <- emr_screen(
    "lab.103 < 12",
    filter = "female",
    keepref = TRUE
)
hgb_male <- emr_screen(
    "lab.103 < 14",
    filter = "male",
    keepref = TRUE
)

# Filters from the distinct (id, time) pairs of the screened points.
emr_filter.create(
    "abnormal_hgb_female",
    src = dplyr::distinct(hgb_female, id, time)
)
emr_filter.create(
    "abnormal_hgb_male",
    src = dplyr::distinct(hgb_male, id, time)
)

emr_track.create(
    "anemia",
    categorical = FALSE,
    expr = "lab.103",
    filter = "abnormal_hgb_female | abnormal_hgb_male"
)

before: 14.642 seconds

# "After" version: the thresholds are expressed as value filters on
# lab.103, so no intermediate emr_screen() results are needed.
withr::local_options(list(emr_max.data.size = 1e9))

# Sex filters built on the patients.* tracks.
emr_filter.create(
    "female",
    src = "patients.female",
    time.shift = c(-years(120), 0)
)
emr_filter.create(
    "male",
    src = "patients.male",
    time.shift = c(-years(120), 0)
)

# Value filters: lab.103 below the sex-specific threshold.
emr_filter.create(
    "abnormal_hgb_female",
    src = "lab.103",
    val = 12,
    operator = "<"
)
emr_filter.create(
    "abnormal_hgb_male",
    src = "lab.103",
    val = 14,
    operator = "<"
)

emr_track.create(
    "anemia1",
    categorical = FALSE,
    expr = "lab.103",
    filter = "(female & abnormal_hgb_female) | (male & abnormal_hgb_male)"
)

after: 5.504 seconds

Abnormal hematocrit, second time

# "Before" version: screen lab.104 per sex, then build a "past" filter
# (time.shift = c(-years(3), -1)) and a "current" filter (no time shift)
# from the same screened points, and require both.
withr::local_options(list(emr_max.data.size = 1e9))

# Sex filters built on the patients.* tracks.
emr_filter.create(
    "female",
    src = "patients.female",
    time.shift = c(-years(120), 0)
)
emr_filter.create(
    "male",
    src = "patients.male",
    time.shift = c(-years(120), 0)
)

# Points where lab.104 is above the sex-specific threshold.
hct_female_48 <- emr_screen(
    "lab.104 > 48",
    filter = "female",
    keepref = TRUE
)
hct_male_49 <- emr_screen(
    "lab.104 > 49",
    filter = "male",
    keepref = TRUE
)

# "Past" filters: the screened points with a time shift.
emr_filter.create(
    "abnormal_hct_female_past",
    src = dplyr::distinct(hct_female_48, id, time),
    time.shift = c(-years(3), -1)
)
emr_filter.create(
    "abnormal_hct_male_past",
    src = dplyr::distinct(hct_male_49, id, time),
    time.shift = c(-years(3), -1)
)

# "Current" filters: the same screened points without a time shift.
emr_filter.create(
    "abnormal_hct_female_current",
    src = dplyr::distinct(hct_female_48, id, time)
)
emr_filter.create(
    "abnormal_hct_male_current",
    src = dplyr::distinct(hct_male_49, id, time)
)

emr_track.create(
    "abnormal_hct_second_time",
    categorical = FALSE,
    expr = "lab.104",
    filter = "(abnormal_hct_female_past & abnormal_hct_female_current) | (abnormal_hct_male_past & abnormal_hct_male_current)"
)

before: 3.339 seconds

# "After" version: start from a clean set of filters and express the
# thresholds as value filters on lab.104 instead of screened points.
emr_filter.clear()
withr::local_options(list(emr_max.data.size = 1e9))

# Sex filters built on the patients.* tracks.
emr_filter.create(
    "female",
    src = "patients.female",
    time.shift = c(-years(120), 0)
)
emr_filter.create(
    "male",
    src = "patients.male",
    time.shift = c(-years(120), 0)
)

# Value filters: lab.104 above the sex-specific threshold.
emr_filter.create(
    "abnormal_hct_female",
    src = "lab.104",
    val = 48,
    operator = ">"
)
emr_filter.create(
    "abnormal_hct_male",
    src = "lab.104",
    val = 49,
    operator = ">"
)

# Virtual track over lab.104 restricted by the sex-specific value filters,
# with time.shift = c(-years(3), -1).
emr_vtrack.create(
    "abnormal_hct_past",
    src = "lab.104",
    filter = "(female & abnormal_hct_female) | (male & abnormal_hct_male)",
    time.shift = c(-years(3), -1)
)

emr_track.create(
    "abnormal_hct_second_time1",
    categorical = FALSE,
    expr = "lab.104",
    filter = "abnormal_hct_past & ((female & abnormal_hct_female) | (male & abnormal_hct_male))",
    iterator = "lab.104"
)

after: 2.222 seconds

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Benchmarks

Important note: this document is for benchmarking only, and it is not intended for learning `naryn`!

Logical tracks

Extract physical track

Extract logical track (no values)

Extract logical track (with values)

Extract virtual track with values

Extract tracks with explicit iterator

Value filters

compare with screen and then extract

Compare with disk caching (for multiple extractions)

Some examples (there are definitely better ways to do each example)

Anemia

abnormal hematocrit second time

Clone this wiki locally

Benchmarks

Important note: this document is for benchmarking only, and it is not intended for learning naryn!

Logical tracks

Extract physical track

Extract logical track (no values)

Extract logical track (with values)

Extract virtual track with values

Extract tracks with explicit iterator

Value filters

compare with screen and then extract

Compare with disk caching (for multiple extractions)

Some examples (there are definitely better ways to do each example)

Anemia

abnormal hematocrit second time

Clone this wiki locally

Important note: this document is for benchmarking only, and it is not intended for learning `naryn`!