Skip to content

Commit

Permalink
Merge pull request #34 from pranavanba/main
Browse files Browse the repository at this point in the history
Minor Update
  • Loading branch information
pranavanba authored Sep 13, 2024
2 parents f73ee79 + 27b316f commit 5d86d2e
Show file tree
Hide file tree
Showing 11 changed files with 52 additions and 27 deletions.
5 changes: 5 additions & 0 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,8 @@ staging:
prod:
inherits: staging
synFolderID: syn51406700

internal:
inherits: staging
parquetDirID: syn63135213
s3basekey: staging/2024-09-10
2 changes: 1 addition & 1 deletion pipeline/run-pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ tictoc::tic(msg = "INFO: Total execution time")
# Get config variables
list2env(
x = config::get(file = "config/config.yml",
config = "staging"),
config = "internal"),
envir = .GlobalEnv
)

Expand Down
9 changes: 6 additions & 3 deletions scripts/process-data/fitbitactivitylogs.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ for (col_name in names(df_filtered)) {
}

# Pivot data frame from wide to long
cat("recoverutils::melt_df()....")
df_melted_filtered <-
df_filtered %>%
recoverutils::melt_df(excluded_concepts = excluded_concepts) %>%
Expand All @@ -58,25 +59,27 @@ df_melted_filtered <-
if("value" %in% colnames(.)) "value") %>%
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))
cat("recoverutils::melt_df() completed.\n")
cat("OK\n")

# Generate i2b2 summaries
cat("recoverutils::stat_summarize()....")
df_summarized <-
df_melted_filtered %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
recoverutils::stat_summarize() %>%
distinct()
cat("recoverutils::stat_summarize() completed.\n")
cat("OK\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
cat("recoverutils::process_df()....")
output_concepts <-
recoverutils::process_df(df_summarized, concept_map, concept_replacements_reversed, concept_map_concepts = "CONCEPT_CD", concept_map_units = "UNITS_CD") %>%
dplyr::mutate(nval_num = signif(nval_num, 9)) %>%
dplyr::arrange(concept) %>%
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverutils::process_df() completed.\n")
cat("OK\n")

# Identify the participants who have output concepts derived from fitbit variables
fitbit_participants <-
Expand Down
9 changes: 6 additions & 3 deletions scripts/process-data/fitbitdailydata.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ for (col_name in names(df_filtered)) {
}

# Pivot data frame from wide to long
cat("recoverutils::melt_df()....")
df_melted_filtered <-
df_filtered %>%
recoverutils::melt_df(excluded_concepts = excluded_concepts) %>%
Expand All @@ -63,19 +64,21 @@ df_melted_filtered <-
if("value" %in% colnames(.)) "value") %>%
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))
cat("recoverutils::melt_df() completed.\n")
cat("OK\n")

# Generate i2b2 summaries
cat("recoverutils::stat_summarize()....")
df_summarized <-
df_melted_filtered %>%
rename(startdate = dplyr::any_of(c("date", "datetime"))) %>%
mutate(enddate = if (!("enddate" %in% names(.))) NA else enddate) %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
recoverutils::stat_summarize() %>%
distinct()
cat("recoverutils::stat_summarize() completed.\n")
cat("OK\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
cat("recoverutils::process_df()....")
output_concepts <-
recoverutils::process_df(df_summarized,
concept_map,
Expand All @@ -87,7 +90,7 @@ output_concepts <-
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverutils::process_df() completed.\n")
cat("OK\n")

# Identify the participants who have output concepts derived from fitbit variables
curr_fitbit_participants <-
Expand Down
10 changes: 6 additions & 4 deletions scripts/process-data/fitbitecg.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#' `participantidentifier` and `concept` for all data, and
#' `participantidentifier`, `concept`, `year`, `week` for weekly summaries).
ecg_stat_summarize <- function(df) {
cat("Running ecg_stat_summarize()...\n")

if (!is.data.frame(df)) stop("df must be a data frame")
if (!all(c("participantidentifier", "concept", "value") %in% colnames(df))) stop("'participantidentifier', 'concept', and 'value' columns must be present in df")
Expand Down Expand Up @@ -131,6 +130,7 @@ approved_concepts_summarized <-
)

# Pivot data frame from wide to long
cat("recoverutils::melt_df()....")
df_melted_filtered <-
df %>%
mutate("SinusRhythm" = if_else(resultclassification == "Normal Sinus Rhythm", 1, NA),
Expand All @@ -143,9 +143,10 @@ df_melted_filtered <-
if("value" %in% colnames(.)) "value") %>%
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))
cat("recoverutils::melt_df() completed.\n")
cat("OK\n")

# Generate i2b2 summaries
cat("ecg_stat_summarize()....")
df_summarized <-
df_melted_filtered %>%
rename(startdate = dplyr::any_of(c("date", "datetime"))) %>%
Expand All @@ -154,9 +155,10 @@ df_summarized <-
ecg_stat_summarize() %>%
mutate(value = as.numeric(value)) %>%
distinct()
cat("ecg_stat_summarize() completed.\n")
cat("OK\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
cat("recoverutils::process_df()....")
output_concepts <-
recoverutils::process_df(df_summarized,
concept_map,
Expand All @@ -168,7 +170,7 @@ output_concepts <-
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverutils::process_df() completed.\n")
cat("OK\n")

# Identify the participants who have output concepts derived from fitbit variables
curr_fitbit_participants <-
Expand Down
9 changes: 6 additions & 3 deletions scripts/process-data/fitbitintradaycombined.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ for (col_name in names(df_filtered)) {
}

# Pivot data frame from wide to long
cat("recoverutils::melt_df()....")
df_melted_filtered <-
df_filtered %>%
recoverutils::melt_df(excluded_concepts = excluded_concepts) %>%
Expand All @@ -81,27 +82,29 @@ df_melted_filtered <-
if("value" %in% colnames(.)) "value") %>%
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))
cat("recoverutils::melt_df() completed.\n")
cat("OK\n")

# Generate i2b2 summaries
cat("recoverutils::stat_summarize()....")
df_summarized <-
df_melted_filtered %>%
rename(startdate = dplyr::any_of(c("date", "datetime"))) %>%
mutate(enddate = if (!("enddate" %in% names(.))) NA else enddate) %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
recoverutils::stat_summarize() %>%
distinct()
cat("recoverutils::stat_summarize() completed.\n")
cat("OK\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
cat("recoverutils::process_df()....")
output_concepts <-
recoverutils::process_df(df_summarized, concept_map, concept_replacements_reversed, concept_map_concepts = "CONCEPT_CD", concept_map_units = "UNITS_CD") %>%
dplyr::mutate(nval_num = signif(nval_num, 9)) %>%
dplyr::arrange(concept) %>%
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverutils::process_df() completed.\n")
cat("OK\n")

# Identify the participants who have output concepts derived from fitbit variables
curr_fitbit_participants <-
Expand Down
10 changes: 6 additions & 4 deletions scripts/process-data/fitbitsleeplogs.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#' `participantidentifier` and `concept` for all data, and
#' `participantidentifier`, `concept`, `year`, `week` for weekly summaries).
sleeplogs_stat_summarize <- function(df) {
cat("Running sleeplogs_stat_summarize()...\n")

if (!is.data.frame(df)) stop("df must be a data frame")
if (!all(c("participantidentifier", "concept", "value") %in% colnames(df))) stop("'participantidentifier', 'concept', and 'value' columns must be present in df")
Expand Down Expand Up @@ -332,6 +331,7 @@ for (col_name in names(df_filtered)) {
}

# Pivot data frames from wide to long
cat("recoverutils::melt_df()....")
df_melted_filtered <-
df_filtered %>%
recoverutils::melt_df(excluded_concepts = excluded_concepts) %>%
Expand Down Expand Up @@ -366,9 +366,10 @@ numepisodes_df_melted_filtered_weekly <-
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))

cat("recoverutils::melt_df() completed.\n")
cat("OK\n")

# Generate i2b2 summaries
cat("sleeplogs_stat_summarize()....")
df_summarized <-
df_melted_filtered %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
Expand Down Expand Up @@ -397,17 +398,18 @@ final_df_summarized <-
numepisodes_df_summarized_weekly) %>%
dplyr::distinct()

cat("sleeplogs_stat_summarize() completed.\n")
cat("OK\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
cat("recoverutils::process_df()....")
output_concepts <-
recoverutils::process_df(final_df_summarized, concept_map, concept_replacements_reversed, concept_map_concepts = "CONCEPT_CD", concept_map_units = "UNITS_CD") %>%
dplyr::mutate(nval_num = signif(nval_num, 9)) %>%
dplyr::arrange(concept) %>%
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverutils::process_df() completed.\n")
cat("OK\n")

# Identify the participants who have output concepts derived from fitbit variables
curr_fitbit_participants <-
Expand Down
10 changes: 6 additions & 4 deletions scripts/process-data/healthkitv2electrocardiogram.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#' `participantidentifier` and `concept` for all data, and
#' `participantidentifier`, `concept`, `year`, `week` for weekly summaries).
ecg_stat_summarize <- function(df) {
cat("Running ecg_stat_summarize()...\n")

if (!is.data.frame(df)) stop("df must be a data frame")
if (!all(c("participantidentifier", "concept", "value") %in% colnames(df))) stop("'participantidentifier', 'concept', and 'value' columns must be present in df")
Expand Down Expand Up @@ -136,6 +135,7 @@ approved_concepts_summarized <-
)

# Pivot data frame from wide to long
cat("recoverutils::melt_df()....")
df_melted_filtered <-
df %>%
mutate("SinusRhythm" = if_else(classification == "SinusRhythm", 1, NA),
Expand All @@ -148,18 +148,20 @@ df_melted_filtered <-
if("value" %in% colnames(.)) "value") %>%
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))
cat("recoverutils::melt_df() completed.\n")
cat("OK\n")

# Generate i2b2 summaries
cat("ecg_stat_summarize()....")
df_summarized <-
df_melted_filtered %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
ecg_stat_summarize() %>%
mutate(value = as.numeric(value)) %>%
distinct()
cat("ecg_stat_summarize() completed.\n")
cat("OK\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
cat("recoverutils::process_df()....")
output_concepts <-
recoverutils::process_df(df_summarized,
concept_map,
Expand All @@ -171,7 +173,7 @@ output_concepts <-
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverutils::process_df() completed.\n")
cat("OK\n")

# Identify the participants who have output concepts derived from healthkit variables
curr_hk_participants <-
Expand Down
6 changes: 4 additions & 2 deletions scripts/process-data/healthkitv2samples.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,20 +74,22 @@ df_melted_filtered <-
cat("Melt and filtering step completed.\n")

# Generate i2b2 summaries
cat("recoverutils::stat_summarize()....")
df_summarized <-
df_melted_filtered %>%
rename(enddate = "date") %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
recoverutils::stat_summarize() %>%
distinct()
cat("recoverutils::stat_summarize() completed.\n")
cat("OK\n")

tmp_concept_replacements <- c("respiratoryrate" = "breathingrate",
"heartratevariability" = "hrv",
"heartrate" = "avghr",
"oxygensaturation" = "spo2avg")

# Add i2b2 columns from concept map (ontology file) and clean the output
cat("recoverutils::process_df()....")
output_concepts <-
recoverutils::process_df(df_summarized,
concept_map,
Expand All @@ -99,7 +101,7 @@ output_concepts <-
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverutils::process_df() completed.\n")
cat("OK\n")

# Identify the participants who have output concepts derived from healthkit variables
curr_hk_participants <-
Expand Down
6 changes: 4 additions & 2 deletions scripts/process-data/healthkitv2statistics.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,19 @@ df_melted_filtered <-
cat("Melt and filtering step completed.\n")

# Generate i2b2 summaries
cat("recoverutils::stat_summarize()....")
df_summarized <-
df_melted_filtered %>%
rename(enddate = "date") %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
recoverutils::stat_summarize() %>%
distinct()
cat("recoverutils::stat_summarize() completed.\n")
cat("OK\n")

tmp_concept_replacements <- c("dailysteps" = "steps")

# Add i2b2 columns from concept map (ontology file) and clean the output
cat("recoverutils::process_df()....")
output_concepts <-
recoverutils::process_df(df_summarized,
concept_map,
Expand All @@ -71,7 +73,7 @@ output_concepts <-
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverutils::process_df() completed.\n")
cat("OK\n")

# Identify the participants who have output concepts derived from healthkit variables
curr_hk_participants <-
Expand Down
3 changes: 2 additions & 1 deletion scripts/process-data/participant_devices.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ df_joined <-
# Add i2b2 columns from concept map (ontology file) and clean the output
concept_map_concepts <- "CONCEPT_CD"
concept_map_units <- "UNITS_CD"
cat("recoverutils::process_df()....")
output_concepts <-
df_joined %>%
dplyr::mutate(valtype_cd = dplyr::case_when(class(value) == "numeric" ~ "N",
Expand All @@ -84,7 +85,7 @@ output_concepts <-
dplyr::select(participantidentifier, startdate, enddate,
concept, valtype_cd, nval_num, tval_char, UNITS_CD) %>%
dplyr::rename(units_cd = UNITS_CD)
cat("recoverutils::process_df() completed.\n")
cat("OK\n")

# Write the output
output_concepts %>%
Expand Down

0 comments on commit 5d86d2e

Please sign in to comment.