Skip to content

Commit

Permalink
Merge pull request #29 from pranavanba/main
Browse files Browse the repository at this point in the history
Minor changes due to renaming of recoverSummarizeR package
  • Loading branch information
pranavanba authored Jul 11, 2024
2 parents ea94c15 + 6f68585 commit 2be17d0
Show file tree
Hide file tree
Showing 13 changed files with 53 additions and 53 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM ghcr.io/pranavanba/rocker-sage:main

RUN R -e 'install.packages("devtools")'
RUN R -e 'devtools::install_github("Sage-Bionetworks/recoverSummarizeR")'
RUN R -e 'devtools::install_github("Sage-Bionetworks/recoverutils")'

RUN apt-get update && apt-get install -y git

Expand Down
6 changes: 3 additions & 3 deletions scripts/egress/egress.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ latest_commit_tree_url <-
latest_commit$html_url %>%
stringr::str_replace("commit", "tree")

recoverSummarizeR::store_in_syn(
recoverutils::store_in_syn(
synFolderID = synFolderID,
filepath = file.path(outputConceptsDir, "output_concepts.csv"),
used_param = c(ontologyFileID, parquetDirID, selectedVarsFileID),
Expand All @@ -24,12 +24,12 @@ cat(glue::glue("Output concepts stored at {synFolderID}"), "\n\n")

file_name <- "concepts_map.csv"
write.csv(concept_map, file = file_name, row.names = F)
store_in_syn(synFolderID, file_name, used_param = ontologyFileID)
recoverutils::store_in_syn(synFolderID, file_name, used_param = ontologyFileID)
cat(glue::glue("The input concept map used was stored at {synFolderID} as '{file_name}'"), "\n\n")

file_name <- "selected_vars.csv"
write.csv(selected_vars, file = file_name, row.names = F)
store_in_syn(synFolderID, file_name, used_param = selectedVarsFileID)
recoverutils::store_in_syn(synFolderID, file_name, used_param = selectedVarsFileID)
cat(glue::glue("The input variable list used was stored at {synFolderID} as '{file_name}'"), "\n\n")

rm(latest_commit,
Expand Down
8 changes: 4 additions & 4 deletions scripts/fetch-data/fetch_data.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
library(synapser)
library(recoverSummarizeR)
library(recoverutils)
library(dplyr)

cat("Fetching data\n")
Expand All @@ -8,11 +8,11 @@ synLogin()

# Get input files from synapse
concept_map <-
syn_file_to_df(ontologyFileID, "CONCEPT_CD") %>%
recoverutils::syn_file_to_df(ontologyFileID, "CONCEPT_CD") %>%
filter(CONCEPT_CD!="<null>")

selected_vars <-
syn_file_to_df(selectedVarsFileID) %>%
recoverutils::syn_file_to_df(selectedVarsFileID) %>%
mutate(Lower_Bound = suppressWarnings(as.numeric(Lower_Bound)),
Upper_Bound = suppressWarnings(as.numeric(Upper_Bound)))

Expand Down Expand Up @@ -49,7 +49,7 @@ system(sync_cmd)
rm(sync_cmd)

# For use in process-data steps
concept_replacements_reversed <- vec_reverse(concept_replacements)
concept_replacements_reversed <- recoverutils::vec_reverse(concept_replacements)

if (!dir.exists(outputConceptsDir)) {
dir.create(outputConceptsDir)
Expand Down
12 changes: 6 additions & 6 deletions scripts/process-data/fitbitactivitylogs.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,32 +53,32 @@ for (col_name in names(df_filtered)) {
# Pivot data frame from wide to long
df_melted_filtered <-
df_filtered %>%
recoverSummarizeR::melt_df(excluded_concepts = excluded_concepts) %>%
recoverutils::melt_df(excluded_concepts = excluded_concepts) %>%
select(if("participantidentifier" %in% colnames(.)) "participantidentifier",
dplyr::matches("(?<!_)date(?!_)", perl = T),
if("concept" %in% colnames(.)) "concept",
if("value" %in% colnames(.)) "value") %>%
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))
cat("recoverSummarizeR::melt_df() completed.\n")
cat("recoverutils::melt_df() completed.\n")

# Generate i2b2 summaries
df_summarized <-
df_melted_filtered %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
recoverSummarizeR::stat_summarize() %>%
recoverutils::stat_summarize() %>%
distinct()
cat("recoverSummarizeR::stat_summarize() completed.\n")
cat("recoverutils::stat_summarize() completed.\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
output_concepts <-
process_df(df_summarized, concept_map, concept_replacements_reversed, concept_map_concepts = "CONCEPT_CD", concept_map_units = "UNITS_CD") %>%
recoverutils::process_df(df_summarized, concept_map, concept_replacements_reversed, concept_map_concepts = "CONCEPT_CD", concept_map_units = "UNITS_CD") %>%
dplyr::mutate(nval_num = signif(nval_num, 9)) %>%
dplyr::arrange(concept) %>%
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverSummarizeR::process_df() completed.\n")
cat("recoverutils::process_df() completed.\n")

# Identify the participants who have output concepts derived from fitbit variables
fitbit_participants <-
Expand Down
12 changes: 6 additions & 6 deletions scripts/process-data/fitbitdailydata.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,28 +58,28 @@ for (col_name in names(df_filtered)) {
# Pivot data frame from wide to long
df_melted_filtered <-
df_filtered %>%
recoverSummarizeR::melt_df(excluded_concepts = excluded_concepts) %>%
recoverutils::melt_df(excluded_concepts = excluded_concepts) %>%
select(if("participantidentifier" %in% colnames(.)) "participantidentifier",
dplyr::matches("(?<!_)date(?!_)", perl = T),
if("concept" %in% colnames(.)) "concept",
if("value" %in% colnames(.)) "value") %>%
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))
cat("recoverSummarizeR::melt_df() completed.\n")
cat("recoverutils::melt_df() completed.\n")

# Generate i2b2 summaries
df_summarized <-
df_melted_filtered %>%
rename(startdate = dplyr::any_of(c("date", "datetime"))) %>%
mutate(enddate = if (!("enddate" %in% names(.))) NA else enddate) %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
recoverSummarizeR::stat_summarize() %>%
recoverutils::stat_summarize() %>%
distinct()
cat("recoverSummarizeR::stat_summarize() completed.\n")
cat("recoverutils::stat_summarize() completed.\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
output_concepts <-
process_df(df_summarized,
recoverutils::process_df(df_summarized,
concept_map,
concept_replacements_reversed,
concept_map_concepts = "CONCEPT_CD",
Expand All @@ -89,7 +89,7 @@ output_concepts <-
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverSummarizeR::process_df() completed.\n")
cat("recoverutils::process_df() completed.\n")

# Identify the participants who have output concepts derived from fitbit variables
curr_fitbit_participants <-
Expand Down
8 changes: 4 additions & 4 deletions scripts/process-data/fitbitecg.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,14 @@ df_melted_filtered <-
mutate("SinusRhythm" = if_else(resultclassification == "Normal Sinus Rhythm", 1, NA),
"AtrialFibrillation" = if_else(resultclassification == "Atrial Fibrillation", 1, NA)) %>%
select(-c(resultclassification)) %>%
recoverSummarizeR::melt_df(excluded_concepts = excluded_concepts) %>%
recoverutils::melt_df(excluded_concepts = excluded_concepts) %>%
select(if("participantidentifier" %in% colnames(.)) "participantidentifier",
dplyr::matches("(?<!_)date(?!_)", perl = T),
if("concept" %in% colnames(.)) "concept",
if("value" %in% colnames(.)) "value") %>%
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))
cat("recoverSummarizeR::melt_df() completed.\n")
cat("recoverutils::melt_df() completed.\n")

# Generate i2b2 summaries
df_summarized <-
Expand All @@ -160,7 +160,7 @@ cat("ecg_stat_summarize() completed.\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
output_concepts <-
recoverSummarizeR::process_df(df_summarized,
recoverutils::process_df(df_summarized,
concept_map,
concept_replacements_reversed,
concept_map_concepts = "CONCEPT_CD",
Expand All @@ -170,7 +170,7 @@ output_concepts <-
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverSummarizeR::process_df() completed.\n")
cat("recoverutils::process_df() completed.\n")

# Identify the participants who have output concepts derived from fitbit variables
curr_fitbit_participants <-
Expand Down
12 changes: 6 additions & 6 deletions scripts/process-data/fitbitintradaycombined.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,34 +76,34 @@ for (col_name in names(df_filtered)) {
# Pivot data frame from wide to long
df_melted_filtered <-
df_filtered %>%
recoverSummarizeR::melt_df(excluded_concepts = excluded_concepts) %>%
recoverutils::melt_df(excluded_concepts = excluded_concepts) %>%
select(if("participantidentifier" %in% colnames(.)) "participantidentifier",
dplyr::matches("(?<!_)date(?!_)", perl = T),
if("concept" %in% colnames(.)) "concept",
if("value" %in% colnames(.)) "value") %>%
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))
cat("recoverSummarizeR::melt_df() completed.\n")
cat("recoverutils::melt_df() completed.\n")

# Generate i2b2 summaries
df_summarized <-
df_melted_filtered %>%
rename(startdate = dplyr::any_of(c("date", "datetime"))) %>%
mutate(enddate = if (!("enddate" %in% names(.))) NA else enddate) %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
recoverSummarizeR::stat_summarize() %>%
recoverutils::stat_summarize() %>%
distinct()
cat("recoverSummarizeR::stat_summarize() completed.\n")
cat("recoverutils::stat_summarize() completed.\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
output_concepts <-
process_df(df_summarized, concept_map, concept_replacements_reversed, concept_map_concepts = "CONCEPT_CD", concept_map_units = "UNITS_CD") %>%
recoverutils::process_df(df_summarized, concept_map, concept_replacements_reversed, concept_map_concepts = "CONCEPT_CD", concept_map_units = "UNITS_CD") %>%
dplyr::mutate(nval_num = signif(nval_num, 9)) %>%
dplyr::arrange(concept) %>%
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverSummarizeR::process_df() completed.\n")
cat("recoverutils::process_df() completed.\n")

# Identify the participants who have output concepts derived from fitbit variables
curr_fitbit_participants <-
Expand Down
8 changes: 4 additions & 4 deletions scripts/process-data/fitbitsleeplogs.R
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ for (col_name in names(df_filtered)) {
# Pivot data frames from wide to long
df_melted_filtered <-
df_filtered %>%
recoverSummarizeR::melt_df(excluded_concepts = excluded_concepts) %>%
recoverutils::melt_df(excluded_concepts = excluded_concepts) %>%
select(if("participantidentifier" %in% colnames(.)) "participantidentifier",
dplyr::matches("(?<!_)date(?!_)", perl = T),
if("concept" %in% colnames(.)) "concept",
Expand Down Expand Up @@ -368,7 +368,7 @@ numepisodes_df_melted_filtered_weekly <-
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))

cat("recoverSummarizeR::melt_df() completed.\n")
cat("recoverutils::melt_df() completed.\n")

# Generate i2b2 summaries
df_summarized <-
Expand Down Expand Up @@ -403,13 +403,13 @@ cat("sleeplogs_stat_summarize() completed.\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
output_concepts <-
process_df(final_df_summarized, concept_map, concept_replacements_reversed, concept_map_concepts = "CONCEPT_CD", concept_map_units = "UNITS_CD") %>%
recoverutils::process_df(final_df_summarized, concept_map, concept_replacements_reversed, concept_map_concepts = "CONCEPT_CD", concept_map_units = "UNITS_CD") %>%
dplyr::mutate(nval_num = signif(nval_num, 9)) %>%
dplyr::arrange(concept) %>%
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverSummarizeR::process_df() completed.\n")
cat("recoverutils::process_df() completed.\n")

# Identify the participants who have output concepts derived from fitbit variables
curr_fitbit_participants <-
Expand Down
12 changes: 6 additions & 6 deletions scripts/process-data/healthkitv2activitysummaries.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,32 +46,32 @@ for (col_name in names(df_filtered)) {

df_melted_filtered <-
df_filtered %>%
recoverSummarizeR::melt_df(excluded_concepts = excluded_concepts) %>%
recoverutils::melt_df(excluded_concepts = excluded_concepts) %>%
select(if("participantidentifier" %in% colnames(.)) "participantidentifier",
dplyr::matches("(?<!_)date(?!_)", perl = T),
if("concept" %in% colnames(.)) "concept",
if("value" %in% colnames(.)) "value") %>%
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))
cat("recoverSummarizeR::melt_df() completed.\n")
cat("recoverutils::melt_df() completed.\n")

df_summarized <-
df_melted_filtered %>%
rename(startdate = dplyr::any_of(c("date", "datetime"))) %>%
mutate(enddate = if (!("enddate" %in% names(.))) NA else enddate) %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
recoverSummarizeR::stat_summarize() %>%
recoverutils::stat_summarize() %>%
distinct()
cat("recoverSummarizeR::stat_summarize() completed.\n")
cat("recoverutils::stat_summarize() completed.\n")

output_concepts <-
process_df(df_summarized, concept_map, concept_replacements_reversed, concept_map_concepts = "CONCEPT_CD", concept_map_units = "UNITS_CD") %>%
recoverutils::process_df(df_summarized, concept_map, concept_replacements_reversed, concept_map_concepts = "CONCEPT_CD", concept_map_units = "UNITS_CD") %>%
dplyr::mutate(nval_num = signif(nval_num, 9)) %>%
dplyr::arrange(concept) %>%
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverSummarizeR::process_df() completed.\n")
cat("recoverutils::process_df() completed.\n")

output_concepts %>%
write.csv(file.path(outputConceptsDir, glue::glue("{dataset}.csv")), row.names = F)
Expand Down
8 changes: 4 additions & 4 deletions scripts/process-data/healthkitv2electrocardiogram.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,14 @@ df_melted_filtered <-
mutate("SinusRhythm" = if_else(classification == "SinusRhythm", 1, NA),
"AtrialFibrillation" = if_else(classification == "AtrialFibrillation", 1, NA)) %>%
select(-c(classification)) %>%
recoverSummarizeR::melt_df(excluded_concepts = excluded_concepts) %>%
recoverutils::melt_df(excluded_concepts = excluded_concepts) %>%
select(if("participantidentifier" %in% colnames(.)) "participantidentifier",
dplyr::matches("(?<!_)date(?!_)", perl = T),
if("concept" %in% colnames(.)) "concept",
if("value" %in% colnames(.)) "value") %>%
tidyr::drop_na("value") %>%
mutate(value = as.numeric(value))
cat("recoverSummarizeR::melt_df() completed.\n")
cat("recoverutils::melt_df() completed.\n")

# Generate i2b2 summaries
df_summarized <-
Expand All @@ -163,7 +163,7 @@ cat("ecg_stat_summarize() completed.\n")

# Add i2b2 columns from concept map (ontology file) and clean the output
output_concepts <-
recoverSummarizeR::process_df(df_summarized,
recoverutils::process_df(df_summarized,
concept_map,
concept_replacements_reversed,
concept_map_concepts = "CONCEPT_CD",
Expand All @@ -173,7 +173,7 @@ output_concepts <-
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverSummarizeR::process_df() completed.\n")
cat("recoverutils::process_df() completed.\n")

# Identify the participants who have output concepts derived from healthkit variables
curr_hk_participants <-
Expand Down
8 changes: 4 additions & 4 deletions scripts/process-data/healthkitv2samples.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ df_summarized <-
df_melted_filtered %>%
rename(enddate = "date") %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
recoverSummarizeR::stat_summarize() %>%
recoverutils::stat_summarize() %>%
distinct()
cat("recoverSummarizeR::stat_summarize() completed.\n")
cat("recoverutils::stat_summarize() completed.\n")

tmp_concept_replacements <- c("respiratoryrate" = "breathingrate",
"heartratevariability" = "hrv",
Expand All @@ -91,7 +91,7 @@ tmp_concept_replacements <- c("respiratoryrate" = "breathingrate",

# Add i2b2 columns from concept map (ontology file) and clean the output
output_concepts <-
process_df(df_summarized,
recoverutils::process_df(df_summarized,
concept_map,
concept_replacements_reversed = tmp_concept_replacements,
concept_map_concepts = "CONCEPT_CD",
Expand All @@ -101,7 +101,7 @@ output_concepts <-
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverSummarizeR::process_df() completed.\n")
cat("recoverutils::process_df() completed.\n")

# Identify the participants who have output concepts derived from healthkit variables
curr_hk_participants <-
Expand Down
8 changes: 4 additions & 4 deletions scripts/process-data/healthkitv2statistics.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,15 @@ df_summarized <-
df_melted_filtered %>%
rename(enddate = "date") %>%
select(all_of(c("participantidentifier", "startdate", "enddate", "concept", "value"))) %>%
recoverSummarizeR::stat_summarize() %>%
recoverutils::stat_summarize() %>%
distinct()
cat("recoverSummarizeR::stat_summarize() completed.\n")
cat("recoverutils::stat_summarize() completed.\n")

tmp_concept_replacements <- c("dailysteps" = "steps")

# Add i2b2 columns from concept map (ontology file) and clean the output
output_concepts <-
process_df(df_summarized,
recoverutils::process_df(df_summarized,
concept_map,
concept_replacements_reversed = tmp_concept_replacements,
concept_map_concepts = "CONCEPT_CD",
Expand All @@ -73,7 +73,7 @@ output_concepts <-
dplyr::mutate(dplyr::across(.cols = dplyr::everything(), .fns = as.character)) %>%
replace(is.na(.), "<null>") %>%
dplyr::filter(nval_num != "<null>" | tval_char != "<null>")
cat("recoverSummarizeR::process_df() completed.\n")
cat("recoverutils::process_df() completed.\n")

# Identify the participants who have output concepts derived from healthkit variables
curr_hk_participants <-
Expand Down
2 changes: 1 addition & 1 deletion scripts/process-data/participant_devices.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ output_concepts <-
dplyr::select(participantidentifier, startdate, enddate,
concept, valtype_cd, nval_num, tval_char, UNITS_CD) %>%
dplyr::rename(units_cd = UNITS_CD)
cat("recoverSummarizeR::process_df() completed.\n")
cat("recoverutils::process_df() completed.\n")

# Write the output
output_concepts %>%
Expand Down

0 comments on commit 2be17d0

Please sign in to comment.