diff --git a/scripts/egress/egress.R b/scripts/egress/egress.R index 13c7874..cb2ce59 100644 --- a/scripts/egress/egress.R +++ b/scripts/egress/egress.R @@ -1,6 +1,7 @@ -cat("Beginning egress: storing output concepts, input concept map, and input variable list in Synapse\n") +cat("\n----Beginning egress: storing output concepts, + input concept map, and input variable list in Synapse----\n") -synapser::synLogin() +login <- synapser::synLogin() # Write the following to Synapse: 1) the final output concepts data, 2) the input data used in this pipeline latest_commit <- @@ -37,4 +38,4 @@ rm(latest_commit, file_name ) -cat("Finished egress\n\n") \ No newline at end of file +cat("\n----Finished egress----\n") diff --git a/scripts/fetch-data/fetch_data.R b/scripts/fetch-data/fetch_data.R index b8e8db0..1ede767 100644 --- a/scripts/fetch-data/fetch_data.R +++ b/scripts/fetch-data/fetch_data.R @@ -1,8 +1,8 @@ library(tidyverse) -cat("Fetching data\n") +cat("\n----Fetching data and connecting to S3 bucket----\n") -synapser::synLogin() +login <- synapser::synLogin() # Get input files from synapse concept_map <- @@ -61,4 +61,5 @@ if (!dir.exists(outputConceptsDir)) { dir.create(outputConceptsDir) } -cat("Finished fetching data\n\n") +cat("\n----Finished----\n") + diff --git a/scripts/process-data/fitbitactivitylogs.R b/scripts/process-data/fitbitactivitylogs.R index f57cfed..84a9325 100644 --- a/scripts/process-data/fitbitactivitylogs.R +++ b/scripts/process-data/fitbitactivitylogs.R @@ -1,6 +1,6 @@ dataset <- "fitbitactivitylogs" -cat(glue::glue("Transforming data for {dataset}"),"\n") +cat(paste0("\n----", glue::glue("Transforming data for {dataset}"), "----\n")) # Get variables for this dataset vars <- @@ -93,7 +93,7 @@ output_concepts %>% write.csv(file.path(outputConceptsDir, glue::glue("{dataset}.csv")), row.names = F) cat(glue::glue("output_concepts written to {file.path(outputConceptsDir, paste0(dataset, '.csv'))}"),"\n") -cat(glue::glue("Finished transforming data for {dataset}"),"\n\n") +cat(paste0("\n----", glue::glue("Finished transforming data for {dataset}"),"\n")) # Remove objects created here from the global environment rm(dataset, diff --git a/scripts/process-data/fitbitdailydata.R b/scripts/process-data/fitbitdailydata.R index 188cb5e..efcda79 100644 --- a/scripts/process-data/fitbitdailydata.R +++ b/scripts/process-data/fitbitdailydata.R @@ -1,6 +1,6 @@ dataset <- "fitbitdailydata" -cat(glue::glue("Transforming data for {dataset}"),"\n") +cat(paste0("\n----", glue::glue("Transforming data for {dataset}"), "----\n")) # Get variables for this dataset vars <- @@ -112,7 +112,7 @@ output_concepts %>% write.csv(file.path(outputConceptsDir, glue::glue("{dataset}.csv")), row.names = F) cat(glue::glue("output_concepts written to {file.path(outputConceptsDir, paste0(dataset, '.csv'))}"), "\n") -cat(glue::glue("Finished transforming data for {dataset}"),"\n\n") +cat(paste0("\n----", glue::glue("Finished transforming data for {dataset}"),"\n")) # Remove objects created here from the global environment rm(dataset, diff --git a/scripts/process-data/fitbitecg.R b/scripts/process-data/fitbitecg.R index 37eece4..7dd3202 100644 --- a/scripts/process-data/fitbitecg.R +++ b/scripts/process-data/fitbitecg.R @@ -102,7 +102,7 @@ ecg_stat_summarize <- function(df) { dataset <- "fitbitecg$" -cat(glue::glue("Transforming data for {dataset}"),"\n") +cat(paste0("\n----", glue::glue("Transforming data for {dataset}"), "----\n")) # Get variables for this dataset vars <- @@ -116,7 +116,7 @@ df <- select(all_of(c(vars))) %>% filter(ResultClassification %in% c("Normal Sinus Rhythm", "Atrial Fibrillation")) %>% rename(StartDate = StartTime) %>% - mutate(EndDate = base_s3_uri %>% stringr::str_extract("[0-9]{4}-[0-9]{2}-[0-9]{2}")) %>% + mutate(EndDate = bucket_path %>% stringr::str_extract("[0-9]{4}-[0-9]{2}-[0-9]{2}")) %>% collect() colnames(df) <- tolower(colnames(df)) @@ -193,7 +193,7 @@ output_concepts %>% write.csv(file.path(outputConceptsDir, glue::glue("{dataset}.csv")), row.names = F) cat(glue::glue("output_concepts written to {file.path(outputConceptsDir, paste0(dataset, '.csv'))}"), "\n") -cat(glue::glue("Finished transforming data for {dataset}"),"\n\n") +cat(paste0("\n----", glue::glue("Finished transforming data for {dataset}"),"\n")) # Remove objects created here from the global environment rm(ecg_stat_summarize, diff --git a/scripts/process-data/fitbitintradaycombined.R b/scripts/process-data/fitbitintradaycombined.R index dd2aaf1..1fb0f8a 100644 --- a/scripts/process-data/fitbitintradaycombined.R +++ b/scripts/process-data/fitbitintradaycombined.R @@ -1,6 +1,6 @@ dataset <- "fitbitintradaycombined" -cat(glue::glue("Transforming data for {dataset}"),"\n") +cat(paste0("\n----", glue::glue("Transforming data for {dataset}"), "----\n")) # Get variables for this dataset vars <- @@ -126,7 +126,7 @@ output_concepts %>% write.csv(file.path(outputConceptsDir, glue::glue("{dataset}.csv")), row.names = F) cat(glue::glue("output_concepts written to {file.path(outputConceptsDir, paste0(dataset, '.csv'))}"), "\n") -cat(glue::glue("Finished transforming data for {dataset}"),"\n\n") +cat(paste0("\n----", glue::glue("Finished transforming data for {dataset}"),"\n")) # Remove objects created here from the global environment rm(dataset, diff --git a/scripts/process-data/fitbitsleeplogs.R b/scripts/process-data/fitbitsleeplogs.R index 2936013..8a236c4 100644 --- a/scripts/process-data/fitbitsleeplogs.R +++ b/scripts/process-data/fitbitsleeplogs.R @@ -128,7 +128,7 @@ sleeplogs_stat_summarize <- function(df) { dataset <- "fitbitsleeplogs$" -cat(glue::glue("Transforming data for {dataset}"),"\n") +cat(paste0("\n----", glue::glue("Transforming data for {dataset}"), "----\n")) # Get variables for this dataset vars <- @@ -432,7 +432,7 @@ output_concepts %>% write.csv(file.path(outputConceptsDir, glue::glue("{dataset}.csv")), row.names = F) cat(glue::glue("output_concepts written to {file.path(outputConceptsDir, paste0(dataset, '.csv'))}"),"\n") -cat(glue::glue("Finished transforming data for {dataset}"),"\n\n") +cat(paste0("\n----", glue::glue("Finished transforming data for {dataset}"),"\n")) # Remove objects created here from the global environment rm(sleeplogs_stat_summarize, diff --git a/scripts/process-data/healthkitv2electrocardiogram.R b/scripts/process-data/healthkitv2electrocardiogram.R index ab21a97..9c018d7 100644 --- a/scripts/process-data/healthkitv2electrocardiogram.R +++ b/scripts/process-data/healthkitv2electrocardiogram.R @@ -102,7 +102,7 @@ ecg_stat_summarize <- function(df) { dataset <- "healthkitv2electrocardiogram$" -cat(glue::glue("Transforming data for {dataset}"),"\n") +cat(paste0("\n----", glue::glue("Transforming data for {dataset}"), "----\n")) # Get variables for this dataset vars <- @@ -196,7 +196,7 @@ output_concepts %>% write.csv(file.path(outputConceptsDir, glue::glue("{dataset}.csv")), row.names = F) cat(glue::glue("output_concepts written to {file.path(outputConceptsDir, paste0(dataset, '.csv'))}"),"\n") -cat(glue::glue("Finished transforming data for {dataset}"),"\n\n") +cat(paste0("\n----", glue::glue("Finished transforming data for {dataset}"),"\n")) # Remove objects created here from the global environment rm(ecg_stat_summarize, diff --git a/scripts/process-data/healthkitv2samples.R b/scripts/process-data/healthkitv2samples.R index e167bb7..0ba9b58 100644 --- a/scripts/process-data/healthkitv2samples.R +++ b/scripts/process-data/healthkitv2samples.R @@ -1,6 +1,6 @@ dataset <- "healthkitv2samples" -cat(glue::glue("Transforming data for {dataset}"),"\n") +cat(paste0("\n----", glue::glue("Transforming data for {dataset}"), "----\n")) # Get variables for this dataset vars <- @@ -116,7 +116,7 @@ output_concepts %>% write.csv(file.path(outputConceptsDir, glue::glue("{dataset}.csv")), row.names = F) cat(glue::glue("output_concepts written to {file.path(outputConceptsDir, paste0(dataset, '.csv'))}"),"\n") -cat(glue::glue("Finished transforming data for {dataset}"),"\n\n") +cat(paste0("\n----", glue::glue("Finished transforming data for {dataset}"),"\n")) # Remove objects created here from the global environment rm(dataset, diff --git a/scripts/process-data/healthkitv2statistics.R b/scripts/process-data/healthkitv2statistics.R index 0d2dc8e..9a4c35a 100644 --- a/scripts/process-data/healthkitv2statistics.R +++ b/scripts/process-data/healthkitv2statistics.R @@ -1,6 +1,6 @@ dataset <- "healthkitv2statistics" -cat(glue::glue("Transforming data for {dataset}"),"\n") +cat(paste0("\n----", glue::glue("Transforming data for {dataset}"), "----\n")) # Get variables for this dataset vars <- @@ -96,7 +96,7 @@ output_concepts %>% write.csv(file.path(outputConceptsDir, glue::glue("{dataset}.csv")), row.names = F) cat(glue::glue("output_concepts written to {file.path(outputConceptsDir, paste0(dataset, '.csv'))}"),"\n") -cat(glue::glue("Finished transforming data for {dataset}"),"\n\n") +cat(paste0("\n----", glue::glue("Finished transforming data for {dataset}"),"\n")) # Remove objects created here from the global environment rm(dataset, diff --git a/scripts/process-data/participant_devices.R b/scripts/process-data/participant_devices.R index 0a51745..a5b0945 100644 --- a/scripts/process-data/participant_devices.R +++ b/scripts/process-data/participant_devices.R @@ -1,6 +1,6 @@ dataset <- c("fitbitdevices", "healthkitv2samples") -cat(glue::glue("Transforming device data for {dataset}"),"\n") +cat(paste0("\n----", glue::glue("Transforming data for datasets: {dataset[1]}, {dataset[2]}"), "----\n")) # Get variables for this dataset vars <- list(fitbitdevices = c("ParticipantIdentifier", @@ -13,7 +13,7 @@ vars <- list(fitbitdevices = c("ParticipantIdentifier", df <- lapply(dataset, function(x) { tmp <- vars[[x]] - arrow::open_dataset(s3$path(str_subset(dataset_paths, dataset))) %>% + arrow::open_dataset(s3$path(str_subset(dataset_paths, x))) %>% select(all_of(tmp)) %>% dplyr::rename_with(tolower) %>% collect() diff --git a/scripts/write-output/final-output-concepts.R b/scripts/write-output/final-output-concepts.R index e2ba5fd..c565e95 100644 --- a/scripts/write-output/final-output-concepts.R +++ b/scripts/write-output/final-output-concepts.R @@ -1,4 +1,4 @@ -cat("Creating final output concepts\n") +cat("\n----Creating final output concepts----\n") # Read each dataset's (intermediate) i2b2 output concepts CSV file, combine # them, and de-duplicate data if it already exists (fitbit data is highest @@ -66,4 +66,4 @@ rm(datasets, valid_participants ) -cat("Finished creating final output concepts\n\n") +cat("\n----Finished creating final output concepts----\n")