From 66e36c03bbb5263bd19706e5eeac6e64c1235dc8 Mon Sep 17 00:00:00 2001 From: Pranav Anbarasu Date: Mon, 18 Mar 2024 21:35:30 +0000 Subject: [PATCH 1/3] Convert dataset column names to lowercase more efficiently --- scripts/process-data/participant_devices.R | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/scripts/process-data/participant_devices.R b/scripts/process-data/participant_devices.R index 4730117..fe46e4b 100644 --- a/scripts/process-data/participant_devices.R +++ b/scripts/process-data/participant_devices.R @@ -13,18 +13,14 @@ vars <- list(fitbitdevices = c("ParticipantIdentifier", # Load the desired subset of this dataset in memory df <- - sapply(dataset, function(x) { + lapply(dataset, function(x) { tmp <- vars[[x]] arrow::open_dataset(file.path(downloadLocation, glue::glue("dataset_{x}"))) %>% select(all_of(tmp)) %>% + dplyr::rename_with(tolower) %>% collect() - }) - -df <- - lapply(df, function(x) { - colnames(x) <- tolower(colnames(x)) - return(x) - }) + }) %>% + setNames(dataset) # Get lists of participants that i2b2 summaries are already generated for and # add a variable to indicate device type From e03eaac6f77f8f1ecf60e74b2630b561ea0799e2 Mon Sep 17 00:00:00 2001 From: Pranav Anbarasu Date: Mon, 18 Mar 2024 22:12:56 +0000 Subject: [PATCH 2/3] RMHDR-244 Participant device type is "Apple" if "Apple" among other non-fitbit device types for a participant --- scripts/process-data/participant_devices.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/process-data/participant_devices.R b/scripts/process-data/participant_devices.R index fe46e4b..d7f9ed4 100644 --- a/scripts/process-data/participant_devices.R +++ b/scripts/process-data/participant_devices.R @@ -53,6 +53,7 @@ df_joined <- summarise(type = toString(sort(unique(type)))) %>% mutate(concept = "mhp:device") %>% rename(value = type) %>% + mutate(value = ifelse({grepl(", Apple|Apple, ", value)}, "Apple", value)) %>% select(all_of(c("participantidentifier", "concept", "value"))) %>% ungroup() From 02740aaf07c08989b11d5c2fbc2183da7a61bc77 Mon Sep 17 00:00:00 2001 From: Pranav Anbarasu Date: Mon, 18 Mar 2024 22:52:56 +0000 Subject: [PATCH 3/3] RMHDR-243 Exclude records where device_model is "iPhone" and where a participant's only summarized device type is "Other" --- scripts/process-data/participant_devices.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/process-data/participant_devices.R b/scripts/process-data/participant_devices.R index d7f9ed4..919956c 100644 --- a/scripts/process-data/participant_devices.R +++ b/scripts/process-data/participant_devices.R @@ -29,14 +29,15 @@ fitbit_participants <- read.csv(file.path(outputConceptsDir, "fitbit_participant df$fitbitdevices <- df$fitbitdevices %>% dplyr::filter(participantidentifier %in% fitbit_participants$participantidentifier) %>% - dplyr::mutate(type = ifelse(is.na(device) | device == "", NA, "fitbit")) + dplyr::mutate(type = ifelse(is.na(device) | device == "", NA, "Fitbit")) hk_participants <- read.csv(file.path(outputConceptsDir, "hk_participants.csv")) df$healthkitv2samples <- df$healthkitv2samples %>% dplyr::filter(participantidentifier %in% hk_participants$participantidentifier) %>% - mutate(type = case_when(device_manufacturer %in% c("Apple", "Apple Inc.") ~ "Apple", + dplyr::filter(device_model != "iPhone") %>% + mutate(type = case_when(device_manufacturer %in% c("Apple", "Apple Inc.") ~ "Apple Watch", device_manufacturer %in% c("Garmin") ~ "Garmin", device_manufacturer %in% c("Polar Electro Oy") ~ "Polar", device_model %in% c("HRM808S") ~ "HRM808S", @@ -53,7 +54,8 @@ df_joined <- summarise(type = toString(sort(unique(type)))) %>% mutate(concept = "mhp:device") %>% rename(value = type) %>% - mutate(value = ifelse({grepl(", Apple|Apple, ", value)}, "Apple", value)) %>% + mutate(value = ifelse({grepl(", Apple Watch|Apple Watch, ", value)}, "Apple Watch", value)) %>% + filter(value != "Other") %>% select(all_of(c("participantidentifier", "concept", "value"))) %>% ungroup()