From f5936b057c244313c62059d30c5486942c0366ff Mon Sep 17 00:00:00 2001 From: EhrmannS Date: Tue, 16 Jan 2024 11:24:24 +0100 Subject: [PATCH] simplify and speed up --- R/edit_matches.R | 74 +++++++++++++++--------------------------------- 1 file changed, 23 insertions(+), 51 deletions(-) diff --git a/R/edit_matches.R b/R/edit_matches.R index c9ba743..3332fd9 100644 --- a/R/edit_matches.R +++ b/R/edit_matches.R @@ -101,36 +101,12 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL, temp <- get_concept(label = new, class = target$class, has_broader = target$has_broader, ontology = ontology) %>% left_join(tibble(label = new, has_broader = target$has_broader), ., by = c("label", "has_broader")) - # determine previous matches from ontology - newGrep <- str_replace_all(new, c("\\(" = "\\\\(", "\\)" = "\\\\)", "\\*" = "\\\\*")) - prevMatches <- get_concept(str_detect(has_close_match, paste0(newGrep, collapse = "|")) | - str_detect(has_broader_match, paste0(newGrep, collapse = "|")) | - str_detect(has_narrower_match, paste0(newGrep, collapse = "|")) | - str_detect(has_exact_match, paste0(newGrep, collapse = "|")), - class = target$class, has_broader = target$has_broader, - matches = TRUE, ontology = ontology) %>% - separate_rows(has_close_match, sep = " \\| ") %>% - separate_wider_delim(cols = has_close_match, names = "has_close_match", delim = "><", too_many = "drop") %>% - mutate(has_close_match = if_else(!has_close_match %in% new, NA_character_, has_close_match)) %>% - separate_rows(has_broader_match, sep = " \\| ") %>% - separate_wider_delim(cols = has_broader_match, names = "has_broader_match", delim = "><", too_many = "drop") %>% - mutate(has_broader_match = if_else(!has_broader_match %in% new, NA_character_, has_broader_match)) %>% - separate_rows(has_narrower_match, sep = " \\| ") %>% - separate_wider_delim(cols = has_narrower_match, names = "has_narrower_match", delim = "><", too_many = "drop") %>% - mutate(has_narrower_match = if_else(!has_narrower_match %in% new, NA_character_, has_narrower_match)) %>% - separate_rows(has_exact_match, sep = " \\| ") %>% - separate_wider_delim(cols = has_exact_match, names = "has_exact_match", delim = "><", too_many = "drop") %>% - mutate(has_exact_match = if_else(!has_exact_match %in% new, NA_character_, has_exact_match)) %>% - filter(if_any(c(has_close_match, has_broader_match, has_narrower_match, has_exact_match), ~ !is.na(.))) %>% - group_by(id, label, description, class, has_broader) %>% - summarise(across(.cols = c(has_close_match, has_broader_match, has_narrower_match, has_exact_match), .fns = ~paste0(unique(na.omit(.x)), collapse = " | "))) %>% - ungroup() %>% - mutate(across(where(is.character), ~na_if(x = ., y = ""))) - # determine previous matches from matching table if(testFileExists(paste0(matchDir, sourceFile))){ - prevMatches <- readRDS(file = paste0(matchDir, sourceFile)) %>% - bind_rows(prevMatches) + prevMatches <- readRDS(file = paste0(matchDir, sourceFile)) + } else { + prevMatches <- tibble(id = character(), label = character(), description = character(), class = character(), has_broader = character(), + has_close_match = character(), has_broader_match = character(), has_narrower_match = character(), has_exact_match = character()) } prevMatchLabels <- prevMatches %>% @@ -138,7 +114,7 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL, pivot_longer(cols = c(has_broader_match, has_close_match, has_exact_match, has_narrower_match), values_to = "labels") %>% filter(!is.na(labels)) %>% distinct(labels) %>% - separate_rows(labels, sep = " \\| ") %>% + separate_longer_delim(cols = labels, delim = " | ") %>% pull(labels) # gather all concepts for the focal data-series (previous matches from @@ -148,7 +124,7 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL, rename(harmLab = label) %>% pivot_longer(cols = c(has_broader_match, has_close_match, has_exact_match, has_narrower_match), names_to = "match", values_to = "label") %>% - separate_rows(label, sep = " \\| ") %>% + separate_longer_delim(cols = label, delim = " | ") %>% full_join(temp, by = c("label", "class", "id", "has_broader", "description")) %>% mutate(harmLab = if_else(is.na(harmLab), label, harmLab), label = if_else(is.na(match), if_else(!is.na(id), label, NA_character_), label), @@ -226,7 +202,6 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL, toJoin <- relate %>% rename(label_harm = label) %>% mutate(label = tolower(label_harm)) %>% - # filter(!is.na(label_harm) & class == tail(filterClasses, 1)) %>% filter(!is.na(label_harm)) %>% select(-has_broader, -has_broader_match, -has_close_match, -has_exact_match, -has_narrower_match) @@ -264,8 +239,7 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL, hits <- joined %>% filter(!is.na(has_0_differences)) %>% - mutate(#class = tail(filterClasses, 1), - has_new_close_match = label, + mutate(has_new_close_match = label, label = has_0_differences) %>% select(label, id, all_of(withBroader), class, has_new_close_match) @@ -276,7 +250,6 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL, relate <- relate %>% left_join(hits, by = c("id", "label", "class", withBroader)) %>% left_join(numbers, by = "label") %>% - # mutate(has_new_close_match = if_else(n > 1, NA_character_, has_new_close_match)) %>% rowwise() %>% mutate(has_new_close_match = if_else(grepl(x = has_close_match, pattern = has_new_close_match), NA_character_, has_new_close_match)) %>% unite(col = "has_close_match", has_close_match, has_new_close_match, sep = " | ", na.rm = TRUE) %>% @@ -317,13 +290,6 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL, beep(sound = beep) } - # ... and make them aware of their duty - # if(prevAvail){ - # message("\nprevious matches found for this dataseries, only previously not matched terms are presented") - # } else { - # message("\nno previous matches found for this dataseries, close match with other potentially available terms is presented") - # } - message("-> please edit the file '", paste0(matchDir, "/matching.csv"), "' \n") if(verbose){ message("--- column description ---\n") @@ -377,26 +343,32 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL, if(!is.null(related)){ - out <- prevMatches %>% + matchingTable <- prevMatches %>% filter(!id == "ignore") %>% bind_rows(related) %>% pivot_longer(cols = c(has_broader_match, has_close_match, has_exact_match, has_narrower_match), names_to = "match", values_to = "new_label") %>% - separate_rows(new_label, sep = " \\| ") %>% - distinct() %>% - group_by(id, has_broader, label, class, description, match) %>% - summarise(new_label = paste0(na.omit(new_label), collapse = " | "), .groups = "keep") %>% - ungroup() %>% - mutate(new_label = na_if(x = new_label, y = "")) %>% - pivot_wider(id_cols = c(label, class, id, has_broader, description), names_from = match, values_from = new_label) %>% + separate_longer_delim(cols = new_label, delim = " | ") %>% distinct() %>% + pivot_wider(id_cols = c("id", "label", "class", "has_broader", "description"), names_from = match, + values_from = new_label, values_fn = ~paste0(na.omit(.x), collapse = " | ")) %>% + mutate(across(where(is.character), ~na_if(x = ., y = ""))) %>% filter(!is.na(has_broader_match) | !is.na(has_close_match) | !is.na(has_narrower_match) | !is.na(has_exact_match)) %>% filter(!is.na(id)) %>% arrange(id) + newGrep <- str_replace_all(new, c("\\(" = "\\\\(", "\\)" = "\\\\)", "\\*" = "\\\\*")) + newGrep <- paste0("^", newGrep, "$") + out <- related %>% + filter(str_detect(has_close_match, paste0(newGrep, collapse = "|")) | + str_detect(has_broader_match, paste0(newGrep, collapse = "|")) | + str_detect(has_narrower_match, paste0(newGrep, collapse = "|")) | + str_detect(has_exact_match, paste0(newGrep, collapse = "|"))) %>% + arrange(id) + } - saveRDS(object = out, file = paste0(matchDir, sourceFile)) + saveRDS(object = matchingTable, file = paste0(matchDir, sourceFile)) - return(related) + return(out) }