Skip to content

Commit

Permalink
simplify and speed up
Browse files Browse the repository at this point in the history
  • Loading branch information
EhrmannS committed Jan 16, 2024
1 parent cf9dcf3 commit f5936b0
Showing 1 changed file with 23 additions and 51 deletions.
74 changes: 23 additions & 51 deletions R/edit_matches.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,44 +101,20 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL,
temp <- get_concept(label = new, class = target$class, has_broader = target$has_broader, ontology = ontology) %>%
left_join(tibble(label = new, has_broader = target$has_broader), ., by = c("label", "has_broader"))

# determine previous matches from ontology
newGrep <- str_replace_all(new, c("\\(" = "\\\\(", "\\)" = "\\\\)", "\\*" = "\\\\*"))
prevMatches <- get_concept(str_detect(has_close_match, paste0(newGrep, collapse = "|")) |
str_detect(has_broader_match, paste0(newGrep, collapse = "|")) |
str_detect(has_narrower_match, paste0(newGrep, collapse = "|")) |
str_detect(has_exact_match, paste0(newGrep, collapse = "|")),
class = target$class, has_broader = target$has_broader,
matches = TRUE, ontology = ontology) %>%
separate_rows(has_close_match, sep = " \\| ") %>%
separate_wider_delim(cols = has_close_match, names = "has_close_match", delim = "><", too_many = "drop") %>%
mutate(has_close_match = if_else(!has_close_match %in% new, NA_character_, has_close_match)) %>%
separate_rows(has_broader_match, sep = " \\| ") %>%
separate_wider_delim(cols = has_broader_match, names = "has_broader_match", delim = "><", too_many = "drop") %>%
mutate(has_broader_match = if_else(!has_broader_match %in% new, NA_character_, has_broader_match)) %>%
separate_rows(has_narrower_match, sep = " \\| ") %>%
separate_wider_delim(cols = has_narrower_match, names = "has_narrower_match", delim = "><", too_many = "drop") %>%
mutate(has_narrower_match = if_else(!has_narrower_match %in% new, NA_character_, has_narrower_match)) %>%
separate_rows(has_exact_match, sep = " \\| ") %>%
separate_wider_delim(cols = has_exact_match, names = "has_exact_match", delim = "><", too_many = "drop") %>%
mutate(has_exact_match = if_else(!has_exact_match %in% new, NA_character_, has_exact_match)) %>%
filter(if_any(c(has_close_match, has_broader_match, has_narrower_match, has_exact_match), ~ !is.na(.))) %>%
group_by(id, label, description, class, has_broader) %>%
summarise(across(.cols = c(has_close_match, has_broader_match, has_narrower_match, has_exact_match), .fns = ~paste0(unique(na.omit(.x)), collapse = " | "))) %>%
ungroup() %>%
mutate(across(where(is.character), ~na_if(x = ., y = "")))

# determine previous matches from matching table
if(testFileExists(paste0(matchDir, sourceFile))){
prevMatches <- readRDS(file = paste0(matchDir, sourceFile)) %>%
bind_rows(prevMatches)
prevMatches <- readRDS(file = paste0(matchDir, sourceFile))
} else {
prevMatches <- tibble(id = character(), label = character(), description = character(), class = character(), has_broader = character(),
has_close_match = character(), has_broader_match = character(), has_narrower_match = character(), has_exact_match = character())
}

prevMatchLabels <- prevMatches %>%
filter(class %in% filterClasses) %>%
pivot_longer(cols = c(has_broader_match, has_close_match, has_exact_match, has_narrower_match), values_to = "labels") %>%
filter(!is.na(labels)) %>%
distinct(labels) %>%
separate_rows(labels, sep = " \\| ") %>%
separate_longer_delim(cols = labels, delim = " | ") %>%
pull(labels)

# gather all concepts for the focal data-series (previous matches from
Expand All @@ -148,7 +124,7 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL,
rename(harmLab = label) %>%
pivot_longer(cols = c(has_broader_match, has_close_match, has_exact_match, has_narrower_match),
names_to = "match", values_to = "label") %>%
separate_rows(label, sep = " \\| ") %>%
separate_longer_delim(cols = label, delim = " | ") %>%
full_join(temp, by = c("label", "class", "id", "has_broader", "description")) %>%
mutate(harmLab = if_else(is.na(harmLab), label, harmLab),
label = if_else(is.na(match), if_else(!is.na(id), label, NA_character_), label),
Expand Down Expand Up @@ -226,7 +202,6 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL,
toJoin <- relate %>%
rename(label_harm = label) %>%
mutate(label = tolower(label_harm)) %>%
# filter(!is.na(label_harm) & class == tail(filterClasses, 1)) %>%
filter(!is.na(label_harm)) %>%
select(-has_broader, -has_broader_match, -has_close_match, -has_exact_match, -has_narrower_match)

Expand Down Expand Up @@ -264,8 +239,7 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL,

hits <- joined %>%
filter(!is.na(has_0_differences)) %>%
mutate(#class = tail(filterClasses, 1),
has_new_close_match = label,
mutate(has_new_close_match = label,
label = has_0_differences) %>%
select(label, id, all_of(withBroader), class, has_new_close_match)

Expand All @@ -276,7 +250,6 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL,
relate <- relate %>%
left_join(hits, by = c("id", "label", "class", withBroader)) %>%
left_join(numbers, by = "label") %>%
# mutate(has_new_close_match = if_else(n > 1, NA_character_, has_new_close_match)) %>%
rowwise() %>%
mutate(has_new_close_match = if_else(grepl(x = has_close_match, pattern = has_new_close_match), NA_character_, has_new_close_match)) %>%
unite(col = "has_close_match", has_close_match, has_new_close_match, sep = " | ", na.rm = TRUE) %>%
Expand Down Expand Up @@ -317,13 +290,6 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL,
beep(sound = beep)
}

# ... and make them aware of their duty
# if(prevAvail){
# message("\nprevious matches found for this dataseries, only previously not matched terms are presented")
# } else {
# message("\nno previous matches found for this dataseries, close match with other potentially available terms is presented")
# }

message("-> please edit the file '", paste0(matchDir, "/matching.csv"), "' \n")
if(verbose){
message("--- column description ---\n")
Expand Down Expand Up @@ -377,26 +343,32 @@ edit_matches <- function(new, target = NULL, source = NULL, ontology = NULL,

if(!is.null(related)){

out <- prevMatches %>%
matchingTable <- prevMatches %>%
filter(!id == "ignore") %>%
bind_rows(related) %>%
pivot_longer(cols = c(has_broader_match, has_close_match, has_exact_match, has_narrower_match),
names_to = "match", values_to = "new_label") %>%
separate_rows(new_label, sep = " \\| ") %>%
distinct() %>%
group_by(id, has_broader, label, class, description, match) %>%
summarise(new_label = paste0(na.omit(new_label), collapse = " | "), .groups = "keep") %>%
ungroup() %>%
mutate(new_label = na_if(x = new_label, y = "")) %>%
pivot_wider(id_cols = c(label, class, id, has_broader, description), names_from = match, values_from = new_label) %>%
separate_longer_delim(cols = new_label, delim = " | ") %>%
distinct() %>%
pivot_wider(id_cols = c("id", "label", "class", "has_broader", "description"), names_from = match,
values_from = new_label, values_fn = ~paste0(na.omit(.x), collapse = " | ")) %>%
mutate(across(where(is.character), ~na_if(x = ., y = ""))) %>%
filter(!is.na(has_broader_match) | !is.na(has_close_match) | !is.na(has_narrower_match) | !is.na(has_exact_match)) %>%
filter(!is.na(id)) %>%
arrange(id)

newGrep <- str_replace_all(new, c("\\(" = "\\\\(", "\\)" = "\\\\)", "\\*" = "\\\\*"))
newGrep <- paste0("^", newGrep, "$")
out <- related %>%
filter(str_detect(has_close_match, paste0(newGrep, collapse = "|")) |
str_detect(has_broader_match, paste0(newGrep, collapse = "|")) |
str_detect(has_narrower_match, paste0(newGrep, collapse = "|")) |
str_detect(has_exact_match, paste0(newGrep, collapse = "|"))) %>%
arrange(id)

}

saveRDS(object = out, file = paste0(matchDir, sourceFile))
saveRDS(object = matchingTable, file = paste0(matchDir, sourceFile))

return(related)
return(out)
}

0 comments on commit f5936b0

Please sign in to comment.