Skip to content

Commit

Permalink
Merge pull request #202 from diskin-lab-chop/rjcorb/201-rm-address-co…
Browse files Browse the repository at this point in the history
…nflicting-command

rm address_conflicting_interpretations() function and command
  • Loading branch information
rjcorb authored Nov 6, 2023
2 parents 79ec85b + eedf846 commit b6d9356
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 78 deletions.
41 changes: 0 additions & 41 deletions scripts/02-annotate_variants_CAVATICA_input.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,42 +84,6 @@ output_tab_abr_file <- paste0(output_name, ".cavatica_input.annotations_report.a
## allocate more memory capacity
Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 2)


address_conflicting_interp <- function(clinvar_anno_vcf_df) { ## if conflicting intrep. take the call with most calls in CLNSIGCONF field

clinvar_nr <- clinvar_anno_vcf_df %>%
dplyr::filter(Stars == "1NR" & !is.na(Stars))

for (i in 1:nrow(clinvar_nr)) {
conf_section <- str_match(clinvar_nr$INFO[i], "CLNSIGCONF\\=.+\\;CLNVC") ## part to parse and count calls
call_names <- c("Pathogenic", "Likely_pathogenic", "Benign", "Likely_benign", "Uncertain_significance")

P <- (str_match(conf_section, "Pathogenic\\((\\d+)\\)")[, 2])
LP <- (str_match(conf_section, "Likely_pathogenic\\((\\d+)\\)")[, 2])
B <- (str_match(conf_section, "Benign\\((\\d+)\\)")[, 2])
LB <- (str_match(conf_section, "Likely_benign\\((\\d+)\\)")[, 2])
U <- (str_match(conf_section, "Uncertain_significance\\((\\d+)\\)")[, 2])

## make vector out of possible calls to get max
calls <- c(P, LP, B, LB, U)

if (length(which(calls == max(calls, na.rm = TRUE))) > 1) {
next
}

highest_ind <- which.max(calls)
consensus_call <- call_names[highest_ind]

clinvar_nr[i, ]$final_call_clinvar <- consensus_call
}

clinvar_anno_vcf_df <- clinvar_anno_vcf_df %>%
left_join(clinvar_nr[, c("vcf_id", "final_call_clinvar")], by = "vcf_id", suffix = c(".orig", ".resolved")) %>%
dplyr::mutate(final_call_clinvar = coalesce(final_call_clinvar.resolved, final_call_clinvar.orig)) %>%
dplyr::select(-final_call_clinvar.resolved, -final_call_clinvar.orig) %>%
return(clinvar_anno_vcf_df)
}

address_ambiguous_calls <- function(results_tab_abridged) { ## address ambiguous calls (non L/LB/P/LP/VUS) by taking the InterVar final call

results_tab_abridged <- results_tab_abridged %>%
Expand Down Expand Up @@ -162,11 +126,6 @@ clinvar_anno_vcf_df <- clinvar_anno_vcf_df %>%
final_call_clinvar = str_match(INFO, "CLNSIG\\=(\\w+)([\\|\\/]\\w+)*\\;")[, 2]
)


## if conflicting intrep. take the call with most calls in CLNSIGCONF field
clinvar_anno_vcf_df <- address_conflicting_interp(clinvar_anno_vcf_df)


## get latest calls from variant and submission summary files
variant_summary_df <- vroom(input_variant_summary, show_col_types = FALSE) %>%
filter(vcf_id %in% clinvar_anno_vcf_df$vcf_id) %>%
Expand Down
37 changes: 0 additions & 37 deletions scripts/02-annotate_variants_custom_input.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,41 +111,6 @@ address_ambiguous_calls <- function(results_tab_abridged) { ## address ambiguous
return(results_tab_abridged)
}

address_conflicting_interp <- function(clinvar_anno_vcf_df) { ## if conflicting intrep. take the call with most calls in CLNSIGCONF field

clinvar_nr <- clinvar_anno_vcf_df %>%
dplyr::filter(Stars == "1NR" & !is.na(Stars))

for (i in 1:nrow(clinvar_nr)) {
conf_section <- str_match(clinvar_nr$INFO[i], "CLNSIGCONF\\=.+\\;CLNVC") ## part to parse and count calls
call_names <- c("Pathogenic", "Likely_pathogenic", "Benign", "Likely_benign", "Uncertain_significance")

P <- (str_match(conf_section, "Pathogenic\\((\\d+)\\)")[, 2])
LP <- (str_match(conf_section, "Likely_pathogenic\\((\\d+)\\)")[, 2])
B <- (str_match(conf_section, "Benign\\((\\d+)\\)")[, 2])
LB <- (str_match(conf_section, "Likely_benign\\((\\d+)\\)")[, 2])
U <- (str_match(conf_section, "Uncertain_significance\\((\\d+)\\)")[, 2])

## make vector out of possible calls to get max
calls <- c(P, LP, B, LB, U)

if (length(which(calls == max(calls, na.rm = TRUE))) > 1) {
next
}

highest_ind <- which.max(calls)
consensus_call <- call_names[highest_ind]

clinvar_nr[i, ]$final_call_clinvar <- consensus_call
}

clinvar_anno_vcf_df <- clinvar_anno_vcf_df %>%
left_join(clinvar_nr[, c("vcf_id", "final_call_clinvar")], by = "vcf_id", suffix = c(".orig", ".resolved")) %>%
dplyr::mutate(final_call_clinvar = coalesce(final_call_clinvar.resolved, final_call_clinvar.orig)) %>%
dplyr::select(-final_call_clinvar.resolved, -final_call_clinvar.orig) %>%
return(clinvar_anno_vcf_df)
}

## make vcf dataframe and add vcf_if column
vcf_df <- vroom(input_vcf_file, comment = "#", delim = "\t", col_names = c("CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", "Sample"), trim_ws = TRUE, show_col_types = FALSE) %>%
mutate(
Expand Down Expand Up @@ -174,8 +139,6 @@ clinvar_anno_vcf_df <- vroom(input_clinVar_file, comment = "#", delim = "\t", co
final_call_clinvar = str_match(INFO, "CLNSIG\\=(\\w+)([\\|\\/]\\w+)*\\;")[, 2]
)

clinvar_anno_vcf_df <- address_conflicting_interp(clinvar_anno_vcf_df)

## store variants without clinvar info
clinvar_anti_join_vcf_df <- anti_join(vcf_df, clinvar_anno_vcf_df, by = "vcf_id") %>%
dplyr::mutate(
Expand Down

0 comments on commit b6d9356

Please sign in to comment.