diff --git a/NAMESPACE b/NAMESPACE index 9c37dd2..5200896 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,18 +1,21 @@ # Generated by roxygen2: do not edit by hand +export(abbreviate_effect) export(af_summary) +export(bcftools_stats_plot) export(cancer_rmd) +export(count_pieces) export(date_log) +export(dragen_hrd) +export(get_oncokb_genes) export(hrd_results_tabs) export(is_vcf) -export(is_vcf_bpi) export(mkdir) export(pkg_exists) export(plot_bnd_sr_pr_tot_hist) export(plot_bnd_sr_pr_tot_lines) +export(process_cnv_tsv) export(process_sv) -export(purple_cnv_germ_process) -export(purple_cnv_germ_read) export(purple_cnv_som_gene_process) export(purple_cnv_som_gene_read) export(purple_cnv_som_process) @@ -21,11 +24,12 @@ export(purple_kataegis) export(purple_purity_read) export(purple_qc_read) export(purple_snv_vcf_read) -export(purple_version_read) +export(read_oncokb) export(session_info_kable) export(tsv_is_empty) -export(umccrise_read_sv_tsv) export(vcf_is_empty) +export(virusbreakend_summary_read) +export(virusbreakend_vcf_read) export(write_jsongz) export(write_tsvgz) export(write_tsvjsongz) diff --git a/R/oncokb.R b/R/oncokb.R new file mode 100644 index 0000000..03954aa --- /dev/null +++ b/R/oncokb.R @@ -0,0 +1,24 @@ +#' @export +read_oncokb <- function(x) { + readr::read_tsv(x) |> + dplyr::filter( + `OncoKB Annotated` == "Yes" + ) |> + dplyr::pull("Hugo Symbol") +} + +#' @export +get_oncokb_genes <- function(x, oncokb_genes) { + delimiters <- " ,&-" + delimiter_re <- paste0("[", delimiters, "]") + + oncokb_genes |> + # Create regexes for each match, utilising delimiters for boundaries. Handles most cases where a gene symbol contains the '-' delimiter + purrr::map(function(n) paste0("(?<=^|", delimiter_re, ")", n, "(?=", delimiter_re, "|$)")) |> + # Loop with nm iterations through regex and gene symbols + purrr::map(function(n) stringr::str_detect(x, n) |> tibble::as_tibble_row(.name_repair="unique_quiet")) |> + # Combine as tibble to access dplyr::summarise and compile list of detected OncoKB gene symbols for each effect + dplyr::bind_rows() |> + dplyr::summarise(dplyr::across(dplyr::everything(), function(v) { paste0(sort(oncokb_genes[v]), collapse=", ") })) |> + unlist() +} diff --git a/R/oncoviral.R b/R/oncoviral.R new file mode 100644 index 0000000..7c87f0d --- /dev/null +++ b/R/oncoviral.R @@ -0,0 +1,141 @@ +#' Read VIRUSBreakend Summary File +#' +#' Reads the `virusbreakend.vcf.summary.tsv` file. +#' +#' @param x Path to `virusbreakend.vcf.summary.tsv` file. +#' +#' @return List with two elements: +#' * `tab`: Tibble containing data. +#' * `descr`: Description of tibble columns. +#' +#' @examples +#' x <- system.file("extdata/virusbreakend/virusbreakend.vcf.summary.tsv", package = "gpgr") +#' (vb <- virusbreakend_summary_read(x)) +#' @testexamples +#' expect_equal(colnames(vb)[ncol(vb)], "QCStatus") +#' +#' @export +virusbreakend_summary_read <- function(x) { + + nm <- c( + "taxid_genus" = "c", + "name_genus" = "c", + "reads_genus_tree" = "i", + "taxid_species" = "c", + "name_species" = "c", + "reads_species_tree" = "i", + "taxid_assigned" = "c", + "name_assigned" = "c", + "reads_assigned_tree" = "i", + "reads_assigned_direct" = "i", + "reference" = "c", + "reference_taxid" = "c", + "reference_kmer_count" = "i", + "alternate_kmer_count" = "i", + "rname" = "c", + "startpos" = "i", + "endpos" = "i", + "numreads" = "i", + "covbases" = "d", + "coverage" = "d", + "meandepth" = "d", + "meanbaseq" = "d", + "meanmapq" = "d", + "integrations" = "i", + "QCStatus" = "c" + ) + + ctypes <- paste(nm, collapse = "") + virusbreakend_summary <- readr::read_tsv(x, col_types = ctypes) + + if (nrow(virusbreakend_summary) > 0) { + assertthat::assert_that(ncol(virusbreakend_summary) == length(nm)) + assertthat::assert_that(all(colnames(virusbreakend_summary) == names(nm))) + + virusbreakend_summary <- virusbreakend_summary |> + dplyr::select( + Virus="name_assigned", + Length="endpos", + Reads="numreads", + Coverage="coverage", + `Mean depth`="meandepth", + Intergrations="integrations", + QC="QCStatus", + ) + } + + descr <- dplyr::tribble( + ~Column, ~Description, + "Virus", "Assigned NCBI taxonomy name of viral reference", + "Length", "Length of viral contig", + "Reads", "Number of reads mapped to adjusted viral reference", + "Coverage", "Percentage of viral positions with at least one read mapped", + "Mean depth", "Mean alignment depth", + "Intergrations", "Number of detected integration breakpoints", + "QC", "QC status of viral intergrations", + ) + + list( + tab = virusbreakend_summary, + descr = descr + ) +} + +#' Read VIRUSBreakend VCF File +#' +#' Reads the `virusbreakend.vcf` file and selects data to present. +#' +#' @param x Path to `virusbreakend.vcf` file. +#' +#' @return List with two elements: +#' * `tab`: Tibble containing selected data. +#' * `descr`: Description of tibble columns. +#' +#' @examples +#' x <- system.file("extdata/virusbreakend/virusbreakend.vcf", package = "gpgr") +#' (vb <- virusbreakend_vcf_read(x)) +#' @testexamples +#' expect_equal(colnames(vb)[ncol(vb)], "QC") +#' +#' @export +virusbreakend_vcf_read <- function(x) { + + d <- bedr::read.vcf(x, split.info = TRUE, verbose = FALSE) + + if (nrow(d$vcf) > 0) { + virusbreakend_integrations <- tibble::as_tibble(d$vcf) |> + dplyr::select( + Contig="CHROM", + Position="POS", + "Fragment support"="BVF", + "Fragment support (unmapped)"="BUM", + "Softclip read support"="BSC", + Reference="REF", + Alt="ALT", + `Breakend ID`="ID", + `Mate ID`="MATEID", + QC="FILTER", + ) + } else { + virusbreakend_integrations <- tibble::tibble() + } + + descr <- dplyr::tribble( + ~Column, ~Description, + "Contig", "Name of contig", + "Position", "Position of breakend in contig", + "Breakend ID", "ID of integration breakend", + "Mate ID", "ID of integration breakend mate", + "Reference", "Reference allele", + "Alt", "Alternative allele", + "QC", "VCF filter values", + "Fragment support", "Total number of fragments supporting breakend", + "Fragment support (unmapped)", "Number of fragments supporting breakend that have one read unmapped", + "Softclip read support", "Number of softclipped reads supporting breakend" + ) + + list( + tab = virusbreakend_integrations, + description = descr + ) +} diff --git a/R/purple.R b/R/purple.R index a417d42..713e20c 100644 --- a/R/purple.R +++ b/R/purple.R @@ -18,13 +18,12 @@ purple_cnv_som_gene_read <- function(x) { nm <- c( "chromosome" = "c", "start" = "i", "end" = "i", "gene" = "c", - "minCopyNumber" = "d", "maxCopyNumber" = "d", - "unused" = "c", "somaticRegions" = "d", "germlineHomDeletionRegions" = "d", - "germlineHetToHomDeletionRegions" = "d", - "transcriptId" = "c", "transcriptVersion" = "c", "chromosomeBand" = "c", + "minCopyNumber" = "d", "maxCopyNumber" = "d", "somaticRegions" = "d", + "transcriptId" = "c", "isCanonical" = "c", "chromosomeBand" = "c", "minRegions" = "d", "minRegionStart" = "i", "minRegionEnd" = "i", "minRegionStartSupport" = "c", "minRegionEndSupport" = "c", - "minRegionMethod" = "c", "minMinorAlleleCopyNumber" = "d" + "minRegionMethod" = "c", "minMinorAlleleCopyNumber" = "d", + "depthWindowCount" = "i" ) ctypes <- paste(nm, collapse = "") @@ -34,7 +33,6 @@ purple_cnv_som_gene_read <- function(x) { purple_cnv_gene } - #' Process PURPLE CNV Gene File for UMCCRISE #' #' Processes the `purple.cnv.gene.tsv` file. Keeps genes that are in the @@ -43,8 +41,8 @@ purple_cnv_som_gene_read <- function(x) { #' #' @param x Path to `purple.cnv.gene.tsv` file. #' @param g Path to gene file containing at least three columns: -#' * `symbol`: gene name (character). -#' * `tumorsuppressor`: is this gene a tumor suppressor (TRUE/FALSE). +#' * `ensembl_gene_symbol`: gene name (character). +#' * `tsgene`: is this gene a tumor suppressor (TRUE/FALSE). #' * `oncogene`: is this gene an oncogene (TRUE/FALSE). #' #' @return List with two elements: @@ -66,27 +64,26 @@ purple_cnv_som_gene_process <- function(x, g = NULL) { } genes <- readr::read_tsv(g, col_types = readr::cols( - symbol = "c", oncogene = "l", tumorsuppressor = "l" + ensembl_gene_symbol = "c", oncogene = "l", tsgene = "l" )) |> - dplyr::select("symbol", "oncogene", "tumorsuppressor") + dplyr::select("ensembl_gene_symbol", "oncogene", "tsgene") oncogenes <- genes |> dplyr::filter(.data$oncogene) |> - dplyr::pull(.data$symbol) + dplyr::pull(.data$ensembl_gene_symbol) tsgenes <- genes |> - dplyr::filter(.data$tumorsuppressor) |> - dplyr::pull(.data$symbol) + dplyr::filter(.data$tsgene) |> + dplyr::pull(.data$ensembl_gene_symbol) purple_cnv_gene <- purple_cnv_gene |> - dplyr::filter(.data$gene %in% genes$symbol) |> + dplyr::filter(.data$gene %in% genes$ensembl_gene_symbol) |> dplyr::mutate( chromosome = as.factor(.data$chromosome), - transcriptID = paste0(.data$transcriptId, ".", .data$transcriptVersion), + transcriptID = paste0(.data$transcriptId), minRegStartEnd = paste0(.data$minRegionStart, "-", .data$minRegionEnd), minRegSupportStartEndMethod = paste0( .data$minRegionStartSupport, "-", .data$minRegionEndSupport, " (", .data$minRegionMethod, ")" ), - germDelReg = paste0(.data$germlineHomDeletionRegions, "/", .data$germlineHetToHomDeletionRegions), oncogene = .data$gene %in% oncogenes, tsgene = .data$gene %in% tsgenes, onco_or_ts = dplyr::case_when( @@ -101,7 +98,7 @@ purple_cnv_som_gene_process <- function(x, g = NULL) { chrom = "chromosome", "start", "end", chrBand = "chromosomeBand", "onco_or_ts", "transcriptID", minMinorAlleleCN = "minMinorAlleleCopyNumber", - somReg = "somaticRegions", "germDelReg", minReg = "minRegions", + somReg = "somaticRegions", minReg = "minRegions", "minRegStartEnd", "minRegSupportStartEndMethod" ) @@ -115,7 +112,6 @@ purple_cnv_som_gene_process <- function(x, g = NULL) { "transcriptID", "Ensembl transcript ID (dot version)", "minMinorAlleleCN", "Minimum allele ploidy found over the gene exons - useful for identifying LOH events", "somReg (somaticRegions)", "Count of somatic copy number regions this gene spans", - "germDelReg (germlineHomDeletionRegions / germlineHetToHomDeletionRegions)", "Number of regions spanned by this gene that are (homozygously deleted in the germline / both heterozygously deleted in the germline and homozygously deleted in the tumor)", "minReg (minRegions)", "Number of somatic regions inside the gene that share the min copy number", "minRegStartEnd", "Start/End base of the copy number region overlapping the gene with the minimum copy number", "minRegSupportStartEndMethod", "Start/end support of the CN region overlapping the gene with the min CN (plus determination method)" @@ -127,6 +123,312 @@ purple_cnv_som_gene_process <- function(x, g = NULL) { ) } +sash_read_cnv_tsv <- function(x) { + + nm <- c( + "chromosome" = "c", + "start" = "i", + "end" = "i", + "svtype" = "c", + "baf" = "d", + "bafCount" = "d", + "copyNumber" = "d", + "depthWindowCount" = "i", + "gcContent" = "d", + "majorAlleleCopyNumber" = "d", + "method" = "c", + "minorAlleleCopyNumber" = "d", + "segmentEndSupport" = "c", + "segmentStartSupport" = "c", + "sv_top_tier" = "i", + "simple_ann" = "c" + ) + + ctypes <- paste(nm, collapse = "") + d <- readr::read_tsv(x, col_types = ctypes) + assertthat::assert_that(ncol(d) == length(nm)) + assertthat::assert_that(all(colnames(d) == names(nm))) + d +} + +filter_and_split_annotations_cnv <- function(x) { + + filter_conditions <- list( + # Chromosome effects + stringr::str_starts(x$Detail, "chrom_"), + # Fusion effects + x$Effect %in% c("BidFusG", "FusG") + ) + + x.grouped <- x |> + dplyr::group_by( + filter=ifelse(purrr::reduce(filter_conditions, `|`), "filter", "retain") + ) + + keys <- x.grouped |> + dplyr::group_keys() |> + dplyr::pull(filter) + + x.split <- x.grouped |> + dplyr::group_split(.keep=FALSE) |> + purrr::set_names(keys) + + list( + retained = purrr::pluck(x.split, "retain"), + filtered = purrr::pluck(x.split, "filter") |> dplyr::arrange(Tier, `Event ID`) + ) +} + +collapse_effect_group <- function(x) { + x.tmp <- dplyr::first(x) + genes <- x.tmp |> + dplyr::pull(Genes.unique) |> + unlist() + + x.tmp |> + dplyr::mutate( + Genes = paste0(genes, collapse=", "), + Transcripts = "", + Detail = paste0(unique(x$Detail) |> sort(), collapse=", "), + Tier = min(x$Tier), + `Annotation ID` = NA, + ) +} + +set_many_genes_cnv <- function(x) { + # Count genes and set eligibility for collapsing + collapse_effects <- c("DelG", "Dup", "DelTx", "UpstreamGV", "DnstreamGV", "IntergenReg") + x.counts <- x |> + dplyr::group_by(`Event ID`, Effect) |> + dplyr::mutate( + Genes.unique = Genes |> stringr::str_split(", ") |> unlist() |> unique() |> list(), + `Gene count (effect group)` = Genes.unique |> dplyr::first() |> length(), + collapse = `Gene count (effect group)` > 2 & Effect %in% collapse_effects, + ) |> + dplyr::select(-`Gene count (effect group)`) + + # Collapse target groups + x.collapsed <- x.counts |> + dplyr::filter(collapse) |> + dplyr::select(-collapse) |> + dplyr::group_modify(~ collapse_effect_group(.x)) |> + # Update top tier, referring to complete data set within each group + dplyr::mutate( + `Tier (top)` = paste0(Tier, " (", min(x$Tier[x$`Event ID` == `Event ID`]), ")"), + ) |> + # Create new annotation ID + dplyr::ungroup() |> + dplyr::mutate( + `Annotation ID` = paste0(dplyr::row_number(), ".merged"), + ) + + # Bind collapsed rows with uncollapsed rows, then count genes per entry + x.tmp <- x.counts |> + dplyr::filter(!collapse) |> + dplyr::bind_rows(x.collapsed) |> + dplyr::select(-c(collapse, Genes.unique, Tier)) |> + dplyr::rowwise() |> + dplyr::mutate( + `Gene count` = Genes |> stringr::str_split(", ") |> unlist() |> unique() |> length(), + ) |> + dplyr::ungroup() |> + # Sort rows + dplyr::arrange(`Tier (top)`, `Event ID`) + + # Set many genes + x.tmp <- x.tmp |> + dplyr::mutate( + many_genes = ifelse(`Gene count` > 2, "many_genes", "few_genes"), + ) + + # Build the many genes table + x.many_genes <- x.tmp |> + dplyr::filter(many_genes == "many_genes") |> + # Remove unneeded columns and rename others + dplyr::select( + "Event ID", + "Annotation ID", + "Tier (top)", + "Start", + "End", + "SV Type", + "Effect", + "Gene count", + "Genes", + ) + + # Clear genes and transcripts where appropriate + x.ready <- x.tmp |> + dplyr::mutate( + Genes = ifelse( + many_genes == "few_genes" | is.na(many_genes), + Genes, + paste0("Many genes (", `Gene count`, ")") + ), + Transcripts = ifelse(many_genes == "few_genes" | is.na(many_genes), Transcripts, ""), + ) |> + dplyr::select(-c(many_genes, `Gene count`)) + + list( + ready = x.ready, + many_genes = x.many_genes + ) +} + +set_many_transcripts_cnv <- function(x) { + # Set many transcripts + x.tmp <- x |> + dplyr::rowwise() |> + dplyr::mutate( + `Transcript count` = stringr::str_split(Transcripts, ", ") |> unlist() |> unique() |> length() + ) |> + dplyr::ungroup() |> + dplyr::mutate( + many_transcripts = ifelse(`Transcript count` > 2, "many_transcripts", "few_transcripts"), + ) |> + # Sort rows + dplyr::arrange(`Tier (top)`, `Event ID`) + + # Build the many transcripts table + x.many_transcripts <- x.tmp |> + dplyr::filter(many_transcripts == "many_transcripts") |> + # Remove unneeded columns and rename others + dplyr::select( + "Event ID", + "Annotation ID", + "Tier (top)", + "Start", + "End", + "SV Type", + "Effect", + "Transcript count", + "Transcripts", + ) + + # Clear transcripts where appropriate + x.ready <- x.tmp |> + dplyr::mutate( + Transcripts = ifelse( + many_transcripts == "few_transcripts" | is.na(many_transcripts), + Transcripts, + paste0("Many transcripts (", `Transcript count`, ")") + ) + ) |> + dplyr::select(-c(many_transcripts, `Transcript count`)) + + list( + ready = x.ready, + many_transcripts = x.many_transcripts + ) +} + +#' @export +process_cnv_tsv <- function(x) { + # Read input + cnv.input <- sash_read_cnv_tsv(x) + + # Prepare input + cnv.ready <- cnv.input |> + dplyr::mutate( + chrom_simple = stringr::str_remove(chromosome, "chr"), + start = paste(chrom_simple, base::format(start, big.mark = ",", trim = TRUE), sep=":"), + end = paste(chrom_simple, base::format(end, big.mark = ",", trim = TRUE), sep=":"), + ) |> + dplyr::select(-c( + chromosome, + chrom_simple, + )) + + # Melt annotations + cnv.tmp <- cnv.ready |> + dplyr::mutate(`Event ID` = dplyr::row_number()) |> + # Split into individual annotations + dplyr::mutate(annotation = strsplit(simple_ann, ",")) |> + # Convert annotation fields into columns + tidyr::unnest(annotation) |> + tidyr::separate( + annotation, c("Event", "Effect", "Genes", "Transcripts", "Detail", "Tier"), + sep = "\\|", convert = FALSE + ) |> + # Create new columns and modify existing ones + dplyr::mutate( + copyNumber = as.numeric(copyNumber) |> round(2) %>% sprintf("%.2f", .), + minorAlleleCopyNumber = as.numeric(minorAlleleCopyNumber) |> round(2) %>% sprintf("%.2f", .), + majorAlleleCopyNumber = as.numeric(majorAlleleCopyNumber) |> round(2) %>% sprintf("%.2f", .), + "PURPLE CN Min+Maj" = paste0(minorAlleleCopyNumber, "+", majorAlleleCopyNumber), + "Genes" = stringr::str_replace_all(Genes, "&", ", "), + "Transcripts" = stringr::str_replace_all(Transcripts, "&", ", "), + ) |> + # Remove unused columns + dplyr::select(-c( + baf, + bafCount, + depthWindowCount, + Event, + gcContent, + majorAlleleCopyNumber, + method, + minorAlleleCopyNumber, + segmentEndSupport, + segmentStartSupport, + sv_top_tier, + simple_ann, + )) + + # Abbreviate effects + abbreviate_effectv <- Vectorize(abbreviate_effect) + cnv.tmp$Effect <- abbreviate_effectv(cnv.tmp$Effect) + + # Filter unwanted annotations + cnv.annotations.split <- filter_and_split_annotations_cnv(cnv.tmp) + + # Complete processing + cnv.tmp <- cnv.annotations.split$retained |> + # Reset sv_top_tier after removing annotations + dplyr::group_by(`Event ID`) |> + dplyr::mutate( + sv_top_tier = min(Tier), + "Tier (top)" = paste0(Tier, " (", sv_top_tier, ")"), + ) |> + dplyr::ungroup() |> + # Set unique annotation ID + dplyr::mutate(`Annotation ID` = as.character(dplyr::row_number())) |> + # Sort rows + dplyr::arrange(`Tier (top)`, `Event ID`) + + # Set column names + column_selector <- c( + "Event ID", + "Annotation ID", + "Tier (top)", + "Start" = "start", + "End" = "end", + "SV Type" = "svtype", + "Effect", + "Genes", + "Transcripts", + "Detail", + "PURPLE CN" = "copyNumber", + "PURPLE CN Min+Maj" + ) + cnv.tmp <- dplyr::select(cnv.tmp, tidyselect::all_of(c(column_selector, "Tier"))) + cnv.filtered <- dplyr::select(cnv.annotations.split$filtered, tidyselect::any_of(column_selector)) + + # Collapse selected annotations and set many genes + cnv.many_genes_data <- set_many_genes_cnv(cnv.tmp) + cnv.tmp <- cnv.many_genes_data$ready + + # Set many transcripts + cnv.many_transcripts_data <- set_many_transcripts_cnv(cnv.tmp) + + list( + annotations = cnv.many_transcripts_data$ready, + filtered = cnv.filtered, + many_genes = cnv.many_genes_data$many_genes, + many_transcripts = cnv.many_transcripts_data$many_transcripts + ) +} + #' Read PURPLE CNV Somatic File #' #' Reads the `purple.cnv.somatic.tsv` file, which contains the copy number @@ -162,8 +464,7 @@ purple_cnv_som_read <- function(x) { #' Process PURPLE CNV Somatic File for UMCCRISE #' -#' Processes the `purple.cnv.somatic.tsv` file. -#' and selects columns of interest. +#' Processes the `purple.cnv.somatic.tsv` file and selects columns of interest. #' #' @param x Path to `purple.cnv.somatic.tsv` file. #' @@ -230,80 +531,6 @@ purple_cnv_som_process <- function(x) { ) } -#' Read PURPLE CNV Germline File -#' -#' Reads the `purple.cnv.germline.tsv` file. -#' -#' @param x Path to `purple.cnv.germline.tsv` file. -#' -#' @return The input file as a tibble. -#' -#' @examples -#' x <- system.file("extdata/purple/purple.cnv.germline.tsv", package = "gpgr") -#' (p <- purple_cnv_germ_read(x)) -#' @testexamples -#' expect_equal(colnames(p)[ncol(p)], "majorAlleleCopyNumber") -#' -#' @export -purple_cnv_germ_read <- function(x) { - # as of PURPLE v2.39, germline and somatic files have same columns. - purple_cnv_germline <- purple_cnv_som_read(x) - purple_cnv_germline -} - -#' Process PURPLE CNV germline File for UMCCRISE -#' -#' Processes the `purple.cnv.germline.tsv` file. -#' and selects columns of interest. -#' -#' @param x Path to `purple.cnv.germline.tsv` file. -#' -#' @return List with two elements: -#' * `tab`: Tibble with more condensed columns. -#' * `descr`: Description of tibble columns. -#' -#' @examples -#' x <- system.file("extdata/purple/purple.cnv.germline.tsv", package = "gpgr") -#' (pp <- purple_cnv_germ_process(x)) -#' @testexamples -#' expect_equal(colnames(pp$tab)[ncol(pp$tab)], "GC (windowCount)") -#' -#' @export -purple_cnv_germ_process <- function(x) { - # as of PURPLE v2.39, germline and somatic files have same columns. - processed_purple_cnv_germline <- purple_cnv_som_process(x) - processed_purple_cnv_germline -} - -#' Read PURPLE version file -#' -#' Reads the `purple.version` file containing the PURPLE version and build date. -#' -#' @param x Path to the `purple.version` file. -#' -#' @return A list with the elements: -#' * `version`: The version of PURPLE used. -#' * `build_date`: The build date of the PURPLE version. -#' -#' @examples -#' x <- system.file("extdata/purple/purple.version", package = "gpgr") -#' (v <- purple_version_read(x)) -#' @testexamples -#' expect_equal(length(v), 2) -#' expect_equal(names(v), c("version", "build_date")) -#' expect_equal(v$version, "2.51") -#' -#' @export -purple_version_read <- function(x) { - tab <- readr::read_delim(x, delim = "=", col_names = c("key", "value"), col_types = "cc") - assertthat::assert_that(nrow(tab) == 2, all(tab$key == c("version", "build.date"))) - - list( - version = tab$value[tab$key == "version"], - build_date = tab$value[tab$key == "build.date"] - ) -} - #' Read PURPLE QC file #' #' Reads the `purple.qc` file. @@ -328,7 +555,8 @@ purple_qc_read <- function(x) { nm <- c( "QCStatus", "Method", "CopyNumberSegments", "UnsupportedCopyNumberSegments", "Purity", "AmberGender", - "CobaltGender", "DeletedGenes", "Contamination", "GermlineAberrations" + "CobaltGender", "DeletedGenes", "Contamination", "GermlineAberrations", + "AmberMeanDepth", "LohPercent" ) assertthat::assert_that(all(purple_qc$key == nm)) @@ -350,6 +578,8 @@ purple_qc_read <- function(x) { "Rate of contamination in tumor sample as determined by AMBER.", 16, "GermlineAberrations", glue::glue('{q["GermlineAberrations"]}'), "Can be one or more of: KLINEFELTER, TRISOMY_X/21/13/18/15, XYY, MOSAIC_X.", + 18, "AmberMeanDepth", glue::glue('{q["AmberMeanDepth"]}'), + "Mean depth as determined by AMBER.", ) list( @@ -392,7 +622,6 @@ purple_purity_read <- function(x) { "maxPloidy", "d", "minDiploidProportion", "d", "maxDiploidProportion", "d", - "version", "c", "somaticPenalty", "d", "wholeGenomeDuplication", "c", "msIndelsPerMb", "d", @@ -401,7 +630,9 @@ purple_purity_read <- function(x) { "tmlStatus", "c", "tmbPerMb", "d", "tmbStatus", "c", - "svTumorMutationalBurden", "d" + "svTumorMutationalBurden", "d", + "runMode", "c", + "targeted", "c" ) ctypes <- paste(tab$type, collapse = "") @@ -475,7 +706,7 @@ purple_snv_vcf_read <- function(x) { dplyr::select("ID", "Description") info_cols <- c( - "AF", "PURPLE_AF", "PURPLE_CN", + "PURPLE_AF", "PURPLE_CN", "PURPLE_GERMLINE", "PURPLE_MACN", "PURPLE_VCN", "HMF_HOTSPOT", "KT", "MH", "SUBCL", "TNC" ) @@ -506,7 +737,7 @@ purple_snv_vcf_read <- function(x) { purple_kataegis <- function(x) { d <- purple_snv_vcf_read(x) info_cols <- c( - "KT", "AF", "PURPLE_AF", "PURPLE_CN", + "KT", "PURPLE_AF", "PURPLE_CN", "PURPLE_MACN", "PURPLE_VCN", "SUBCL", "MH", "TNC" ) diff --git a/R/rmd.R b/R/rmd.R index 8e21b99..25a454c 100644 --- a/R/rmd.R +++ b/R/rmd.R @@ -12,17 +12,18 @@ #' @param conda_list Path to `conda_pkg_list.txt` file. #' @param img_dir Path to directory containing PURPLE plots. #' @param key_genes Path to UMCCR cancer gene file. -#' @param oncoviral_breakpoints_tsv Path to `oncoviruses/oncoviral_breakpoints.tsv`. -#' @param oncoviral_present_viruses Path to `oncoviruses/present_viruses.txt`. -#' @param purple_germ_cnv Path to `purple.cnv.germline.tsv`. +#' @param oncokb_genes Path to OncoKB database file. +#' @param virusbreakend_tsv Path to VIRUSBreakend summary file. +#' @param virusbreakend_vcf Path to VIRUSBreakend VCF file. #' @param purple_purity Path to `purple.purity.tsv`. #' @param purple_qc Path to `purple.qc`. #' @param purple_som_cnv Path to `purple.cnv.somatic.tsv`. +#' @param purple_som_cnv_ann Path to annotated and prioritised `purple.cnv.somatic.tsv`. #' @param purple_som_gene_cnv Path to `purple.cnv.gene.tsv`. #' @param purple_som_snv_vcf Path to `purple.somatic.vcf.gz`. #' @param result_outdir Path to directory to write tidy JSON/TSV results. -#' @param somatic_snv_summary Path to `somatic_snv_summary.json` JSON. #' @param somatic_snv_vcf Path to `somatic-PASS.vcf.gz` SNV VCF. +#' @param somatic_snv_summary Path to `somatic_snv_summary.json` JSON. #' @param somatic_sv_tsv Path to `manta.tsv` TSV file. #' @param somatic_sv_vcf Path to `manta.vcf.gz` VCF file. #' @param tumor_name Name of tumor sample. @@ -31,13 +32,11 @@ #' #' @return Path to rendered HTML report. #' @export -cancer_rmd <- function(af_global, af_keygenes, batch_name, conda_list, img_dir, key_genes, - oncoviral_breakpoints_tsv, oncoviral_present_viruses, - purple_germ_cnv, purple_purity, purple_qc, purple_som_cnv, - purple_som_gene_cnv, purple_som_snv_vcf, somatic_snv_summary, - somatic_snv_vcf, somatic_sv_tsv, somatic_sv_vcf, - result_outdir, tumor_name, - out_file = NULL, quiet = FALSE) { +cancer_rmd <- function(af_global, af_keygenes, batch_name, bcftools_stats, conda_list, dragen_hrd, img_dir, key_genes, + oncokb_genes, virusbreakend_tsv, virusbreakend_vcf, purple_purity, purple_qc, + purple_som_cnv_ann, purple_som_cnv, purple_som_gene_cnv, purple_som_snv_vcf, + somatic_snv_vcf, somatic_snv_summary, somatic_sv_tsv, somatic_sv_vcf, + result_outdir, tumor_name, out_file = NULL, quiet = FALSE) { assertthat::assert_that( dir.exists(img_dir), quiet %in% c(FALSE, TRUE) @@ -64,21 +63,24 @@ cancer_rmd <- function(af_global, af_keygenes, batch_name, conda_list, img_dir, af_global = af_global, af_keygenes = af_keygenes, batch_name = batch_name, + bcftools_stats = bcftools_stats, conda_list = conda_list, + dragen_hrd = dragen_hrd, img_dir = img_dir_b, key_genes = key_genes, - somatic_snv_summary = somatic_snv_summary, + oncokb_genes = oncokb_genes, somatic_snv_vcf = somatic_snv_vcf, + somatic_snv_summary = somatic_snv_summary, somatic_sv_tsv = somatic_sv_tsv, somatic_sv_vcf = somatic_sv_vcf, purple_som_gene_cnv = purple_som_gene_cnv, + purple_som_cnv_ann = purple_som_cnv_ann, purple_som_cnv = purple_som_cnv, - purple_germ_cnv = purple_germ_cnv, purple_purity = purple_purity, purple_qc = purple_qc, purple_som_snv_vcf = purple_som_snv_vcf, - oncoviral_present_viruses = oncoviral_present_viruses, - oncoviral_breakpoints_tsv = oncoviral_breakpoints_tsv, + virusbreakend_tsv = virusbreakend_tsv, + virusbreakend_vcf = virusbreakend_vcf, result_outdir = result_outdir, tumor_name = tumor_name ) diff --git a/R/sv.R b/R/sv.R index 53acd85..69df9ec 100644 --- a/R/sv.R +++ b/R/sv.R @@ -73,6 +73,7 @@ split_double_col <- function(d, nms) { #' expect_equal(m, 2) #' expect_error(gpgr:::count_pieces("foo", NA)) #' +#' @export count_pieces <- function(x, sep) { ifelse(nchar(x) == 0, 0, stringr::str_count(x, sep) + 1) } @@ -107,6 +108,7 @@ count_pieces <- function(x, sep) { #' expect_equal(e3, "TFBSDel, TFBSVar") #' expect_equal(e4, "badaboom, bar, foo, StopGain") #' +#' @export abbreviate_effect <- function(effects) { effect_abbrev_nms <- names(gpgr::EFFECT_ABBREVIATIONS) @@ -123,238 +125,328 @@ abbreviate_effect <- function(effects) { paste(collapse = ", ") } -#' Read SV TSV -#' -#' Reads the Manta TSV file output by umccrise. -#' -#' @param x Path to `manta.tsv` output by umccrise. -#' -#' @return A tibble corresponding to the input TSV file. -#' -#' @examples -#' x <- system.file("extdata/umccrise/sv/manta.tsv", package = "gpgr") -#' (sv <- umccrise_read_sv_tsv(x)$data) -#' @testexamples -#' expect_equal(colnames(sv)[ncol(sv)], "ALT") -#' -#' @export -umccrise_read_sv_tsv <- function(x) { +sash_read_sv_tsv <- function(x) { - # tsv column names, description and types tab <- dplyr::tribble( ~Column, ~Description, ~Type, - "caller", "Manta SV caller", "c", - "sample", "Tumor sample name", "c", "chrom", "CHROM column in VCF", "c", "start", "POS column in VCF", "i", - "end", "INFO/END: End position of the variant described in this record", "i", "svtype", "INFO/SVTYPE: Type of structural variant", "c", - "split_read_support", "FORMAT/SR of tumor sample: Split reads for the ref and alt alleles in the order listed, for reads where P(allele|read)>0.999", "c", - "paired_support_PE", "FORMAT/PE of tumor sample: ??", "c", - "paired_support_PR", "FORMAT/PR of tumor sample: Spanning paired-read support for the ref and alt alleles in the order listed, for reads where P(allele|read)>0.999", "c", - "AF_BPI", "INFO/BPI_AF: AF at each breakpoint (so AF_BPI1,AF_BPI2)", "c", - "somaticscore", "INFO/SOMATICSCORE: Somatic variant quality score", "i", + "SR_alt", "Split reads (or equivalent) of tumor sample", "i", + "PR_alt", "Paired reads (or equivalent) of tumor sample", "i", + "SR_asm_alt", "Split reads (assembly) of tumor sample", "i", + "PR_asm_alt", "Pair reads (assembly) of tumor sample", "i", + "IC_alt", "INDEL-containing reads of tumor sample", "i", + "SR_ref", "FORMAT/REF of tumor sample", "i", + "PR_ref", "FORMAT/REFPAIR of tumor sample", "i", + "QUAL", "QUAL column in VCF", "f", "tier", "INFO/SV_TOP_TIER (or 4 if missing): Highest priority tier for the effects of a variant entry", "c", "annotation", "INFO/SIMPLE_ANN: Simplified structural variant annotation: 'SVTYPE | EFFECT | GENE(s) | TRANSCRIPT | PRIORITY (1-4)'", "c", "AF_PURPLE", "INFO/PURPLE_AF: AF at each breakend (purity adjusted) (so AF_PURPLE1,AF_PURPLE2)", "c", "CN_PURPLE", "INFO/PURPLE_CN: CN at each breakend (purity adjusted) (so CN_PURPLE1,CN_PURPLE2)", "c", "CN_change_PURPLE", "INFO/PURPLE_CN_CHANGE: change in CN at each breakend (purity adjusted) (so CN_change_PURPLE1,CN_change_PURPLE2)", "c", - "Ploidy_PURPLE", "INFO/PURPLE_PLOIDY: Ploidy of variant (purity adjusted)", "d", "PURPLE_status", "INFERRED if FILTER=INFERRED, or RECOVERED if has INFO/RECOVERED, else blank. INFERRED: Breakend inferred from copy number transition", "c", - "START_BPI", "INFO/BPI_START: BPI adjusted breakend location", "i", - "END_BPI", "INFO/BPI_END: BPI adjusted breakend location", "i", "ID", "ID column in VCF", "c", "MATEID", "INFO/MATEID: ID of mate breakend", "c", "ALT", "ALT column in VCF", "c" ) ctypes <- paste(tab$Type, collapse = "") - somatic_sv_tsv <- readr::read_tsv(x, col_names = TRUE, col_types = ctypes) - assertthat::assert_that(ncol(somatic_sv_tsv) == nrow(tab)) - assertthat::assert_that(all(colnames(somatic_sv_tsv) == tab$Column)) + d <- readr::read_tsv(x, col_names = TRUE, col_types = ctypes) + assertthat::assert_that(ncol(d) == nrow(tab)) + assertthat::assert_that(all(colnames(d) == tab$Column)) list( - data = somatic_sv_tsv, + data = d, descr = tab ) } -#' Process Structural Variants -#' -#' Processes the Manta TSV file output by umccrise. -#' -#' @param x Path to `manta.tsv` output by umccrise. -#' @return A list with melted/unmelted tibbles (these are NULL if TSV file was empty). -#' -#' @examples -#' x <- system.file("extdata/umccrise/sv/manta.tsv", package = "gpgr") -#' (sv <- process_sv(x)) -#' @testexamples -#' expect_true(inherits(sv, "list")) -#' expect_equal(length(sv), 4) -#' expect_equal(names(sv), c("unmelted", "melted", "tsv_descr", "col_descr")) -#' -#' @export -process_sv <- function(x) { - sv <- umccrise_read_sv_tsv(x) - tsv_descr <- sv$descr - sv <- sv$data - - if (nrow(sv) == 0) { - return(list( - unmelted = NULL, - melted = NULL - )) - } - col_descr <- dplyr::tribble( - ~Column, ~Description, - "nrow", "Row number that connects variants between tables in same tab set.", - "vcfnum", "Original event row number that connects variants to events.", - "TierTop", "Top priority of the event (from simple_sv_annotation: 1 highest, 4 lowest).", - "Tier", "Priority of the specific event (from simple_sv_annotation: 1 highest, 4 lowest).", - "Tier (Top)", "Priority of the specific event (Top tier of all event's annotations): 1 highest, 4 lowest. ", - "Chr", "Chromosome.", - "Start", "Start position as inferred by BPI. For PURPLE-inferred SVs we use POS.", - "End", "End position. For BNDs = Chr:Start of the mate.Values are inferred by BPI (PURPLE-inferred SVs do not have an End).", - "ID", "ID of BND from Manta. For PURPLE-inferred SVs this is PURPLE.", - "MATEID", "ID of BND mate from Manta.", - "BND_ID", "ID of BND pair simplified. BNDs with the same BND_ID belong to the same translocation event.", - "BND_mate", "A or B depending on whether the first or second mate of the BND pair.", - "Genes", "Genes involved in the event. DEL/DUP/INS events involving more than 2 genes are shown within separate table.", - "Transcript", "Transcripts involved in the event. DEL/DUP/INS events involving more than 2 transcripts are shown within separate table.", - "Effect", "SV effect (based on http://snpeff.sourceforge.net/SnpEff_manual.html#input).", - "Detail", "Prioritisation detail (from simple_sv_annotation).", - "Ploidy", "Ploidy of variant from PURPLE (purity adjusted).", - "AF_PURPLE", "PURPLE AF at each breakend preceded by their average.", - "AF_BPI", "BPI AF at each breakend preceded by their average.", - "CN", "Copy Number at each breakend preceded by their average.", - "CNC", "Copy Number Change at each breakend preceded by their average.", - "SR_alt", "Number of Split Reads supporting the alt allele, where P(allele|read)>0.999.", - "PR_alt", "Number of Paired Reads supporting the alt allele, where P(allele|read)>0.999.", - "SR_PR_ref", "Number of Split Reads and Paired Reads supporting the ref allele, where P(allele|read)>0.999.", - "SR_PR_sum", "Sum of SR_alt and PR_alt.", - "Type", "Type of structural variant.", - "SScore", "Somatic variant quality score.", - "ntrx", "Number of transcripts for given event.", - "ngen", "Number of genes for given event.", - "nann", "Number of annotations for given event." +split_svs <- function(x) { + bps_types <- c("BND", "DEL", "DUP"," INS", "INV") + + x.grouped <- x |> + dplyr::group_by( + record_type=ifelse(Type %in% bps_types, "bps", "other") + ) + + keys <- x.grouped |> + dplyr::group_keys() |> + dplyr::pull(record_type) + + x.split <- x.grouped |> + dplyr::group_split(.keep=FALSE) |> + purrr::set_names(keys) + + list( + bps = purrr::pluck(x.split, "bps"), + other = purrr::pluck(x.split, "other") ) +} - cols_to_split <- c("AF_BPI", "AF_PURPLE", "CN_PURPLE", "CN_change_PURPLE") - double_cols <- split_double_col(sv, cols_to_split) - unmelted <- sv |> - dplyr::select(-dplyr::all_of(c(cols_to_split, "caller", "sample"))) |> - dplyr::bind_cols(double_cols) |> - tidyr::separate(.data$split_read_support, c("SR_ref", "SR_alt"), ",", convert = TRUE) |> - tidyr::separate(.data$paired_support_PR, c("PR_ref", "PR_alt"), ",", convert = TRUE) - unmelted <- unmelted |> +join_breakpoint_entries <- function(x) { + # Group by GRIDSS identifier (clipping trailing h/o [h: High, o: lOwer]) + bps <- x |> + tidyr::separate(ID, into = c("BND_group", "BND_mate"), sep = -1, convert = TRUE, remove = FALSE)|> + dplyr::group_by(BND_group) + + # Set a sequential breakpoint identifier + bps_groups <- bps |> dplyr::n_groups() + bps |> dplyr::mutate( - SR_PR_ref = paste0(.data$SR_ref, ",", .data$PR_ref), - Ploidy = round(as.double(.data$Ploidy_PURPLE), 2), - chrom = sub("chr", "", .data$chrom), - svtype = ifelse(is.na(.data$PURPLE_status), .data$svtype, "PURPLE_inf"), - # when BPI cannot be run, just use start - Start = ifelse(is.na(.data$PURPLE_status), - ifelse(is.na(.data$START_BPI), .data$start, .data$START_BPI), - .data$start - ), - nann = count_pieces(.data$annotation, ","), - vcfnum = dplyr::row_number(), - vcfnum = sprintf(glue::glue("%0{nchar(nrow(unmelted))}d"), .data$vcfnum) + # Assign a unique ID based on current group + BND_ID = sprintf(paste0("%0", nchar(bps_groups), "d"), dplyr::cur_group_id()), + BND_mate = ifelse(BND_mate == "o", "A", "B"), ) |> - dplyr::rowwise() |> - dplyr::mutate(SR_PR_sum = sum(.data$SR_alt, .data$PR_alt, na.rm = TRUE)) |> - dplyr::ungroup() - - # BND IDs - # Two BND mates share the same ID up to the last digit (0 or 1) - unmelted_bnd1 <- unmelted |> - dplyr::filter(.data$svtype == "BND") |> - tidyr::separate(.data$ID, into = c("BND_group", "BND_mate"), sep = -1, convert = TRUE, remove = FALSE) |> - dplyr::group_by(.data$BND_group) - unmelted_bnd1 <- unmelted_bnd1 |> + dplyr::ungroup() |> dplyr::mutate( - # index per group 1, 2, 3.. - BND_ID = dplyr::cur_group_id(), - # turns into 001, 002, 003... if you've got 100+ rows - BND_ID = sprintf(glue::glue("%0{nchar(nrow(unmelted_bnd1))}d"), .data$BND_ID), - BND_mate = ifelse(.data$BND_mate == 0, "A", "B") + end_position = sub("^.*:(\\d+).*$", "\\1", ALT) |> as.numeric() |> base::format(big.mark = ",", trim = TRUE), + end_chrom = sub("^.*chr(.*):.*$", "\\1", ALT), + end = paste0(end_chrom, ":", end_position), ) |> - dplyr::ungroup() + dplyr::select(-c(end_position, end_chrom)) +} - # Grab each BND mate's chrom - # Orphan mates have that info in the ALT field - match_id2mateid <- match(unmelted_bnd1$ID, unmelted_bnd1$MATEID) - unmelted_bnd2 <- unmelted_bnd1[match_id2mateid, c("chrom")] |> - dplyr::rename(BND_mate_chrom = .data$chrom) +remove_gene_fusion_dups <- function(.data, columns) { + # Order elements of multi-entry effect values for reliable comparison + v.groups <- c("frameshift_variant&gene_fusion", "gene_fusion") + v.effects_ordered <- sapply(.data$Effect, function(s) { + c <- stringr::str_split(s, "&") |> unlist() + paste0(sort(c), collapse="&") + }) - unmelted_bnd <- dplyr::bind_cols(unmelted_bnd1, unmelted_bnd2) |> - dplyr::mutate( - BND_mate_chrom = ifelse( - is.na(.data$BND_mate_chrom), - sub(".*chr(.*):.*", "orphan_\\1", .data$ALT), - .data$BND_mate_chrom - ), - BND_mate_end = sub(".*chr(.*):(\\d+).*", "\\2", .data$ALT), - BND_mate_end = as.numeric(.data$BND_mate_end) - ) + if (all(v.groups %in% v.effects_ordered)) { + .data |> dplyr::filter(Effect != "gene_fusion") + } else { + .data + } +} - unmelted_other <- unmelted |> - dplyr::filter(.data$svtype != "BND") +filter_and_split_annotations_sv <- function(x) { - unmelted_all <- - dplyr::bind_rows( - unmelted_bnd, - unmelted_other + filter_conditions <- list( + # Empty Gene field + x$Genes == "", + # Only Ensembl identifiers in the Gene field + stringr::str_split(x$Genes, "[&-]|, ") |> purrr::map_lgl(\(x) stringr::str_starts(x, "ENSG") |> all()), + # Fusions that do not involve genes + x$Effect == "Fus", + # PURPLE inferred SVs + x$Type == "PURPLE_inf" + ) + + x.grouped <- x |> + dplyr::group_by( + filter=ifelse(purrr::reduce(filter_conditions, `|`), "filter", "retain") ) |> + dplyr::select(-c(Type, `Top Tier`)) + + keys <- x.grouped |> + dplyr::group_keys() |> + dplyr::pull(filter) + + x.split <- x.grouped |> + dplyr::group_split(.keep=FALSE) |> + purrr::set_names(keys) + + list( + retained = purrr::pluck(x.split, "retain"), + filtered = purrr::pluck(x.split, "filter") + ) +} + +set_many_transcripts_sv <- function(x) { + # Set many transcripts + x.tmp <- x |> + dplyr::rowwise() |> dplyr::mutate( - Start = base::format(.data$Start, big.mark = ",", trim = TRUE), - Start = paste0(.data$chrom, ":", .data$Start), - END_tmp = ifelse(is.na(.data$END_BPI), .data$end, .data$END_BPI), - END_tmp = ifelse(is.na(.data$END_tmp) & .data$svtype == "BND", .data$BND_mate_end, .data$END_tmp), - END_tmp = base::format(.data$END_tmp, big.mark = ",", trim = TRUE), - End = paste0( - ifelse(.data$svtype == "BND", .data$BND_mate_chrom, .data$chrom), - ":", - .data$END_tmp - ) + `Transcript count` = stringr::str_split(Transcripts, ", ") |> unlist() |> unique() |> length() ) |> + dplyr::ungroup() |> + dplyr::mutate( + many_transcripts = ifelse(`Transcript count` > 2, "many_transcripts", "few_transcripts") + ) + + # Build the many transcripts table + mt <- x.tmp |> + dplyr::filter(many_transcripts == "many_transcripts") |> + # Remove unneeded columns and rename others dplyr::select( - "vcfnum", "nann", - TierTop = "tier", - "Start", "End", - Type = "svtype", - "BND_ID", "BND_mate", - "SR_alt", "PR_alt", "SR_PR_sum", "SR_PR_ref", "Ploidy", - "AF_PURPLE", "AF_BPI", CNC = "CN_change_PURPLE", - CN = "CN_PURPLE", SScore = "somaticscore", "annotation" + "Record ID", + "Annotation ID", + "Tier (top)", + "Start", + "End", + "Transcript count", + "Transcripts", ) - abbreviate_effectv <- Vectorize(abbreviate_effect) + # Clear transcript where appropriate + x.ready <- x.tmp |> + dplyr::mutate( + Transcripts = ifelse( + many_transcripts == "few_transcripts" | is.na(many_transcripts), + Transcripts, + paste0("Many transcripts (", `Transcript count`, ")") + ) + ) |> + dplyr::select(-c(many_transcripts, `Transcript count`)) + + list( + many_transcripts = mt, + sv = x.ready + ) +} + +#' @export +process_sv <- function(x) { + # Read input and set column information + sv.input <- sash_read_sv_tsv(x) + + # Early return if no data to process + if (nrow(sv.input$data) == 0) { + return() + } + + # Prepare input + sv.ready <- sv.input$data |> + dplyr::mutate( + "annotation_count" = count_pieces(annotation, ","), + "Top Tier" = tier, + "SR_PR_ref" = paste0(SR_ref, ",", PR_ref), + "SR_PR_sum" = dplyr::case_when( + is.na(SR_alt) & is.na(PR_alt) ~ NA, + is.na(SR_alt) ~ PR_alt, + is.na(PR_alt) ~ SR_alt, + .default = SR_alt + PR_alt, + ), + "SR_PR_asm_sum" = dplyr::case_when( + is.na(SR_asm_alt) & is.na(PR_asm_alt) ~ NA, + is.na(SR_asm_alt) ~ PR_asm_alt, + is.na(PR_asm_alt) ~ SR_asm_alt, + .default = SR_asm_alt + PR_asm_alt, + ), + start = paste(chrom, base::format(start, big.mark = ",", trim = TRUE), sep=":"), + Type = ifelse(is.na(PURPLE_status), svtype, "PURPLE_inf"), + "Record ID" = dplyr::row_number(), + ) |> + dplyr::select(-c( + chrom, + PURPLE_status, + tier, + svtype, + )) + + # Split out breakpoints for merging + sv.split <- split_svs(sv.ready) + + # Complete breakpoint records with corresponding mate, then combine with non-breakend records + sv.bps <- join_breakpoint_entries(sv.split$bps) + sv.tmp <- dplyr::bind_rows(sv.bps, sv.split$other) + + # Format some columns + cols_to_split <- c("AF_PURPLE", "CN_PURPLE") + double_cols <- split_double_col(sv.tmp, cols_to_split) + sv.tmp <- sv.tmp |> + dplyr::select(-c(cols_to_split)) |> + dplyr::bind_cols(double_cols) + + # Format a table for to be used as the SV Map + sv.map <- sv.tmp |> + dplyr::select( + "Record ID", + "Annotations" = "annotation_count", + "Top Tier", + "Start" = "start", + "End" = "end", + "Type", + "Breakend ID" = "BND_ID", + "Breakend Mate" = "BND_mate", + "SR_alt", + "PR_alt", + "SR_PR_sum", + "SR_asm_alt", + "PR_asm_alt", + "SR_PR_asm_sum", + "IC_alt", + "SR_PR_ref", + "PURPLE AF" = "AF_PURPLE", + "PURPLE CN" = "CN_PURPLE", + ) |> + dplyr::arrange(`Record ID`) - melted <- unmelted_all |> - dplyr::mutate(annotation = strsplit(.data$annotation, ",")) |> - tidyr::unnest(.data$annotation) |> + # Melt annotations + sv.melted_all <- sv.tmp |> + # Split into individual annotations + dplyr::mutate(annotation = strsplit(annotation, ",")) |> + # Convert annotation fields into columns + tidyr::unnest(annotation) |> tidyr::separate( - .data$annotation, c("Event", "Effect", "Genes", "Transcript", "Detail", "Tier"), + annotation, c("Event", "Effect", "Genes", "Transcripts", "Detail", "Tier"), sep = "\\|", convert = FALSE ) |> + # Remove gene_fusion annotations for variants where frameshift_variant&gene_fusion already exist + dplyr::group_by(across(-Effect)) |> + dplyr::group_modify(remove_gene_fusion_dups) |> + dplyr::ungroup() |> + # Remove unused columns + dplyr::select(c(-Event, -ALT)) |> + # Create columns, modify others dplyr::mutate( - ntrx = count_pieces(.data$Transcript, "&"), - ngen = count_pieces(.data$Genes, "&"), - neff = count_pieces(.data$Effect, "&"), - Transcript = .data$Transcript |> stringr::str_replace_all("&", ", "), - Genes = .data$Genes |> stringr::str_replace_all("&", ", "), - Effect = abbreviate_effectv(.data$Effect), - `Tier (Top)` = glue::glue("{Tier} ({TierTop})") + "Annotation ID" = dplyr::row_number(), + "Tier (top)" = paste0(Tier, " (", `Top Tier`, ")"), + "Genes" = stringr::str_replace_all(Genes, "&", ", "), + "Transcripts" = stringr::str_replace_all(Transcripts, "&", ", "), ) |> - dplyr::distinct() |> - dplyr::arrange(.data$`Tier (Top)`, .data$Genes, .data$Effect) + # Sort rows + dplyr::arrange(`Tier (top)`, `Record ID`, Genes, Effect) + + # Abbreviate effects + abbreviate_effectv <- Vectorize(abbreviate_effect) + sv.melted_all$Effect <- abbreviate_effectv(sv.melted_all$Effect) + + # Select and rename columns + sv.melted_all <- sv.melted_all |> + dplyr::select( + "Record ID", + "Annotation ID", + "Tier (top)", + "Start" = "start", + "End" = "end", + "Breakend ID" = "BND_ID", + "Breakend Mate" = "BND_mate", + "Effect", + "Genes", + "Transcripts", + "Effect", + "Detail", + "SR_alt", + "PR_alt", + "SR_PR_sum", + "SR_asm_alt", + "PR_asm_alt", + "SR_PR_asm_sum", + "IC_alt", + "PURPLE AF" = "AF_PURPLE", + "PURPLE CN" = "CN_PURPLE", + # Dropped after ops for non-map outputs + "Top Tier", + "Type", + ) + + # Create and set many transcript values + sv.annotations.many_transcript_data <- set_many_transcripts_sv(sv.melted_all) + sv.annotations.many_transcripts <- purrr::pluck(sv.annotations.many_transcript_data, "many_transcripts") + sv.annotations <- purrr::pluck(sv.annotations.many_transcript_data, "sv") + + # Filter unwanted annotations + sv.annotations.split <- filter_and_split_annotations_sv(sv.annotations) list( - unmelted = unmelted_all, - melted = melted, - tsv_descr = tsv_descr, - col_descr = col_descr + map = sv.map, + map_melted = sv.melted_all, + annotations = sv.annotations.split$retained, + annotations_filtered = sv.annotations.split$filtered, + many_transcripts = sv.annotations.many_transcripts ) } @@ -380,7 +472,7 @@ plot_bnd_sr_pr_tot_lines <- function(d, assertthat::assert_that(all(c("Type", "SR_alt", "PR_alt") %in% colnames(d))) dplot <- d |> dplyr::filter(.data$Type == "BND") |> - dplyr::select(SR = "SR_alt", PR = "PR_alt", Tier = "TierTop", "BND_ID") |> + dplyr::select(SR = "SR_alt", PR = "PR_alt", Tier = "Top Tier", "Breakend ID") |> dplyr::distinct() |> dplyr::mutate( PR = ifelse(is.na(.data$PR), 0, .data$PR), @@ -443,7 +535,7 @@ plot_bnd_sr_pr_tot_hist <- function(d, assertthat::assert_that(all(c("Type", "SR_alt", "PR_alt") %in% colnames(d))) dplot <- d |> dplyr::filter(.data$Type == "BND") |> - dplyr::select(SR = "SR_alt", PR = "PR_alt", "BND_ID") |> + dplyr::select(SR = "SR_alt", PR = "PR_alt", "Breakend ID") |> dplyr::distinct() |> dplyr::mutate( PR = ifelse(is.na(.data$PR), 0, .data$PR), diff --git a/R/umccrise.R b/R/umccrise.R index aa95137..1c81b27 100644 --- a/R/umccrise.R +++ b/R/umccrise.R @@ -1,9 +1,80 @@ +#' Parse bcftools stats File +#' +#' Parses bcftools stats `bcftools_stats.txt` file. +#' +#' @param x Path to bcftools stats `bcftools_stats.txt` file. +#' @return A ggplot2 object. +#' @export +bcftools_stats_plot <- function(x = NULL) { + if (is.null(x)) { + return(NULL) + } + cnames <- c("QUAL_dummy", "id", "qual", "snps", "transi", "transv", "indels") + ln <- readr::read_lines(x) + d1 <- ln[grepl("QUAL", ln)] + # line1 ignore, line2 is colnames, just clean those up + d <- d1[3:length(d1)] |> + I() |> + readr::read_tsv( + col_names = cnames, col_types = readr::cols(.default = "d", "QUAL_dummy" = "c") + ) |> + dplyr::select(-"QUAL_dummy") + if (nrow(d) == 0) { + return(NULL) + } + d <- d |> + dplyr::select("qual", "snps", "indels") |> + tidyr::uncount(.data$snps + .data$indels) |> + dplyr::select("qual") + med <- median(d$qual, na.rm = TRUE) + tot <- nrow(d) + p <- d |> + ggplot2::ggplot(ggplot2::aes(x = .data$qual)) + + ggplot2::geom_histogram(ggplot2::aes(y = ggplot2::after_stat(density)), binwidth = 4, fill = "lightblue") + + ggplot2::geom_density(alpha = 0.6) + + ggplot2::geom_vline(xintercept = med, colour = "blue", linetype = "dashed") + + ggplot2::scale_x_continuous(n.breaks = 10) + + ggplot2::annotate( + geom = "label", x = med + 1, y = +Inf, vjust = 2, + label = paste0("Median: ", med), + ) + + ggplot2::theme_bw() + + ggplot2::ggtitle(glue::glue("SNV quality score distribution (total SNVs: {tot})")) + p +} + + +#' Parse DRAGEN HRD File +#' +#' Parses DRAGEN `hrdscore.csv` file. +#' +#' @param x Path to DRAGEN `hrdscore.csv` file. +#' +#' @return Tibble with a single row and the following score columns: +#' - HRD (homologous recombination deficiency) +#' - LOH (loss of heterozygosity) +#' - TAI (telomeric allelic imbalance) +#' - LST (large-scale state transitions) +#' +#' If no input is provided, the fields are NA. +#' @export +dragen_hrd <- function(x = NULL) { + res <- tibble::tibble(Sample = NA, LOH = NA, TAI = NA, LST = NA, HRD = NA) + if (!is.null(x)) { + res <- x |> + readr::read_csv(col_types = readr::cols(.default = "c")) + names(res) <- sub("_Score", "", names(res)) + } + res +} + #' HRDetect and CHORD Summary Table #' #' Return a summary table with HRDetect and CHORD results. #' #' @param chord_res Result from running [sigrap::chord_run()]. #' @param hrdetect_res Result from running [sigrap::hrdetect_run()]. +#' @param dragen_res Result from running [gpgr::dragen_hrd()]. #' #' @return A list with a tibble and a gt_tbl object (see [gt::gt()]). #' @@ -20,11 +91,12 @@ #' vcf.snv = snv, sample.name = nm, #' df.sv = gpgr:::chord_mantavcf2df(sv) #' ) -#' hrd_results_tabs(hrdetect_res = hrdetect_res, chord_res = chord_res) +#' dragen_res <- gpgr::dragen_hrd("path/to/sample.hrdscore.csv") +#' hrd_results_tabs(hrdetect_res = hrdetect_res, chord_res = chord_res, dragen_res = dragen_res) #' } #' #' @export -hrd_results_tabs <- function(hrdetect_res, chord_res) { +hrd_results_tabs <- function(hrdetect_res, chord_res, dragen_res) { sn <- chord_res$prediction[, "sample", drop = T] assertthat::are_equal(hrdetect_res[, "sample", drop = T], sn) @@ -42,10 +114,17 @@ hrd_results_tabs <- function(hrdetect_res, chord_res) { ) |> dplyr::filter(col != "sample") + dragen_res_tab <- + dragen_res |> + dplyr::select("HRD", "LOH", "TAI", "LST") |> + unlist() |> + tibble::enframe(name = "col", value = "val") + colnames(dragen_res_tab) <- c("DRAGEN", "results_dragen") colnames(hrdetect_res_tab) <- c("HRDetect", "results_hrdetect") colnames(chord_res_tab) <- c("CHORD", "results_chord") + # idea is to make 9-row tibbles and cbind them tab1 <- dplyr::bind_rows( hrdetect_res_tab, @@ -54,7 +133,7 @@ hrd_results_tabs <- function(hrdetect_res, chord_res) { " ", " " ) ) - tab2 <- + tab2a <- dplyr::bind_rows( chord_res_tab[1:7, ], tibble::tribble( @@ -63,10 +142,14 @@ hrd_results_tabs <- function(hrdetect_res, chord_res) { " ", " " ) ) - tab3 <- chord_res_tab[8:16, ] |> + tab2b <- chord_res_tab[8:16, ] |> purrr::set_names(c("CHORD2", "results_chord2")) + tab3 <- dplyr::bind_rows( + dragen_res_tab, + tibble::tibble(DRAGEN = rep(" ", 5), results_dragen = rep(" ", 5)) + ) - hrd_results_tab <- dplyr::bind_cols(tab1, tab2, tab3) + hrd_results_tab <- dplyr::bind_cols(tab3, tab1, tab2a, tab2b) hrd_results_gt <- hrd_results_tab |> @@ -74,6 +157,11 @@ hrd_results_tabs <- function(hrdetect_res, chord_res) { gt::tab_header( title = glue::glue("HRD Results for {sn}") ) |> + gt::tab_spanner( + label = "DRAGEN", + id = "id_dragen", + columns = c("DRAGEN", "results_dragen") + ) |> gt::tab_spanner( label = "HRDetect", id = "id_hrdetect", @@ -85,6 +173,8 @@ hrd_results_tabs <- function(hrdetect_res, chord_res) { columns = c("CHORD", "results_chord", "CHORD2", "results_chord2") ) |> gt::cols_label( + DRAGEN = "", + results_dragen = "", HRDetect = "", results_hrdetect = "", CHORD = "", @@ -97,12 +187,12 @@ hrd_results_tabs <- function(hrdetect_res, chord_res) { gt::cell_text(weight = "bold") ), locations = gt::cells_body( - columns = c("HRDetect", "CHORD", "CHORD2") + columns = c("DRAGEN", "HRDetect", "CHORD", "CHORD2") ) ) |> gt::cols_align( align = "right", - columns = c("results_hrdetect") + columns = c("results_hrdetect", "results_dragen") ) |> gt::tab_style( style = gt::cell_borders( @@ -112,7 +202,7 @@ hrd_results_tabs <- function(hrdetect_res, chord_res) { style = "solid" ), locations = gt::cells_body( - columns = c("CHORD"), + columns = c("CHORD", "HRDetect"), rows = dplyr::everything() ) ) |> diff --git a/R/utils.R b/R/utils.R index e0b3852..d57901e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,13 +1,3 @@ -#' BPI run on VCF -#' -#' @param x Path to VCF file. -#' @export -is_vcf_bpi <- function(x) { - assertthat::assert_that(is_vcf(x)) - d <- bedr::read.vcf(x, nrows = 0, verbose = FALSE) # nrows 0 parses just the vcf header - "bpiVersion" %in% names(d[["header"]]) -} - #' Does the input file look like a VCF? #' #' Quickly checks that the input file has a 'vcf' or 'vcf.gz' suffix, and that @@ -44,9 +34,11 @@ is_vcf <- function(x) { n_max = 1 ) + # COMMIT NOTE: FORMAT FIELD IS NOT MANDITORY + vcf_cols <- c( "#CHROM", "POS", "ID", "REF", "ALT", "QUAL", - "FILTER", "INFO", "FORMAT" + "FILTER", "INFO" ) d_cols <- colnames(d) diff --git a/README.md b/README.md index 9931b82..e70f982 100644 --- a/README.md +++ b/README.md @@ -120,13 +120,12 @@ export PATH="${gpgr_cli}:${PATH}" SOMATIC_SNV_SUMMARY --somatic_snv_vcf SOMATIC_SNV_VCF --somatic_sv_tsv SOMATIC_SV_TSV --somatic_sv_vcf SOMATIC_SV_VCF --purple_som_gene_cnv PURPLE_SOM_GENE_CNV - --purple_som_cnv PURPLE_SOM_CNV --purple_germ_cnv - PURPLE_GERM_CNV --purple_purity PURPLE_PURITY --purple_qc - PURPLE_QC --purple_som_snv_vcf PURPLE_SOM_SNV_VCF - --oncoviral_present_viruses ONCOVIRAL_PRESENT_VIRUSES - --oncoviral_breakpoints_tsv ONCOVIRAL_BREAKPOINTS_TSV - [--out_file OUT_FILE] [--quiet] --result_outdir - RESULT_OUTDIR --tumor_name TUMOR_NAME + --purple_som_cnv PURPLE_SOM_CNV --purple_purity + PURPLE_PURITY --purple_qc PURPLE_QC --purple_som_snv_vcf + PURPLE_SOM_SNV_VCF --oncoviral_present_viruses + ONCOVIRAL_PRESENT_VIRUSES --oncoviral_breakpoints_tsv + ONCOVIRAL_BREAKPOINTS_TSV [--out_file OUT_FILE] [--quiet] + --result_outdir RESULT_OUTDIR --tumor_name TUMOR_NAME options: -h, --help show this help message and exit @@ -153,8 +152,6 @@ export PATH="${gpgr_cli}:${PATH}" Path to `purple.cnv.gene.tsv`. --purple_som_cnv PURPLE_SOM_CNV Path to `purple.cnv.somatic.tsv`. - --purple_germ_cnv PURPLE_GERM_CNV - Path to `purple.cnv.germline.tsv`. --purple_purity PURPLE_PURITY Path to `purple.purity.tsv`. --purple_qc PURPLE_QC diff --git a/inst/cli/canrep.R b/inst/cli/canrep.R index 520598c..b50a351 100644 --- a/inst/cli/canrep.R +++ b/inst/cli/canrep.R @@ -3,21 +3,24 @@ canrep_add_args <- function(subp) { canrep$add_argument("--af_global", help = "Path to `af_tumor.txt` file.", required = TRUE) canrep$add_argument("--af_keygenes", help = "Path to `af_tumor_keygenes.txt` file.", required = TRUE) canrep$add_argument("--batch_name", help = "Name of batch sample.", required = TRUE) - canrep$add_argument("--conda_list", help = "Path to `conda_pkg_list.txt` file.", required = TRUE) + canrep$add_argument("--conda_list", help = "Path to `conda_pkg_list.txt` file.") canrep$add_argument("--img_dir", help = "Path to directory containing PURPLE plots.", required = TRUE) canrep$add_argument("--key_genes", help = "Path to UMCCR cancer gene file.", required = TRUE) - canrep$add_argument("--somatic_snv_summary", help = "Path to `somatic_snv_summary.json`.", required = TRUE) + canrep$add_argument("--oncokb_genes", help = "Path to OncoKB database file.", required = TRUE) canrep$add_argument("--somatic_snv_vcf", help = "Path to `somatic-PASS.vcf.gz` SNV VCF.", required = TRUE) + canrep$add_argument("--somatic_snv_summary", help = "Path to `somatic_snv_summary.json` JSON.", required = TRUE) canrep$add_argument("--somatic_sv_tsv", help = "Path to `manta.tsv` TSV file.", required = TRUE) canrep$add_argument("--somatic_sv_vcf", help = "Path to `manta.vcf.gz` VCF file.", required = TRUE) canrep$add_argument("--purple_som_gene_cnv", help = "Path to `purple.cnv.gene.tsv`.", required = TRUE) + canrep$add_argument("--purple_som_cnv_ann", help = "Path to annotated and prioritised `purple.cnv.somatic.tsv`.", required = TRUE) canrep$add_argument("--purple_som_cnv", help = "Path to `purple.cnv.somatic.tsv`.", required = TRUE) - canrep$add_argument("--purple_germ_cnv", help = "Path to `purple.cnv.germline.tsv`.", required = TRUE) canrep$add_argument("--purple_purity", help = "Path to `purple.purity.tsv`.", required = TRUE) canrep$add_argument("--purple_qc", help = "Path to `purple.qc`.", required = TRUE) canrep$add_argument("--purple_som_snv_vcf", help = "Path to `purple.somatic.vcf.gz`.", required = TRUE) - canrep$add_argument("--oncoviral_present_viruses", help = "Path to `oncoviruses/present_viruses.txt`.", required = TRUE) - canrep$add_argument("--oncoviral_breakpoints_tsv", help = "Path to `oncoviruses/oncoviral_breakpoints.tsv`.", required = TRUE) + canrep$add_argument("--virusbreakend_tsv", help = "Path to VIRUSBreakend summary file.", required = TRUE) + canrep$add_argument("--virusbreakend_vcf", help = "Path to VIRUSBreakend VCF file.", required = TRUE) + canrep$add_argument("--dragen_hrd", help = "Path to DRAGEN HRD file", required = TRUE) + canrep$add_argument("--bcftools_stats", help = "Path to bcftools stats file", required = TRUE) canrep$add_argument("--out_file", help = "Path to output HTML file (needs '.html' suffix) [def: {tumor_name}_cancer_report.html].") canrep$add_argument("--quiet", help = "Suppress log printing during rendering.", action = "store_true") canrep$add_argument("--result_outdir", help = "Path to directory to write tidy JSON/TSV results.", required = TRUE) @@ -33,18 +36,21 @@ canrep_parse_args <- function(args) { conda_list = args$conda_list, img_dir = args$img_dir, key_genes = args$key_genes, - somatic_snv_summary = args$somatic_snv_summary, + oncokb_genes = args$oncokb_genes, somatic_snv_vcf = args$somatic_snv_vcf, + somatic_snv_summary = args$somatic_snv_summary, somatic_sv_tsv = args$somatic_sv_tsv, somatic_sv_vcf = args$somatic_sv_vcf, purple_som_gene_cnv = args$purple_som_gene_cnv, + purple_som_cnv_ann = args$purple_som_cnv_ann, purple_som_cnv = args$purple_som_cnv, - purple_germ_cnv = args$purple_germ_cnv, purple_purity = args$purple_purity, purple_qc = args$purple_qc, purple_som_snv_vcf = args$purple_som_snv_vcf, - oncoviral_present_viruses = args$oncoviral_present_viruses, - oncoviral_breakpoints_tsv = args$oncoviral_breakpoints_tsv, + virusbreakend_tsv = args$virusbreakend_tsv, + virusbreakend_vcf = args$virusbreakend_vcf, + dragen_hrd = args$dragen_hrd, + bcftools_stats = args$bcftools_stats, out_file = args$out_file, quiet = args$quiet, result_outdir = args$result_outdir, diff --git a/inst/rmd/umccrise/_navbar.html b/inst/rmd/umccrise/_navbar.html index 9c3a4ed..b87adb0 100644 --- a/inst/rmd/umccrise/_navbar.html +++ b/inst/rmd/umccrise/_navbar.html @@ -14,6 +14,7 @@ diff --git a/inst/rmd/umccrise/cancer_report.Rmd b/inst/rmd/umccrise/cancer_report.Rmd index 45d15a2..7bc1fb0 100644 --- a/inst/rmd/umccrise/cancer_report.Rmd +++ b/inst/rmd/umccrise/cancer_report.Rmd @@ -13,20 +13,23 @@ params: af_global: NULL af_keygenes: NULL batch_name: NULL + bcftools_stats: NULL conda_list: NULL + dragen_hrd: NULL img_dir: NULL key_genes: NULL - oncoviral_breakpoints_tsv: NULL - oncoviral_present_viruses: NULL - purple_germ_cnv: NULL + oncokb_genes: NULL + virusbreakend_tsv: NULL + virusbreakend_vcf: NULL purple_purity: NULL purple_qc: NULL + purple_som_cnv_ann: NULL purple_som_cnv: NULL purple_som_gene_cnv: NULL purple_som_snv_vcf: NULL result_outdir: NULL - somatic_snv_summary: NULL somatic_snv_vcf: NULL + somatic_snv_summary: NULL somatic_sv_tsv: NULL somatic_sv_vcf: NULL title: "UMCCR Cancer Report" @@ -110,14 +113,13 @@ sv_detail_table <- function(tab) { } ``` - ```{r hrd_prep, results='hide'} #---- Homologous Recombination Deficiency ----# -chord_sv_df <- sigrap::chord_mantavcf2df(params$somatic_sv_vcf) +dragen_hrd_res <- gpgr::dragen_hrd(params$dragen_hrd) chord_res <- sigrap::chord_run( vcf.snv = params$somatic_snv_vcf, - df.sv = chord_sv_df, - sv.caller = "manta", + vcf.sv = params$somatic_sv_vcf, + sv.caller = "gridss", sample.name = params$tumor_name, ref.genome = "hg38", verbose = TRUE @@ -183,35 +185,54 @@ gpgr::write_tsvjsongz(sigs_indel, glue("sigs/{bnm}-indel"), result_outdir) ``` ```{r sv_prep} +#---- Misc ----# +oncokb_genes <- gpgr::read_oncokb(params$oncokb_genes) + #---- Structural Variants ----# sv_path <- params$somatic_sv_tsv sv <- NULL -sv_unmelted <- NULL -sv_all <- NULL -no_sv_found <- TRUE -sv_tsv_descr <- NULL -sv_col_descr <- NULL -if (sv_path == "NA") { - sv_unmelted <- tibble(WARNING = "Structural variants were not called") - sv_all <- tibble(WARNING = "Structural variants were not called") -} else { - no_sv_found <- gpgr::tsv_is_empty(sv_path) - if (no_sv_found) { - sv_unmelted <- tibble(WARNING = "THERE WERE 0 SVs PRIORITISED!") - sv_all <- tibble(WARNING = "THERE WERE 0 SVs PRIORITISED!") - sv_tsv_descr <- tibble(Column = "THERE WERE 0 SVs PRIORITISED!") - sv_col_descr <- tibble(Column = "THERE WERE 0 SVs PRIORITISED!") - } else { - sv <- gpgr::process_sv(sv_path) - sv_unmelted <- sv$unmelted - sv_all <- sv$melted - sv_tsv_descr <- sv$tsv_descr - sv_col_descr <- sv$col_descr - } +no_sv_found <- gpgr::tsv_is_empty(sv_path) +if (!no_sv_found) { + sv <- gpgr::process_sv(sv_path) + + # Set OncoKB genes + sv$annotations <- sv$annotations |> + dplyr::mutate( + "Genes (OncoKB)" = gpgr::get_oncokb_genes(Genes, oncokb_genes), + .after = "Genes", + ) } -gpgr::write_tsvjsongz(sv_unmelted, glue("sv/{bnm}-01_sv_unmelted"), result_outdir) -gpgr::write_tsvjsongz(sv_all, glue("sv/{bnm}-02_sv_melted"), result_outdir) +#---- Copy Number Variants ----# +cnv <- gpgr::process_cnv_tsv(params$purple_som_cnv_ann) + +# Set OncoKB genes +cnv$annotations <- cnv$annotations |> + dplyr::mutate( + "Genes (OncoKB)" = get_oncokb_genes(Genes, oncokb_genes), + .after = "Genes", + ) + +# Set OncoKB genes for Many Genes table and transfer to the main table where appropriate +cnv$many_genes <- cnv$many_genes |> + dplyr::mutate( + `Genes (OncoKB)` = get_oncokb_genes(Genes, oncokb_genes), + .before = "Genes", + ) |> + # Create transfer column, set entries with more than n genes as 'Many genes (m)' + dplyr::rowwise() |> + dplyr::mutate( + gene.oncokb.count = strsplit(`Genes (OncoKB)`, ", ") |> unlist() |> unique() |> length(), + gene.oncokb.tx = ifelse(gene.oncokb.count > 2, paste0("Many genes (", gene.oncokb.count, ")"), `Genes (OncoKB)`), + ) + +# Transfer selected OncoKB gene entries +v.selector <- match(cnv$many_genes$`Annotation ID`, cnv$annotations$`Annotation ID`) +cnv$annotations$`Genes (OncoKB)`[v.selector] <- cnv$many_genes$gene.oncokb.tx + +# Clean up columns +cnv$many_genes <- cnv$many_genes |> + dplyr::select(-c(gene.oncokb.count, gene.oncokb.tx)) ``` ```{r purple_prep} @@ -225,15 +246,13 @@ purple_qc_summary <- dplyr::bind_rows( #---- PURPLE CNV Tables ----# purple_cnv_som <- gpgr::purple_cnv_som_process(params$purple_som_cnv) purple_cnv_som_gene <- gpgr::purple_cnv_som_gene_process(params$purple_som_gene_cnv, params$key_genes) -purple_cnv_germ <- gpgr::purple_cnv_germ_process(params$purple_germ_cnv) gpgr::write_tsvjsongz(purple_cnv_som$tab, glue("purple/{bnm}-purple_cnv_som"), result_outdir) gpgr::write_tsvjsongz(purple_cnv_som_gene$tab, glue("purple/{bnm}-purple_cnv_som_gene"), result_outdir) -gpgr::write_tsvjsongz(purple_cnv_germ$tab, glue("purple/{bnm}-purple_cnv_germ"), result_outdir) ``` ```{r, results='asis'} -blank_lines(2) +blank_lines(1) ``` ## __Summary__ @@ -241,23 +260,6 @@ blank_lines(2)
Description -__SNV Summary__ - -Starting from the raw DRAGEN calls, count the number of variants remaining after -each filtration stage. - -- `raw`: raw DRAGEN calls. -- `raw_pass`: PASSed raw. -- `noalt`: in chr1-22, X, Y, M. -- `sage`: including rescued by SAGE. -- `pregnomad`: should be the same as above. -- `gnomad`: in the hypermutated case, total remaining after removing those - with `gnomAD_AF >= 0.01` (that are not in hotspots). -- `anno`: after any additional filters for hypermutated case. -- `filt`: after applying main filters (tumor allele frequency, PCGR tiers etc.). Should be the same as above. -- `filt_pass`: after removing those that failed to pass the thresholds set above. -- `giab`: after removing those that are __not__ in GIAB confident regions. - __PURPLE Summary__ PURPLE outputs a QC status along with a summary @@ -298,28 +300,33 @@ summarise_sigs <- function(mut_sig_contr) { paste(collapse = ", ") } -snv_summary <- params$somatic_snv_summary %>% +snv_summary_counts <- params$somatic_snv_summary |> jsonlite::read_json() -snvs_max <- 500000 -hypermutated <- with(snv_summary, !is.null(pregnomad) && pregnomad > snvs_max && !is.null(gnomad) && gnomad > snvs_max) -bpi_enabled <- gpgr::is_vcf_bpi(params$somatic_sv_vcf) -snv_summary <- snv_summary %>% - base::unlist() %>% - tibble::enframe() %>% + +# NOTE(SW): hypermutated status is determine indirectly by different counts between 'sage' and +# 'annotated' since this indicates that the bolt hypermutated logic evaluated as true and applied +# variant selection + +hypermutated <- snv_summary_counts$sage != snv_summary_counts$annotated + +snv_summary <- snv_summary_counts |> + unlist() |> + tibble::enframe() |> dplyr::mutate( - value = base::format(value, big.mark = ",", trim = TRUE), - x = glue("{name}: {value}") - ) %>% - dplyr::pull(x) %>% - base::paste(collapse = "; ") + value = format(value, big.mark = ",", trim = TRUE), + x = glue::glue("{name}: {value}"), + ) |> + dplyr::pull(x) |> + paste(collapse = "; ") hrd_summary <- list( + dragen = dragen_hrd_res[, "HRD", drop = TRUE], chord = chord_res[["prediction"]][, "p_hrd", drop = TRUE], hrdetect = hrdetect_res[, "Probability", drop = TRUE] ) cnv_summary <- - list(som = purple_cnv_som$tab, germ = purple_cnv_germ$tab) %>% + list(som = purple_cnv_som$tab) %>% purrr::map(~ dplyr::summarise( .x, Min = round(min(CN), 2), @@ -330,11 +337,10 @@ cnv_summary <- if (no_sv_found) { sv_summary <- tibble(unmelted = c(0), melted = c(0)) } else { - sv_summary <- - list( - unmelted = sv_unmelted %>% dplyr::select(Type), - melted = sv_all %>% dplyr::select(Type) - ) %>% + sv_summary <- list( + unmelted = sv$map %>% dplyr::select(Type), + melted = sv$map_melted %>% dplyr::select(Type) + ) %>% purrr::map(function(x) addmargins(table(x, useNA = "ifany"))) %>% do.call("rbind", .) %>% as.data.frame() %>% @@ -348,26 +354,22 @@ if (no_sv_found) { qc_summary_all <- dplyr::tribble( ~n, ~variable, ~value, ~details, 4, "Hypermutated", ifelse(hypermutated, "TRUE", "FALSE"), - ifelse(hypermutated, "More than 500K SNVs detected, so keep only small variants within UMCCR cancer gene regions (Ensembl v107).", ""), - 4, "BPI Enabled", ifelse(bpi_enabled, "TRUE", "FALSE"), - ifelse(bpi_enabled, "", "BPI was skipped, so SV tables will not have AF_BPI/AF_PURPLE values."), + ifelse(hypermutated, glue::glue( + "More than 500K SNVs detected (including non-PASS), priority regions and/or variants ", + "selected for processing.", + ), ""), 5, "MutSigs (Old)", glue::glue("{summarise_sigs(sigs_snv_2015)}"), "2015 COSMIC (https://cancer.sanger.ac.uk/cosmic/signatures_v2.tt)", 5, "MutSigs (New)", glue::glue("{summarise_sigs(sigs_snv_2020)}"), "2020 COSMIC (https://cancer.sanger.ac.uk/cosmic/signatures)", - 6, "HRD", glue::glue("CHORD: {hrd_summary$chord}; HRDetect: {hrd_summary$hrdetect}"), - "Homologous recombination deficiency probability.", + 6, "HRD", glue::glue("DRAGEN: {hrd_summary$dragen}; CHORD: {hrd_summary$chord}; HRDetect: {hrd_summary$hrdetect}"), + "Homologous recombination deficiency score (DRAGEN) / probability (CHORD/HRDetect).", 20, "CNVs (Somatic)", glue::glue( "Min: {cnv_summary[['som']]$Min}; ", "Max: {cnv_summary[['som']]$Max}; ", "N: {cnv_summary[['som']]$N} " ), "", - 21, "CNVs (Germline)", glue::glue( - "Min: {cnv_summary[['germ']]$Min}; ", - "Max: {cnv_summary[['germ']]$Max}; ", - "N: {cnv_summary[['germ']]$N} " - ), "", - 17, "SNVs (Somatic)", snv_summary, "Summary of SNV filters, from raw to final.", + 17, "SNVs (Somatic)", snv_summary, "Summary SNV counts across main processes, in order", 18, "SVs (unmelted)", paste(rev(sv_summary$unmelted), collapse = ", "), "Summary of SVs as presented in raw VCF.", 19, "SVs (melted)", paste(rev(sv_summary$melted), collapse = ", "), @@ -414,16 +416,6 @@ qc_summary_all %>% rows = grepl("TRUE", value) & variable == "Hypermutated" ) ) %>% - gt::tab_style( - style = list( - cell_fill(color = orange), - cell_text(weight = "bold") - ), - locations = cells_body( - columns = value, - rows = grepl("FALSE", value) & variable == "BPI Enabled" - ) - ) %>% gt::tab_style( style = list( cell_text(weight = "bold") @@ -458,6 +450,7 @@ blank_lines(1) ``` ### __Allelic Frequencies__ {#allelic-freqs} + Summarised below are the allele frequencies (AFs) for somatic variants detected genome-wide (__Global__) vs. within the coding sequence of ~1,100 UMCCR cancer genes (__Key Genes CDS__). @@ -500,6 +493,14 @@ af_summary_results$af_plot blank_lines(1) ``` +### __Variant Quality Scores__ {#var-qual} + +```{r bcftools_stats_plot, fig.width=8, fig.height=4} +params$bcftools_stats |> + gpgr::bcftools_stats_plot() +``` + + ### __SNV__ Signatures {.tabset .tabset-fade #sig-snv} Deciphering the mutational signature of a tumor sample can provide insight into the mutational @@ -598,10 +599,6 @@ mp_plot_bias4 <- MutationalPatterns::plot_strand_bias(strand_bias_rep) (mp_plot_bias1 / mp_plot_bias2) | (mp_plot_bias3 / mp_plot_bias4) ``` -```{r, results='asis'} -blank_lines(1) -``` - ### __SNV__ Signature Contribution {.tabset .tabset-fade #sig-snv-contr}
@@ -635,7 +632,7 @@ sigs_snv_2015 %>% ``` ```{r, results='asis'} -blank_lines(1) +blank_lines(2) ``` #### 2020 @@ -649,7 +646,7 @@ sigs_snv_2020 %>% ``` ```{r, results='asis'} -blank_lines(1) +blank_lines(2) ``` ### __INDEL/DBS__ Signatures & Contributions {.tabset .tabset-fade #sig-indeldbs} @@ -674,7 +671,7 @@ sigs_indel %>% ``` ```{r, results='asis'} -blank_lines(1) +blank_lines(2) ``` #### DBS @@ -689,10 +686,11 @@ sigs_dbs %>% ``` ```{r, results='asis'} -blank_lines(1) +blank_lines(2) ``` ### __Rainfall Plot__ {#rainfall} + Rainfall plots show the distribution of mutations along the genome, with mutation types indicated with different colors. The y-axis corresponds to the distance of a mutation from the previous mutation, and is log10 transformed. Drop-downs from the plots indicate clusters or @@ -722,6 +720,7 @@ if (will_work) { ``` ### __Kataegis__ + Somatic variants of type C>T and C>G in a TpCpN context are annotated as showing Kataegis (Nik Zainal et al., 2012) if there are three or more mutations of that type, strand and context localised within a @@ -748,7 +747,7 @@ if (nrow(kat$data) > 0) { ``` ```{r purple_plot_somaticrainfall, out.width="70%"} -purple_plot_somaticrainfall <- file.path(img_dir, paste0(bnm, ".somatic.rainfall.png")) +purple_plot_somaticrainfall <- file.path(img_dir, paste0(params$tumor_name, ".somatic.rainfall.png")) if (file.exists(purple_plot_somaticrainfall)) { knitr::include_graphics(purple_plot_somaticrainfall) } else { @@ -765,6 +764,15 @@ blank_lines(1)
Description +__DRAGEN__ + +Following scores are shown: + +- HRD: homologous recombination deficiency +- LOH: loss of heterozygosity +- TAI: telomeric allelic imbalance +- LST: large-scale state transitions + __CHORD__ For more details see . @@ -809,10 +817,11 @@ that includes HRDetect.
-```{r chord_hrdetect_res} -hrd_results <- gpgr::hrd_results_tabs(hrdetect_res, chord_res) +```{r chord_hrdetect_dragen_res} +hrd_results <- gpgr::hrd_results_tabs(hrdetect_res, chord_res, dragen_hrd_res) gpgr::write_tsvjsongz(chord_res[["prediction"]], glue("hrd/{bnm}-chord"), result_outdir) gpgr::write_tsvjsongz(hrdetect_res, glue("hrd/{bnm}-hrdetect"), result_outdir) +gpgr::write_tsvjsongz(dragen_hrd_res, glue("hrd/{bnm}-dragen"), result_outdir) hrd_results$hrd_results_gt ``` @@ -821,6 +830,7 @@ blank_lines(1) ``` ## __Circos Plots__ {.tabset .tabset-fade #circos} + Circos plots are generated by PURPLE. The first BAF plot is based on PURPLE data and configuration files. @@ -854,10 +864,9 @@ The first BAF plot is based on PURPLE data and configuration files.
```{r circos_baf_plot, out.width="80%"} -knitr::include_graphics(file.path(img_dir, paste0(bnm, ".circos_baf.png"))) +knitr::include_graphics(file.path(img_dir, paste0(params$tumor_name, ".circos_baf.png"))) ``` - ### SNVs/Indels, Total/Minor CN, SVs
@@ -890,7 +899,7 @@ knitr::include_graphics(file.path(img_dir, paste0(bnm, ".circos_baf.png")))
```{r circos_default1, out.width="80%"} -knitr::include_graphics(file.path(img_dir, paste0(bnm, ".circos.png"))) +knitr::include_graphics(file.path(img_dir, paste0(params$tumor_name, ".circos.png"))) ``` ### Allele Ratios, BAF @@ -908,117 +917,14 @@ knitr::include_graphics(file.path(img_dir, paste0(bnm, ".circos.png")))
```{r circos_default2, out.width="80%"} -knitr::include_graphics(file.path(img_dir, paste0(bnm, ".input.png"))) +knitr::include_graphics(file.path(img_dir, paste0(params$tumor_name, ".input.png"))) ``` -## __Structural Variants__ {#sv-summary} -Structural variants are inferred with [Manta](https://github.com/illumina/manta), -adjusted using [PURPLE](https://github.com/hartwigmedical/hmftools/tree/master/purity-ploidy-estimator#7-structural-variant-recovery-and-single-breakend-filtering), -and prioritised using [simple_sv_annotation](https://github.com/AstraZeneca-NGS/simple_sv_annotation). -Allele frequencies, copy number changes and ploidy are purity-adjusted. - -
-SV Prioritisation Process - -The input file corresponds to `umccrised//structural/-manta.tsv`. - -It's generated through the following steps: - -__Step 1: Processing__ - -* __Input__: Manta structural variant calls from bcbio -(`final//-sv-prioritize-manta.vcf.gz` (or -`-manta.vcf.gz` if not prioritised)) -* Remove following annotations from Manta VCF: - 'INFO/SIMPLE_ANN', 'INFO/SV_HIGHEST_TIER', 'FILTER/Intergenic', 'FILTER/MissingAnn', 'FILTER/REJECT' -* Prioritise variants with [simple_sv_annotation](https://github.com/vladsaveliev/simple_sv_annotation)` -* __Output__: `work/{batch}/structural/prioritize/{batch}-manta.vcf.gz` -* Keep PASS variants -* If more than 100,000 variants, keep only variants where `INFO/SV_TOP_TIER <= 3` -* __Output__: `work/{batch}/structural/keep_pass/{batch}-manta.vcf` -* Deal with chromosome capitalisation occurring from SnpEff -* Run BreakPointInspector (BPI) if it was disabled in bcbio -* __Output__: `work/{batch}/structural/maybe_bpi/{batch}-manta.vcf` - -__Step 2: Filtering__ - -* Keep PASS variants (since BPI updates the FILTER column) -* For BND variants require paired reads support (PR) to be higher than split read support (SR) -* Keep all `INFO/SV_TOP_TIER <= 2` variants -* For `INFO/SV_TOP_TIER > 2` variants require split _or_ paired reads support of at least 5x -* For `INFO/SV_TOP_TIER > 2` variants with low allele frequency at any breakpoint (`BPI_AF[0 or 1] < 0.1`), - require SR or PR support of at least 10x -* __Output__: `work/{batch}/structural/filt/{batch}-manta.vcf` - -__Step 3: PURPLE and FFPE conditional__ - -* If the sample _is not_ FFPE: - * Feed above filtered SVs to PURPLE, which outputs `purple.sv.vcf.gz` that contains rescued SVs - * Prioritise variants (again) - * Remove 'INFO/ANN' annotation - * __Output__: `{batch}/structural/{batch}-manta.vcf.gz` -* If the sample _is_ FFPE: - * Just copy `filtered` variants and don't do anything - (i.e. we don't want the rescued SVs from PURPLE since they'll likely be heaps) - (note that PURPLE will still get fed with the `filtered` SVs) - * __Output__: `{batch}/structural/{batch}-manta.vcf.gz` - -__Step 4: TSV final output__ - -* Input: `{batch}/structural/{batch}-manta.vcf.gz` VCF -* Output: `{batch}/structural/{batch}-manta.tsv` TSV - - -```{r manta_tsv_description} -sv_tsv_descr %>% - dplyr::arrange(Column) %>% - dplyr::mutate(Column = kableExtra::cell_spec(Column, bold = TRUE)) %>% - knitr::kable(escape = FALSE, caption = "Description of Manta TSV columns") %>% - kableExtra::kable_paper(c("hover", "striped"), full_width = FALSE, position = "left") %>% - kableExtra::scroll_box(height = "250px") -``` - -__Prioritisation process__ - -1. Annotate with [SnpEff](http://snpeff.sourceforge.net/SnpEff_manual.html) based on Ensembl gene model -2. Subset annotations to [APPRIS principal transcripts](http://appris.bioinfo.cnio.es/#/) -3. Prioritize variants with [simple_sv_annotation](https://github.com/vladsaveliev/simple_sv_annotation) - 1(high)-2(moderate)-3(low)-4(no interest): - * exon loss - - on prioritisation gene list (1) - - other (2) - * gene_fusion - - paired (hits two genes) - - on list of known pairs (1) (curated by [HMF](https://resources.hartwigmedicalfoundation.nl)) - - one gene is a known promiscuous fusion gene (1) (curated by [HMF](https://resources.hartwigmedicalfoundation.nl)) - - on list of FusionCatcher known pairs (2) - - other: - - one or two genes on prioritisation gene list (2) - - neither gene on prioritisation gene list (3) - - unpaired (hits one gene) - - on prioritisation gene list (2) - - others (3) - * upstream or downstream - - on prioritisation gene list genes (2) - e.g. one gene is got into - control of another gene's promoter and get overexpressed (oncogene) or underexpressed (tsgene) - * LoF or HIGH impact in a tumor suppressor - - on prioritisation gene list (2) - - other TS gene (3) - * other (4) - -* Use PURPLE copy number caller to infer more SV calls from copy number transitions (marked as 'PURPLE Inferred') - -```{r effect_abbrev_tab} -gpgr::EFFECT_ABBREVIATIONS %>% - tibble::enframe(value = "abbreviation") %>% - dplyr::arrange(abbreviation) %>% - dplyr::mutate(name = kableExtra::cell_spec(name, bold = TRUE)) %>% - knitr::kable(escape = FALSE) %>% - kableExtra::kable_paper(c("hover", "striped"), full_width = FALSE, position = "left") %>% - kableExtra::scroll_box(height = "250px") +```{r, results='asis'} +blank_lines(1) ``` -
+## __Structural Variants__
What do the SV tables show? @@ -1029,17 +935,6 @@ annotations for these genes/transcripts are included in the original SV VCF, but they are extremely hard to display and browse inside a HTML table, due to their size. -Therefore, we first show a 'Map' of the SVs corresponding -to the main columns from the original SV VCF, but we don't show the -`INFO/SIMPLE_ANN` annotation column, which contains the -`Effect`, `Genes`, `Transcript`, and `Detail` values of a given SV. This makes -a bit more sense if you take a look at the `nann` Map column, -which shows the number of annotations corresponding to each VCF row. For example, -`nann = 500` would mean that this variant corresponds to 500 annotations. -These get shown in the next tables in a reshaped format, so that instead of -showing 500 annotations for a single row in a single cell, we split that over -multiple columns and rows. You can connect those tables to the Map by `vcfnum`. - Example SV with -only- 3 annotations: ``` @@ -1055,21 +950,30 @@ DEL | downstream_gene_variant | HFE2 | ENST00000336751_exon_1/4 DEL | intergenic_region | TXNIP-HFE2 | ENSG00000265972-ENSG00000168509 | unprioritized | 4 ``` -- The Map is sorted by `TierTop` then `vcfnum`: - - `TierTop` is the top tier of all annotations for a given variant. - - `vcfnum` is the variant row number in the umccrised VCF. -
+```{r} +sv.dt <- sv |> + purrr::map(function(x) { + x |> DT::datatable( + filter = list(position = "top", clear = FALSE, plain = TRUE), + class = "cell-border display compact", + rownames = FALSE, extensions = c("Scroller", "Buttons", "KeyTable"), + options = list( + scroller = TRUE, scrollY = 400, scrollX = TRUE, autoWidth = FALSE, keys = TRUE, + buttons = c("csv", "excel"), dom = "Blfrtip" + ) + ) + }) +``` - -### Summary {.tabset .tabset-fade} +### Summary {.tabset .tabset-fade #sv-summary} #### Plots ```{r plot_bnd_sr_pr_tot_lines, fig.width=12, fig.height=12, out.width="90%"} -p1 <- gpgr:::plot_bnd_sr_pr_tot_lines(sv_unmelted) -p2 <- gpgr::plot_bnd_sr_pr_tot_hist(sv_unmelted) + ggplot2::theme(legend.position = "none") +p1 <- gpgr:::plot_bnd_sr_pr_tot_lines(sv$map) +p2 <- gpgr::plot_bnd_sr_pr_tot_hist(sv$map) + ggplot2::theme(legend.position = "none") ((p1$p_all | p2) / p1$p_tier) ``` @@ -1078,17 +982,17 @@ p2 <- gpgr::plot_bnd_sr_pr_tot_hist(sv_unmelted) + ggplot2::theme(legend.positio ```{r sv_summary, eval=!no_sv_found} d <- list( - unmelted = sv_unmelted %>% dplyr::select(Tier = TierTop, Type), - melted = sv_all %>% dplyr::select(Tier, Type) + unmelted = sv$map %>% dplyr::select("Top Tier", Type), + melted = sv$map_melted %>% dplyr::select("Top Tier", Type) ) %>% dplyr::bind_rows(.id = "group") %>% dplyr::mutate(group = factor(group, levels = c("unmelted", "melted"))) %>% dplyr::group_by(group) %>% - dplyr::count(Tier, Type) %>% + dplyr::count(`Top Tier`, Type) %>% tidyr::pivot_wider(names_from = Type, values_from = n) %>% dplyr::ungroup() %>% dplyr::rowwise() %>% - dplyr::mutate(tot = sum(dplyr::c_across(!(c(group, Tier))), na.rm = TRUE)) + dplyr::mutate(tot = sum(dplyr::c_across(!(c(group, `Top Tier`))), na.rm = TRUE)) d %>% gt::gt( @@ -1117,280 +1021,108 @@ d %>% ``` ```{r, results='asis'} -blank_lines(1) +blank_lines(2) ``` -### __SV Map__ {#sv-unmelted} - -```{r svtab_raw, eval=!no_sv_found} -svtab_raw <- sv_unmelted %>% - dplyr::select(-annotation) %>% - dplyr::arrange(TierTop, vcfnum) - -svtab_rawdt <- svtab_raw %>% - DT::datatable( - filter = list(position = "top", clear = FALSE, plain = TRUE), - class = "cell-border display compact", - rownames = FALSE, extensions = c("Scroller", "Buttons", "KeyTable"), - options = list( - scroller = TRUE, scrollY = 400, scrollX = TRUE, autoWidth = FALSE, keys = TRUE, - buttons = c("csv", "excel"), dom = "Blfrtip" - ) - ) -``` +### __SV Map__ {.tabset .tabset-fade} -```{r svtab_raw_details, eval=!no_sv_found} -details::details(sv_detail_table(svtab_raw), lang = "none", summary = "Column Description") -svtab_rawdt +```{r} +sv.dt$map ``` ```{r, results='asis'} blank_lines(2) ``` -### __Translocations (BNDs)__ {.tabset .tabset-fade #translocations} +### __Breakpoints and Breakends__ {.tabset .tabset-fade} -```{r svtab_BND, eval=!no_sv_found} -sv_BND <- sv_all %>% - dplyr::filter(Type == "BND") %>% - dplyr::mutate( - nrow = dplyr::row_number(), - nrow = sprintf(glue::glue("%0{nchar(nrow(.))}d"), nrow) - ) %>% - dplyr::select(nrow, dplyr::everything()) - - -sv_BND_main <- sv_BND %>% - dplyr::select( - nrow, vcfnum, `Tier (Top)`, Start, End, - BND_ID, BND_mate, - Genes, Effect, Detail, SR_alt, PR_alt, SR_PR_sum, Ploidy, AF_PURPLE - ) - -sv_BND_other <- sv_BND %>% - dplyr::select(nrow, AF_BPI, CNC, CN, SR_PR_ref, SScore, ntrx, Transcript) - -sv_BND_purple <- sv_all %>% - dplyr::filter(Type %in% c("PURPLE_inf")) %>% - dplyr::select(`Tier (Top)`, Start, Effect, Detail, Ploidy, CN, CNC) - -gpgr::write_tsvjsongz( - dplyr::bind_cols(sv_BND_main, sv_BND_other %>% dplyr::select(-nrow)), - glue("sv/{bnm}-03_sv_BND_main"), result_outdir -) -gpgr::write_tsvjsongz(sv_BND_purple, glue("sv/{bnm}-04_sv_BND_purpleinf"), result_outdir) +#### Prioritised -sv_BND_maindt <- sv_BND_main %>% - DT::datatable( - filter = list(position = "top", clear = FALSE, plain = TRUE), - class = "cell-border display compact", - rownames = FALSE, extensions = c("Scroller", "Buttons", "KeyTable"), - options = list( - scroller = TRUE, scrollY = 400, scrollX = TRUE, autoWidth = FALSE, keys = TRUE, - buttons = c("csv", "excel"), dom = "Blfrtip" - ) - ) - -sv_BND_otherdt <- sv_BND_other %>% - DT::datatable( - filter = list(position = "top", clear = FALSE, plain = TRUE), - class = "cell-border display compact", - rownames = FALSE, extensions = c("Scroller", "Buttons", "KeyTable"), - options = list( - scroller = TRUE, scrollY = 400, scrollX = TRUE, keys = TRUE, autoWidth = FALSE, - buttons = c("csv", "excel"), dom = "Blfrtip" - ) - ) +```{r} +sv.dt$annotations +``` -sv_BND_purpledt <- sv_BND_purple %>% - DT::datatable( - filter = list(position = "top", clear = FALSE, plain = TRUE), - class = "cell-border display compact", - rownames = FALSE, extensions = c("Scroller", "Buttons", "KeyTable"), - options = list( - scroller = TRUE, scrollY = 400, scrollX = TRUE, autoWidth = TRUE, - buttons = c("csv", "excel"), - dom = "Blfrtip" - ) - ) +```{r, results='asis'} +blank_lines(2) ``` -#### Main Columns +#### Filtered -```{r sv_BND_main, eval=!no_sv_found} -details::details(sv_detail_table(sv_BND_main), lang = "none", summary = "Column Description") -sv_BND_maindt +```{r} +sv.dt$annotations_filtered ``` ```{r, results='asis'} blank_lines(2) ``` -#### Other Columns +#### Many Transcripts -```{r sv_BND_otherdt, eval=!no_sv_found} -details::details(sv_detail_table(sv_BND_other), lang = "none", summary = "Column Description") -sv_BND_otherdt +```{r} +sv.dt$many_transcripts ``` -#### PURPLE Inferred - -```{r sv_BND_purple, eval=!no_sv_found} -details::details(sv_detail_table(sv_BND_purple), lang = "none", summary = "Column Description") -sv_BND_purpledt +```{r, results='asis'} +blank_lines(2) ``` -### __DEL/DUP/INS__ {.tabset .tabset-fade #deldupins} - -```{r svtab_noBND, eval=!no_sv_found} -sv_noBND <- sv_all %>% - dplyr::filter(!Type %in% c("BND", "PURPLE_inf")) %>% - dplyr::mutate( - nrow = dplyr::row_number(), - nrow = sprintf(glue::glue("%0{nchar(nrow(.))}d"), nrow), - Genes_original = Genes, - Transcript_original = Transcript - ) - -max_genes <- 2 -max_transcripts <- 2 - -sv_noBND_main <- sv_noBND %>% - dplyr::mutate( - Genes = ifelse(ngen > max_genes, - glue::glue("Many Genes ({ngen})"), - Genes - ), - Transcript = ifelse(ntrx > max_transcripts, - glue::glue("Many Transcripts ({ntrx})"), - Transcript - ) - ) %>% - dplyr::select( - nrow, vcfnum, `Tier (Top)`, Type, Start, End, Effect, - Genes, Transcript, Detail, SR_alt, PR_alt, SR_PR_sum, AF_PURPLE - ) - -gpgr::write_tsvjsongz(sv_noBND_main, glue("sv/{bnm}-05_sv_noBND_main"), result_outdir) - -sv_noBND_other <- sv_noBND %>% - dplyr::select(vcfnum, Ploidy, AF_BPI, CNC, CN, SR_PR_ref, SScore) %>% - dplyr::distinct() - -gpgr::write_tsvjsongz(sv_noBND_other, glue("sv/{bnm}-06_sv_noBND_other"), result_outdir) - -sv_noBND_manygenes <- sv_noBND %>% - dplyr::filter(ngen > max_genes) %>% - dplyr::select(nrow, vcfnum, `Tier (Top)`, Type, Start, End, - Effect, ngen, - Genes = Genes_original - ) - -gpgr::write_tsvjsongz(sv_noBND_manygenes, glue("sv/{bnm}-07_sv_noBND_manygenes"), result_outdir) - -sv_noBND_manytx <- sv_noBND %>% - dplyr::filter(ntrx > max_transcripts) %>% - dplyr::select(nrow, vcfnum, `Tier (Top)`, Type, Start, End, - Effect, ntrx, - Transcript = Transcript_original - ) +### __Copy number variants__ {.tabset .tabset-fade} -gpgr::write_tsvjsongz(sv_noBND_manytx, glue("sv/{bnm}-08_sv_noBND_manytranscripts"), result_outdir) - -sv_noBND_maindt <- sv_noBND_main %>% - DT::datatable( +```{r} +cnv.dt <- cnv |> purrr::map(function(x) { + x |> DT::datatable( filter = list(position = "top", clear = FALSE, plain = TRUE), class = "cell-border display compact", - rownames = FALSE, extensions = c("Buttons"), + rownames = FALSE, extensions = c("Scroller", "Buttons", "KeyTable"), options = list( - pageLength = 20, - autoWidth = FALSE, + scroller = TRUE, scrollY = 400, scrollX = TRUE, autoWidth = FALSE, keys = TRUE, buttons = c("csv", "excel"), dom = "Blfrtip" ) ) +}) +``` -sv_noBND_otherdt <- sv_noBND_other %>% - DT::datatable( - filter = list(position = "top", clear = FALSE, plain = TRUE), - class = "cell-border display compact", - rownames = FALSE, extensions = c("Buttons"), - options = list( - pageLength = 20, - autoWidth = FALSE, - buttons = c("csv", "excel"), - dom = "Blfrtip" - ) - ) +#### Prioritised -sv_noBND_manygenesdt <- sv_noBND_manygenes %>% - DT::datatable( - filter = list(position = "top", clear = FALSE, plain = TRUE), - class = "cell-border display compact", - rownames = FALSE, extensions = c("Buttons", "KeyTable"), - options = list( - pageLength = 10, - autoWidth = FALSE, keys = TRUE, - buttons = c("csv", "excel"), - dom = "Blfrtip" - ) - ) +```{r} +cnv.dt$annotations +``` -sv_noBND_manytxdt <- sv_noBND_manytx %>% - DT::datatable( - filter = list(position = "top", clear = FALSE, plain = TRUE), - class = "cell-border display compact", - rownames = FALSE, extensions = c("Buttons", "KeyTable"), - options = list( - pageLength = 10, - autoWidth = FALSE, keys = TRUE, - buttons = c("csv", "excel"), - dom = "Blfrtip" - ) - ) +```{r, results='asis'} +blank_lines(2) ``` -#### Main Columns +#### Filtered -```{r sv_noBND_main, eval=!no_sv_found} -details::details(sv_detail_table(sv_noBND_main), lang = "none", summary = "Column Description") -sv_noBND_maindt +```{r} +cnv.dt$filtered ``` ```{r, results='asis'} blank_lines(2) ``` -#### Other Columns +#### Many Genes -```{r sv_noBND2_other, eval=!no_sv_found} -details::details(sv_detail_table(sv_noBND_other), lang = "none", summary = "Column Description") -sv_noBND_otherdt +```{r} +cnv.dt$many_genes ``` -#### Many Genes - -```{r sv_noBND_manygenes, eval=!no_sv_found} -details::details(sv_detail_table(sv_noBND_manygenes), lang = "none", summary = "Column Description") -sv_noBND_manygenesdt +```{r, results='asis'} +blank_lines(2) ``` #### Many Transcripts -```{r sv_noBND_manytx, eval=!no_sv_found} -details::details(sv_detail_table(sv_noBND_manytx), lang = "none", summary = "Column Description") -sv_noBND_manytxdt +```{r} +cnv.dt$many_transcripts ``` -## __Copy Number Variants__ -The purity and ploidy estimator -[PURPLE](https://github.com/hartwigmedical/hmftools/tree/master/purity-ploidy-estimator) -is used to generate a copy number profile for the somatic sample. - ```{r, results='asis'} -blank_lines(1) +blank_lines(2) ``` -### UMCCR __Gene Somatic__ CNV Calls {#cnv-gene} +### UMCCR __Gene Somatic__ CNV Calls {#gene-panel-cnv}
Description @@ -1429,7 +1161,7 @@ purple_cnv_som_gene$tab %>% blank_lines(2) ``` -### Genome-wide __Somatic__ CNV Segments {#cnv-segs} +### Genome-wide __Somatic__ CNV Segments {#cnv-segments}
Description @@ -1471,36 +1203,18 @@ purple_cnv_som$tab %>% blank_lines(2) ``` -### Genome-wide __Germline__ CNV Segments - -```{r purple_cnv_germ_tab} -purple_cnv_germ$tab %>% - DT::datatable( - filter = list(position = "top", clear = FALSE, plain = TRUE), - class = "cell-border display compact", - rownames = FALSE, extensions = c("Scroller", "Buttons", "KeyTable"), - options = list( - scroller = TRUE, scrollY = 400, scrollX = TRUE, autoWidth = TRUE, keys = TRUE, - buttons = c("csv", "excel"), dom = "Bfrtip" - ) - ) %>% - DT::formatCurrency(~ Start + End, currency = "", interval = 3, mark = ",", digits = 0) -``` - -```{r, results='asis'} -blank_lines(2) -``` - ### __PURPLE Charts__ {.tabset .tabset-fade #purple-charts} + PURPLE generates charts for summarising tumor sample characteristics. Description is from the [PURPLE docs](https://github.com/hartwigmedical/hmftools/tree/master/purity-ploidy-estimator#charts). #### Purity/ploidy + The following 'sunrise' chart shows the range of scores of all examined solutions of purity and ploidy. Crosshairs identify the best purity / ploidy solution. ```{r purple_plot_purityrange, out.width="40%"} -knitr::include_graphics(file.path(img_dir, paste0(bnm, ".purity.range.png"))) +knitr::include_graphics(file.path(img_dir, paste0(params$tumor_name, ".purity.range.png"))) ``` ```{r, results='asis'} @@ -1508,6 +1222,7 @@ blank_lines(2) ``` #### Copy number / Minor allele ploidy + The following figures show the AMBER BAF count weighted distribution of copy number and minor allele ploidy throughout the fitted segments. Copy numbers are broken down by colour into their respective minor @@ -1516,20 +1231,22 @@ down by copy number. ```{r purple_plot_copynumber, out.width="40%"} knitr::include_graphics(c( - file.path(img_dir, paste0(bnm, ".copynumber.png")), - file.path(img_dir, paste0(bnm, ".map.png")) + file.path(img_dir, paste0(params$tumor_name, ".copynumber.png")), + file.path(img_dir, paste0(params$tumor_name, ".map.png")) )) ``` #### Ploidy by CN + If a somatic variant VCF has been supplied, a figure will be produced showing the somatic variant ploidy broken down by copy number. ```{r purple_plot_somatic, out.width="40%"} -knitr::include_graphics(file.path(img_dir, paste0(bnm, ".somatic.png"))) +knitr::include_graphics(file.path(img_dir, paste0(params$tumor_name, ".somatic.png"))) ``` #### Clonality + The following diagram illustrates the clonality model of a typical sample. The top figure shows the histogram of somatic ploidy for all SNVs and INDELs in blue. @@ -1541,67 +1258,80 @@ We can determine the likelihood of a variant being subclonal at any given ploidy as shown in the bottom half of the figure. ```{r purple_plot_somaticclonality, out.width="50%"} -knitr::include_graphics(file.path(img_dir, paste0(bnm, ".somatic.clonality.png"))) +knitr::include_graphics(file.path(img_dir, paste0(params$tumor_name, ".somatic.clonality.png"))) ``` #### Segment + The contribution of each fitted segment to the final score of the best fit is shown in the following figure. Each segment is divided into its major and minor allele ploidy. The area of each circle shows the weight (AMBER baf count) of each segment. ```{r purple_plot_segment, out.width="40%"} -knitr::include_graphics(file.path(img_dir, paste0(bnm, ".segment.png"))) +knitr::include_graphics(file.path(img_dir, paste0(params$tumor_name, ".segment.png"))) ``` -## __Oncoviruses__ {.tabset .tabset-fade #oncoviruses} +## __Oncoviruses__ {#oncoviruses} -Oncoviruses and their integration sites. Viral sequences are obtained from the -GDC database. -Host genes are reported if the integration site falls on a gene or at least before 100kbp of the gene start. +### __Viral content__ -```{r oncoviruses_table} -present_viruses <- params$oncoviral_present_viruses -breakpoints_tsv <- params$oncoviral_breakpoints_tsv +```{r oncoviral_summary} +vb_summary <- gpgr::virusbreakend_summary_read(params$virusbreakend_tsv) -if (!is.na(breakpoints_tsv) && breakpoints_tsv != "" && file.exists(breakpoints_tsv)) { - breakpoints <- readr::read_tsv(breakpoints_tsv, col_names = T, col_types = "cciicicccc") - breakpoints %>% - dplyr::mutate(Coord = ifelse(is.na(end), start, str_c(start, "-", end))) %>% - dplyr::select( - Contig = contig, Coord, `Read support` = PAIR_COUNT, - `Disrupted genes` = DisruptedGenes, `Upstream genes` = UpstreamGenes, - `SV type` = svtype, ID, MATEID - ) %>% - DT::datatable( - filter = list(position = "top", clear = FALSE, plain = TRUE), - class = "cell-border display compact", - rownames = FALSE, extensions = c("Scroller", "Buttons", "KeyTable"), - options = list( - scroller = TRUE, scrollY = 400, scrollX = TRUE, autoWidth = TRUE, keys = TRUE, - buttons = c("csv", "excel"), dom = "Bfrtip" - ) - ) %>% - DT::formatCurrency(~Coord, currency = "", interval = 3, mark = ",", digits = 0) +details::details(knitr::kable(vb_summary$descr), lang = "none", summary = "Column Description") + +if (nrow(vb_summary$tab) > 0) { + DT::datatable(vb_summary$tab, + filter = list(position = "top", clear = FALSE, plain = TRUE), + class = "cell-border display compact", + rownames = FALSE, extensions = c("Scroller", "Buttons", "KeyTable"), + options = list( + scroller = TRUE, scrollY = 200, scrollX = TRUE, autoWidth = FALSE, keys = TRUE, + buttons = c("csv", "excel"), dom = "Blfrtip" + ) + ) |> + DT::formatCurrency(~ Length + Reads, currency = "", interval = 3, mark = ",", digits = 0) } else { - if (!is.na(present_viruses) && present_viruses != "" && file.exists(present_viruses)) { - viruses <- readr::read_file(present_viruses) - if (str_length(viruses) > 0) { - cat(str_c( - "Detected significant traces of content of the following viruses: ", - viruses, - ", however no evidence of viral integration into the host genome is observed." - )) - } else { - cat("No oncoviral content detected in this sample.") - } - } else { - cat("Oncoviral detection was disabled in the umccrise run with `-E oncoviruses`.") - } + cat("No oncoviral content detected.") } ``` ```{r, results='asis'} -blank_lines(1) +blank_lines(2) +``` + +### __Integration sites__ + +```{r oncoviral_integrations} +vb_integrations <- gpgr::virusbreakend_vcf_read(params$virusbreakend_vcf) + +details::details(knitr::kable(vb_integrations$descr), lang = "none", summary = "Column Description") + +if (nrow(vb_integrations$tab) > 0) { + format_common_cols <- c( + "Position", + "Fragment support", + "Fragment support (unmapped)", + "Softclip read support" + ) + + DT::datatable(vb_integrations$tab, + filter = list(position = "top", clear = FALSE, plain = TRUE), + class = "cell-border display compact", + rownames = FALSE, extensions = c("Scroller", "Buttons", "KeyTable"), + options = list( + scroller = TRUE, scrollY = 200, scrollX = TRUE, autoWidth = FALSE, keys = TRUE, + buttons = c("csv", "excel"), dom = "Blfrtip" + ) + ) |> + DT::formatCurrency(format_common_cols, currency = "", interval = 3, mark = ",", digits = 0) +} else { + cat("No oncoviral integrations detected.") +} +``` + +```{r, results='asis'} +blank_lines(2) ``` ## __Addendum__ {.tabset .tabset-fade #addendum} @@ -1611,43 +1341,47 @@ blank_lines(1) ### Conda Pkgs Main ```{r conda_list_main} -conda_pkgs <- - readr::read_table(params$conda_list, - col_names = c("env", "name", "version", "build", "channel"), - col_types = "ccccc" - ) %>% - dplyr::mutate( - channel = ifelse(is.na(channel), "MISSING", channel), - env = sub("^env", "umccrise", env), - env = sub("^umccrise_", "", env) +if (is.null(params$conda_list)) { + cat("No conda package list provided.") +} else { + conda_pkgs <- + readr::read_table(params$conda_list, + col_names = c("env", "name", "version", "build", "channel"), + col_types = "ccccc" + ) %>% + dplyr::mutate( + channel = ifelse(is.na(channel), "MISSING", channel), + env = sub("^env", "umccrise", env), + env = sub("^umccrise_", "", env) + ) %>% + dplyr::arrange(name) %>% + dplyr::group_by(name, version, build, channel) %>% + dplyr::summarise(envs = paste(env, collapse = ", "), .groups = "drop_last") %>% + dplyr::ungroup() + + # main pkgs + tibble::tibble( + name = c( + "umccrise", "r-gpgr", "r-sigrap", "r-base", "pcgr", "cpsr", "cacao", + "bioconductor-mutationalpatterns", "r-signature.tools.lib", "r-chord", + "hmftools-purple", "hmftools-amber", "hmftools-cobalt", "hmftools-sage", + "break-point-inspector", "multiqc", "multiqc-bcbio", "reference-data", + "bed_annotation", "vcf-stuff", "snakemake", "dvc", "ensembl-vep", + "samtools", "bcftools", "htslib", "ngs_utils", "python", "pandoc" + ) ) %>% - dplyr::arrange(name) %>% - dplyr::group_by(name, version, build, channel) %>% - dplyr::summarise(envs = paste(env, collapse = ", "), .groups = "drop_last") %>% - dplyr::ungroup() - -# main pkgs -tibble::tibble( - name = c( - "umccrise", "r-gpgr", "r-sigrap", "r-base", "pcgr", "cpsr", "cacao", - "bioconductor-mutationalpatterns", "r-signature.tools.lib", "r-chord", - "hmftools-purple", "hmftools-amber", "hmftools-cobalt", "hmftools-sage", - "break-point-inspector", "multiqc", "multiqc-bcbio", "reference-data", - "bed_annotation", "vcf-stuff", "snakemake", "dvc", "ensembl-vep", - "samtools", "bcftools", "htslib", "ngs_utils", "python", "pandoc" - ) -) %>% - dplyr::left_join(conda_pkgs, by = "name") %>% - knitr::kable(format = "html") %>% - kableExtra::kable_paper(c("hover", "striped"), full_width = TRUE, position = "left") %>% - kableExtra::column_spec(1, bold = TRUE) %>% - kableExtra::scroll_box(height = "300px") + dplyr::left_join(conda_pkgs, by = "name") %>% + knitr::kable(format = "html") %>% + kableExtra::kable_paper(c("hover", "striped"), full_width = TRUE, position = "left") %>% + kableExtra::column_spec(1, bold = TRUE) %>% + kableExtra::scroll_box(height = "300px") +} ``` ### Report Inputs ```{r report_inputs} -report_inputs <- dplyr::tibble(key = names(params), value = unlist(params)) +report_inputs <- dplyr::tibble(key = names(params), value = params) gpgr::write_tsvjsongz(report_inputs, glue("{bnm}-report_inputs"), result_outdir) report_inputs %>% @@ -1660,13 +1394,17 @@ report_inputs %>% ### Conda Pkgs All ```{r conda_list_all} -conda_pkgs %>% - DT::datatable( - filter = list(position = "top", clear = FALSE, plain = TRUE), - rownames = FALSE, extensions = c("Scroller", "Buttons"), - options = list( - scroller = TRUE, scrollX = TRUE, scrollY = 400, - buttons = c("csv"), dom = "Bfrtip" +if (is.null(params$conda_list)) { + cat("No conda package list provided.") +} else { + conda_pkgs %>% + DT::datatable( + filter = list(position = "top", clear = FALSE, plain = TRUE), + rownames = FALSE, extensions = c("Scroller", "Buttons"), + options = list( + scroller = TRUE, scrollX = TRUE, scrollY = 400, + buttons = c("csv"), dom = "Bfrtip" + ) ) - ) +} ``` diff --git a/inst/rmd/umccrise/render.R b/inst/rmd/umccrise/render.R index fd790e4..aa8cad7 100644 --- a/inst/rmd/umccrise/render.R +++ b/inst/rmd/umccrise/render.R @@ -1,38 +1,37 @@ #!/usr/bin/env Rscript -require(here) +require(here, include.only = "here") require(gpgr) -require(glue) +require(glue, include.only = "glue") require(purrr) -batch_name <- "SBJ02269__PRJ221202" -tumor_name <- "PRJ221202" -# umccrised_dir <- "/g/data/gx8/projects/diakumis/umccrise/data/SBJ02269" -# key_genes <- "/g/data/gx8/projects/diakumis/conda/envs/umccrise/lib/python3.7/site-packages/ngs_utils/reference_data/key_genes/umccr_cancer_genes.latest.tsv" -umccrised_dir <- here::here("nogit/umccrised_data/SBJ02269") -key_genes <- here::here("nogit/umccrised_data/umccr_cancer_genes.latest.tsv") +batch_name <- "SBJ00480_PTC_HCC1395t100pc" +tumor_name <- "PTC_HCC1395t100pc" +umccrised_dir <- here("nogit/umccrised_data/seqc_inputs.20231115") params <- list( - af_global = glue::glue("{umccrised_dir}/work/{batch_name}/cancer_report/afs/af_tumor.txt"), - af_keygenes = glue::glue("{umccrised_dir}/work/{batch_name}/cancer_report/afs/af_tumor_keygenes.txt"), + af_global = glue("{umccrised_dir}/sample_data/af_tumor.txt"), + af_keygenes = glue("{umccrised_dir}/sample_data/af_tumor_keygenes.txt"), batch_name = batch_name, - conda_list = glue::glue("{umccrised_dir}/work/{batch_name}/conda_pkg_list.txt"), - img_dir = glue::glue("{umccrised_dir}/img"), - key_genes = key_genes, - oncoviral_breakpoints_tsv = glue::glue("{umccrised_dir}/work/{batch_name}/oncoviruses/oncoviral_breakpoints.tsv"), - oncoviral_present_viruses = glue::glue("{umccrised_dir}/work/{batch_name}/oncoviruses/present_viruses.txt"), - out_file = here::here("nogit/umccrised_data/SBJ02269/foo_cancer_report.html"), - purple_germ_cnv = glue::glue("{umccrised_dir}/work/{batch_name}/purple/{batch_name}.purple.cnv.germline.tsv"), - purple_purity = glue::glue("{umccrised_dir}/work/{batch_name}/purple/{batch_name}.purple.purity.tsv"), - purple_qc = glue::glue("{umccrised_dir}/work/{batch_name}/purple/{batch_name}.purple.qc"), - purple_som_cnv = glue::glue("{umccrised_dir}/work/{batch_name}/purple/{batch_name}.purple.cnv.somatic.tsv"), - purple_som_gene_cnv = glue::glue("{umccrised_dir}/work/{batch_name}/purple/{batch_name}.purple.cnv.gene.tsv"), - purple_som_snv_vcf = glue::glue("{umccrised_dir}/work/{batch_name}/purple/{batch_name}.purple.somatic.vcf.gz"), - result_outdir = glue::glue("{umccrised_dir}/{batch_name}/cancer_report_tables2"), - somatic_snv_summary = glue::glue("{umccrised_dir}/work/{batch_name}/cancer_report/somatic_snv_summary.json"), - somatic_snv_vcf = glue::glue("{umccrised_dir}/{batch_name}/small_variants/{batch_name}-somatic-PASS.vcf.gz"), - somatic_sv_tsv = glue::glue("{umccrised_dir}/{batch_name}/structural/{batch_name}-manta.tsv"), - somatic_sv_vcf = glue::glue("{umccrised_dir}/{batch_name}/structural/{batch_name}-manta.vcf.gz"), + bcftools_stats = glue("{umccrised_dir}/sample_data/{tumor_name}.somatic.bcftools_stats.txt"), + conda_list = NULL, + dragen_hrd = glue("{umccrised_dir}/sample_data/{tumor_name}.hrdscore.csv"), + img_dir = glue("{umccrised_dir}/output/img"), + key_genes = glue("{umccrised_dir}/reference_data/umccr_cancer_genes.latest.tsv"), + oncokb_genes = glue("{umccrised_dir}/reference_data/oncokb_genes.20231113.tsv"), + virusbreakend_tsv = glue("{umccrised_dir}/sample_data/virusbreakend/{batch_name}.virusbreakend.vcf.summary.tsv"), + virusbreakend_vcf = glue("{umccrised_dir}/sample_data/virusbreakend/{batch_name}.virusbreakend.vcf"), + purple_purity = glue("{umccrised_dir}/sample_data/purple/{tumor_name}.purple.purity.tsv"), + purple_qc = glue("{umccrised_dir}/sample_data/purple/{tumor_name}.purple.qc"), + purple_som_cnv_ann = glue("{umccrised_dir}/sample_data/{tumor_name}.cnv.prioritised.tsv"), + purple_som_cnv = glue("{umccrised_dir}/sample_data/purple/{tumor_name}.purple.cnv.somatic.tsv"), + purple_som_gene_cnv = glue("{umccrised_dir}/sample_data/purple/{tumor_name}.purple.cnv.gene.tsv"), + purple_som_snv_vcf = glue("{umccrised_dir}/sample_data/purple/{tumor_name}.purple.somatic.vcf.gz"), + result_outdir = glue("{umccrised_dir}/output/cancer_report_tables"), + somatic_snv_vcf = glue("{umccrised_dir}/sample_data/{tumor_name}.pass.vcf.gz"), + somatic_snv_summary = glue("{umccrised_dir}/sample_data/{tumor_name}.somatic.variant_counts_process.json"), + somatic_sv_tsv = glue("{umccrised_dir}/sample_data/{tumor_name}.sv.prioritised.tsv"), + somatic_sv_vcf = glue("{umccrised_dir}/sample_data/{tumor_name}.sv.prioritised.vcf.gz"), tumor_name = tumor_name ) diff --git a/inst/rmd/umccrise/render.sh b/inst/rmd/umccrise/render.sh index 03bbc0f..9d608bf 100644 --- a/inst/rmd/umccrise/render.sh +++ b/inst/rmd/umccrise/render.sh @@ -24,7 +24,6 @@ $gpgr_cli canrep \ --somatic_sv_vcf "${umccrised_dir}/${batch_name}/structural/${batch_name}-manta.vcf.gz" \ --purple_som_gene_cnv "${umccrised_dir}/work/${batch_name}/purple/${batch_name}.purple.cnv.gene.tsv" \ --purple_som_cnv "${umccrised_dir}/work/${batch_name}/purple/${batch_name}.purple.cnv.somatic.tsv" \ - --purple_germ_cnv "${umccrised_dir}/work/${batch_name}/purple/${batch_name}.purple.cnv.germline.tsv" \ --purple_purity "${umccrised_dir}/work/${batch_name}/purple/${batch_name}.purple.purity.tsv" \ --purple_qc "${umccrised_dir}/work/${batch_name}/purple/${batch_name}.purple.qc" \ --purple_som_snv_vcf "${umccrised_dir}/work/${batch_name}/purple/${batch_name}.purple.somatic.vcf.gz" \ diff --git a/man/bcftools_stats_plot.Rd b/man/bcftools_stats_plot.Rd new file mode 100644 index 0000000..b2f9b17 --- /dev/null +++ b/man/bcftools_stats_plot.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/umccrise.R +\name{bcftools_stats_plot} +\alias{bcftools_stats_plot} +\title{Parse bcftools stats File} +\usage{ +bcftools_stats_plot(x = NULL) +} +\arguments{ +\item{x}{Path to bcftools stats \code{bcftools_stats.txt} file.} +} +\value{ +A ggplot2 object. +} +\description{ +Parses bcftools stats \code{bcftools_stats.txt} file. +} diff --git a/man/cancer_rmd.Rd b/man/cancer_rmd.Rd index 683eeab..ccc1012 100644 --- a/man/cancer_rmd.Rd +++ b/man/cancer_rmd.Rd @@ -9,18 +9,20 @@ cancer_rmd( af_keygenes, batch_name, conda_list, + dragen_hrd, img_dir, key_genes, - oncoviral_breakpoints_tsv, - oncoviral_present_viruses, - purple_germ_cnv, + oncokb_genes, + virusbreakend_tsv, + virusbreakend_vcf, purple_purity, purple_qc, + purple_som_cnv_ann, purple_som_cnv, purple_som_gene_cnv, purple_som_snv_vcf, - somatic_snv_summary, somatic_snv_vcf, + somatic_snv_summary, somatic_sv_tsv, somatic_sv_vcf, result_outdir, @@ -42,26 +44,28 @@ cancer_rmd( \item{key_genes}{Path to UMCCR cancer gene file.} -\item{oncoviral_breakpoints_tsv}{Path to \code{oncoviruses/oncoviral_breakpoints.tsv}.} +\item{oncokb_genes}{Path to OncoKB database file.} -\item{oncoviral_present_viruses}{Path to \code{oncoviruses/present_viruses.txt}.} +\item{virusbreakend_tsv}{Path to VIRUSBreakend summary file.} -\item{purple_germ_cnv}{Path to \code{purple.cnv.germline.tsv}.} +\item{virusbreakend_vcf}{Path to VIRUSBreakend VCF file.} \item{purple_purity}{Path to \code{purple.purity.tsv}.} \item{purple_qc}{Path to \code{purple.qc}.} +\item{purple_som_cnv_ann}{Path to annotated and prioritised \code{purple.cnv.somatic.tsv}.} + \item{purple_som_cnv}{Path to \code{purple.cnv.somatic.tsv}.} \item{purple_som_gene_cnv}{Path to \code{purple.cnv.gene.tsv}.} \item{purple_som_snv_vcf}{Path to \code{purple.somatic.vcf.gz}.} -\item{somatic_snv_summary}{Path to \code{somatic_snv_summary.json} JSON.} - \item{somatic_snv_vcf}{Path to \code{somatic-PASS.vcf.gz} SNV VCF.} +\item{somatic_snv_summary}{Path to \code{somatic_snv_summary.json} JSON.} + \item{somatic_sv_tsv}{Path to \code{manta.tsv} TSV file.} \item{somatic_sv_vcf}{Path to \code{manta.vcf.gz} VCF file.} diff --git a/man/dragen_hrd.Rd b/man/dragen_hrd.Rd new file mode 100644 index 0000000..2f85cb3 --- /dev/null +++ b/man/dragen_hrd.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/umccrise.R +\name{dragen_hrd} +\alias{dragen_hrd} +\title{Parse DRAGEN HRD File} +\usage{ +dragen_hrd(x = NULL) +} +\arguments{ +\item{x}{Path to DRAGEN \code{hrdscore.csv} file.} +} +\value{ +Tibble with a single row and the following score columns: +\itemize{ +\item HRD (homologous recombination deficiency) +\item LOH (loss of heterozygosity) +\item TAI (telomeric allelic imbalance) +\item LST (large-scale state transitions) +} + +If no input is provided, the fields are NA. +} +\description{ +Parses DRAGEN \code{hrdscore.csv} file. +} diff --git a/man/hrd_results_tabs.Rd b/man/hrd_results_tabs.Rd index be981c8..a8434af 100644 --- a/man/hrd_results_tabs.Rd +++ b/man/hrd_results_tabs.Rd @@ -4,12 +4,14 @@ \alias{hrd_results_tabs} \title{HRDetect and CHORD Summary Table} \usage{ -hrd_results_tabs(hrdetect_res, chord_res) +hrd_results_tabs(hrdetect_res, chord_res, dragen_res) } \arguments{ \item{hrdetect_res}{Result from running \code{\link[sigrap:hrdetect_run]{sigrap::hrdetect_run()}}.} \item{chord_res}{Result from running \code{\link[sigrap:chord_run]{sigrap::chord_run()}}.} + +\item{dragen_res}{Result from running \code{\link[=dragen_hrd]{dragen_hrd()}}.} } \value{ A list with a tibble and a gt_tbl object (see \code{\link[gt:gt]{gt::gt()}}). @@ -30,7 +32,8 @@ chord_res <- sigrap::chord_run( vcf.snv = snv, sample.name = nm, df.sv = gpgr:::chord_mantavcf2df(sv) ) -hrd_results_tabs(hrdetect_res = hrdetect_res, chord_res = chord_res) +dragen_res <- gpgr::dragen_hrd("path/to/sample.hrdscore.csv") +hrd_results_tabs(hrdetect_res = hrdetect_res, chord_res = chord_res, dragen_res = dragen_res) } } diff --git a/man/is_vcf_bpi.Rd b/man/is_vcf_bpi.Rd deleted file mode 100644 index b6ef2a6..0000000 --- a/man/is_vcf_bpi.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{is_vcf_bpi} -\alias{is_vcf_bpi} -\title{BPI run on VCF} -\usage{ -is_vcf_bpi(x) -} -\arguments{ -\item{x}{Path to VCF file.} -} -\description{ -BPI run on VCF -} diff --git a/man/process_sv.Rd b/man/process_sv.Rd deleted file mode 100644 index ef2ba77..0000000 --- a/man/process_sv.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sv.R -\name{process_sv} -\alias{process_sv} -\title{Process Structural Variants} -\usage{ -process_sv(x) -} -\arguments{ -\item{x}{Path to \code{manta.tsv} output by umccrise.} -} -\value{ -A list with melted/unmelted tibbles (these are NULL if TSV file was empty). -} -\description{ -Processes the Manta TSV file output by umccrise. -} -\examples{ -x <- system.file("extdata/umccrise/sv/manta.tsv", package = "gpgr") -(sv <- process_sv(x)) -} diff --git a/man/purple_cnv_germ_process.Rd b/man/purple_cnv_germ_process.Rd deleted file mode 100644 index 73658b9..0000000 --- a/man/purple_cnv_germ_process.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/purple.R -\name{purple_cnv_germ_process} -\alias{purple_cnv_germ_process} -\title{Process PURPLE CNV germline File for UMCCRISE} -\usage{ -purple_cnv_germ_process(x) -} -\arguments{ -\item{x}{Path to \code{purple.cnv.germline.tsv} file.} -} -\value{ -List with two elements: -\itemize{ -\item \code{tab}: Tibble with more condensed columns. -\item \code{descr}: Description of tibble columns. -} -} -\description{ -Processes the \code{purple.cnv.germline.tsv} file. -and selects columns of interest. -} -\examples{ -x <- system.file("extdata/purple/purple.cnv.germline.tsv", package = "gpgr") -(pp <- purple_cnv_germ_process(x)) -} diff --git a/man/purple_cnv_germ_read.Rd b/man/purple_cnv_germ_read.Rd deleted file mode 100644 index 82e82f7..0000000 --- a/man/purple_cnv_germ_read.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/purple.R -\name{purple_cnv_germ_read} -\alias{purple_cnv_germ_read} -\title{Read PURPLE CNV Germline File} -\usage{ -purple_cnv_germ_read(x) -} -\arguments{ -\item{x}{Path to \code{purple.cnv.germline.tsv} file.} -} -\value{ -The input file as a tibble. -} -\description{ -Reads the \code{purple.cnv.germline.tsv} file. -} -\examples{ -x <- system.file("extdata/purple/purple.cnv.germline.tsv", package = "gpgr") -(p <- purple_cnv_germ_read(x)) -} diff --git a/man/purple_cnv_som_process.Rd b/man/purple_cnv_som_process.Rd index b90fcb8..851e122 100644 --- a/man/purple_cnv_som_process.Rd +++ b/man/purple_cnv_som_process.Rd @@ -17,8 +17,7 @@ List with two elements: } } \description{ -Processes the \code{purple.cnv.somatic.tsv} file. -and selects columns of interest. +Processes the \code{purple.cnv.somatic.tsv} file and selects columns of interest. } \examples{ x <- system.file("extdata/purple/purple.cnv.somatic.tsv", package = "gpgr") diff --git a/man/purple_version_read.Rd b/man/purple_version_read.Rd deleted file mode 100644 index afc364c..0000000 --- a/man/purple_version_read.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/purple.R -\name{purple_version_read} -\alias{purple_version_read} -\title{Read PURPLE version file} -\usage{ -purple_version_read(x) -} -\arguments{ -\item{x}{Path to the \code{purple.version} file.} -} -\value{ -A list with the elements: -\itemize{ -\item \code{version}: The version of PURPLE used. -\item \code{build_date}: The build date of the PURPLE version. -} -} -\description{ -Reads the \code{purple.version} file containing the PURPLE version and build date. -} -\examples{ -x <- system.file("extdata/purple/purple.version", package = "gpgr") -(v <- purple_version_read(x)) -} diff --git a/man/umccrise_read_sv_tsv.Rd b/man/umccrise_read_sv_tsv.Rd deleted file mode 100644 index 5f7a455..0000000 --- a/man/umccrise_read_sv_tsv.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sv.R -\name{umccrise_read_sv_tsv} -\alias{umccrise_read_sv_tsv} -\title{Read SV TSV} -\usage{ -umccrise_read_sv_tsv(x) -} -\arguments{ -\item{x}{Path to \code{manta.tsv} output by umccrise.} -} -\value{ -A tibble corresponding to the input TSV file. -} -\description{ -Reads the Manta TSV file output by umccrise. -} -\examples{ -x <- system.file("extdata/umccrise/sv/manta.tsv", package = "gpgr") -(sv <- umccrise_read_sv_tsv(x)$data) -} diff --git a/man/virusbreakend_summary_read.Rd b/man/virusbreakend_summary_read.Rd new file mode 100644 index 0000000..e78287b --- /dev/null +++ b/man/virusbreakend_summary_read.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/oncoviral.R +\name{virusbreakend_summary_read} +\alias{virusbreakend_summary_read} +\title{Read VIRUSBreakend Summary File} +\usage{ +virusbreakend_summary_read(x) +} +\arguments{ +\item{x}{Path to \code{virusbreakend.vcf.summary.tsv} file.} +} +\value{ +List with two elements: +\itemize{ +\item \code{tab}: Tibble containing data. +\item \code{descr}: Description of tibble columns. +} +} +\description{ +Reads the \code{virusbreakend.vcf.summary.tsv} file. +} +\examples{ +x <- system.file("extdata/virusbreakend/virusbreakend.vcf.summary.tsv", package = "gpgr") +(vb <- virusbreakend_summary_read(x)) +} diff --git a/man/virusbreakend_vcf_read.Rd b/man/virusbreakend_vcf_read.Rd new file mode 100644 index 0000000..56b8d09 --- /dev/null +++ b/man/virusbreakend_vcf_read.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/oncoviral.R +\name{virusbreakend_vcf_read} +\alias{virusbreakend_vcf_read} +\title{Read VIRUSBreakend VCF File} +\usage{ +virusbreakend_vcf_read(x) +} +\arguments{ +\item{x}{Path to \code{virusbreakend.vcf} file.} +} +\value{ +List with two elements: +\itemize{ +\item \code{tab}: Tibble containing selected data. +\item \code{descr}: Description of tibble columns. +} +} +\description{ +Reads the \code{virusbreakend.vcf} file and selects data to present. +} +\examples{ +x <- system.file("extdata/virusbreakend/virusbreakend.vcf", package = "gpgr") +(vb <- virusbreakend_vcf_read(x)) +} diff --git a/tests/testthat/test-roxytest-testexamples-oncoviral.R b/tests/testthat/test-roxytest-testexamples-oncoviral.R new file mode 100644 index 0000000..cad645a --- /dev/null +++ b/tests/testthat/test-roxytest-testexamples-oncoviral.R @@ -0,0 +1,19 @@ +# Generated by roxytest: do not edit by hand! + +# File R/oncoviral.R: @testexamples + +test_that("Function virusbreakend_summary_read() @ L18", { + + x <- system.file("extdata/virusbreakend/virusbreakend.vcf.summary.tsv", package = "gpgr") + (vb <- virusbreakend_summary_read(x)) + expect_equal(colnames(vb)[ncol(vb)], "QCStatus") +}) + + +test_that("Function virusbreakend_vcf_read() @ L101", { + + x <- system.file("extdata/virusbreakend/virusbreakend.vcf", package = "gpgr") + (vb <- virusbreakend_vcf_read(x)) + expect_equal(colnames(vb)[ncol(vb)], "QC") +}) + diff --git a/tests/testthat/test-roxytest-testexamples-purple.R b/tests/testthat/test-roxytest-testexamples-purple.R index 6af5c55..a9fde90 100644 --- a/tests/testthat/test-roxytest-testexamples-purple.R +++ b/tests/testthat/test-roxytest-testexamples-purple.R @@ -10,7 +10,7 @@ test_that("Function purple_cnv_som_gene_read() @ L18", { }) -test_that("Function purple_cnv_som_gene_process() @ L62", { +test_that("Function purple_cnv_som_gene_process() @ L60", { x <- system.file("extdata/purple/purple.cnv.gene.tsv", package = "gpgr") g <- system.file("extdata/ref/umccr_cancer_genes_2019-03-20.tsv", package = "gpgr") @@ -19,7 +19,7 @@ test_that("Function purple_cnv_som_gene_process() @ L62", { }) -test_that("Function purple_cnv_som_read() @ L147", { +test_that("Function purple_cnv_som_read() @ L449", { x <- system.file("extdata/purple/purple.cnv.somatic.tsv", package = "gpgr") (p <- purple_cnv_som_read(x)) @@ -27,7 +27,7 @@ test_that("Function purple_cnv_som_read() @ L147", { }) -test_that("Function purple_cnv_som_process() @ L181", { +test_that("Function purple_cnv_som_process() @ L482", { x <- system.file("extdata/purple/purple.cnv.somatic.tsv", package = "gpgr") (pp <- purple_cnv_som_process(x)) @@ -35,33 +35,7 @@ test_that("Function purple_cnv_som_process() @ L181", { }) -test_that("Function purple_cnv_germ_read() @ L248", { - - x <- system.file("extdata/purple/purple.cnv.germline.tsv", package = "gpgr") - (p <- purple_cnv_germ_read(x)) - expect_equal(colnames(p)[ncol(p)], "majorAlleleCopyNumber") -}) - - -test_that("Function purple_cnv_germ_process() @ L272", { - - x <- system.file("extdata/purple/purple.cnv.germline.tsv", package = "gpgr") - (pp <- purple_cnv_germ_process(x)) - expect_equal(colnames(pp$tab)[ncol(pp$tab)], "GC (windowCount)") -}) - - -test_that("Function purple_version_read() @ L297", { - - x <- system.file("extdata/purple/purple.version", package = "gpgr") - (v <- purple_version_read(x)) - expect_equal(length(v), 2) - expect_equal(names(v), c("version", "build_date")) - expect_equal(v$version, "2.51") -}) - - -test_that("Function purple_qc_read() @ L323", { +test_that("Function purple_qc_read() @ L550", { x <- system.file("extdata/purple/purple.qc", package = "gpgr") (q <- purple_qc_read(x)) @@ -69,7 +43,7 @@ test_that("Function purple_qc_read() @ L323", { }) -test_that("Function purple_purity_read() @ L378", { +test_that("Function purple_purity_read() @ L608", { x <- system.file("extdata/purple/purple.purity.tsv", package = "gpgr") (p <- purple_purity_read(x)) diff --git a/tests/testthat/test-roxytest-testexamples-sv.R b/tests/testthat/test-roxytest-testexamples-sv.R index 782d5af..2ec89e0 100644 --- a/tests/testthat/test-roxytest-testexamples-sv.R +++ b/tests/testthat/test-roxytest-testexamples-sv.R @@ -20,7 +20,7 @@ test_that("Function split_double_col() @ L27", { }) -test_that("Function count_pieces() @ L76", { +test_that("Function count_pieces() @ L77", { (a <- gpgr:::count_pieces("foo,bar,baz", sep = ",")) (b <- gpgr:::count_pieces("foo", sep = ",")) @@ -34,7 +34,7 @@ test_that("Function count_pieces() @ L76", { }) -test_that("Function abbreviate_effect() @ L110", { +test_that("Function abbreviate_effect() @ L112", { (e1 <- gpgr:::abbreviate_effect("3_prime_UTR_truncation&start_lost&splice_region_variant")) (e2 <- gpgr:::abbreviate_effect("duplication&foo&gene_fusion&BOOM&intron_variant")) @@ -46,21 +46,3 @@ test_that("Function abbreviate_effect() @ L110", { expect_equal(e4, "badaboom, bar, foo, StopGain") }) - -test_that("Function umccrise_read_sv_tsv() @ L141", { - - x <- system.file("extdata/umccrise/sv/manta.tsv", package = "gpgr") - (sv <- umccrise_read_sv_tsv(x)$data) - expect_equal(colnames(sv)[ncol(sv)], "ALT") -}) - - -test_that("Function process_sv() @ L197", { - - x <- system.file("extdata/umccrise/sv/manta.tsv", package = "gpgr") - (sv <- process_sv(x)) - expect_true(inherits(sv, "list")) - expect_equal(length(sv), 4) - expect_equal(names(sv), c("unmelted", "melted", "tsv_descr", "col_descr")) -}) - diff --git a/tests/testthat/test-roxytest-testexamples-utils.R b/tests/testthat/test-roxytest-testexamples-utils.R index fb93447..26990c1 100644 --- a/tests/testthat/test-roxytest-testexamples-utils.R +++ b/tests/testthat/test-roxytest-testexamples-utils.R @@ -2,7 +2,7 @@ # File R/utils.R: @testexamples -test_that("Function is_vcf() @ L31", { +test_that("Function is_vcf() @ L21", { x <- system.file("extdata/umccrise/snv/somatic-ensemble-PASS.vcf.gz", package = "gpgr") (y <- is_vcf(x)) @@ -14,7 +14,7 @@ test_that("Function is_vcf() @ L31", { }) -test_that("Function vcf_is_empty() @ L105", { +test_that("Function vcf_is_empty() @ L97", { x <- system.file("extdata/umccrise/snv/somatic-ensemble-PASS.vcf.gz", package = "gpgr") (y <- vcf_is_empty(x)) @@ -41,7 +41,7 @@ test_that("Function vcf_is_empty() @ L105", { }) -test_that("Function tsv_is_empty() @ L141", { +test_that("Function tsv_is_empty() @ L133", { tmp1 <- tempfile()