diff --git a/R/oncokb.R b/R/oncokb.R index f506629..de557ef 100644 --- a/R/oncokb.R +++ b/R/oncokb.R @@ -1,5 +1,8 @@ +#' Read OncoKB +#' #' @param x Path to something. #' +#' @return Vector of genes. #' @export read_oncokb <- function(x) { readr::read_tsv(x) |> @@ -9,8 +12,11 @@ read_oncokb <- function(x) { dplyr::pull("Hugo Symbol") } +#' Get OncoKB Genes From Somewhere +#' #' @param x Path to something. -#' @param oncokb_genes Tibble of something. +#' @param oncokb_genes A tibble of something. +#' @return A vector I think. #' #' @export get_oncokb_genes <- function(x, oncokb_genes) { diff --git a/R/purple.R b/R/purple.R index 08f33b7..1046aa4 100644 --- a/R/purple.R +++ b/R/purple.R @@ -319,8 +319,12 @@ set_many_transcripts_cnv <- function(x) { ) } +#' Process CNV TSV +#' #' @param x Path to something. #' +#' @return List of many things. +#' #' @export process_cnv_tsv <- function(x) { # Read input @@ -342,19 +346,18 @@ process_cnv_tsv <- function(x) { dplyr::mutate(annotation = strsplit(.data$simple_ann, ",")) |> # Convert annotation fields into columns tidyr::unnest("annotation") |> - tidyr::separate( - "annotation", - c("Event", "Effect", "Genes", "Transcripts", "Detail", "Tier"), - sep = "\\|", convert = FALSE + tidyr::separate_wider_delim( + cols = "annotation", delim = "|", + names = c("Event", "Effect", "Genes", "Transcripts", "Detail", "Tier") ) |> # Create new columns and modify existing ones dplyr::mutate( - copyNumber = as.numeric(.data$copyNumber) |> round(2) %>% sprintf("%.2f", .), - minorAlleleCopyNumber = as.numeric(minorAlleleCopyNumber) |> round(2) %>% sprintf("%.2f", .), - majorAlleleCopyNumber = as.numeric(majorAlleleCopyNumber) |> round(2) %>% sprintf("%.2f", .), - "PURPLE CN Min+Maj" = paste0(minorAlleleCopyNumber, "+", majorAlleleCopyNumber), - "Genes" = stringr::str_replace_all(Genes, "&", ", "), - "Transcripts" = stringr::str_replace_all(Transcripts, "&", ", "), + copyNumber = sprintf("%.2f", round(as.numeric(.data$copyNumber), 2)), + minorAlleleCopyNumber = sprintf("%.2f", round(as.numeric(.data$minorAlleleCopyNumber), 2)), + majorAlleleCopyNumber = sprintf("%.2f", round(as.numeric(.data$majorAlleleCopyNumber), 2)), + "PURPLE CN Min+Maj" = paste0(.data$minorAlleleCopyNumber, "+", .data$majorAlleleCopyNumber), + "Genes" = stringr::str_replace_all(.data$Genes, "&", ", "), + "Transcripts" = stringr::str_replace_all(.data$Transcripts, "&", ", ") ) |> # Remove unused columns dplyr::select(-c( @@ -369,7 +372,7 @@ process_cnv_tsv <- function(x) { "segmentEndSupport", "segmentStartSupport", "sv_top_tier", - "simple_ann", + "simple_ann" )) # Abbreviate effects @@ -382,10 +385,10 @@ process_cnv_tsv <- function(x) { # Complete processing cnv.tmp <- cnv.annotations.split$retained |> # Reset sv_top_tier after removing annotations - dplyr::group_by(`Event ID`) |> + dplyr::group_by(.data$`Event ID`) |> dplyr::mutate( sv_top_tier = min(.data$Tier), - "Tier (top)" = paste0(.data$Tier, " (", .data$sv_top_tier, ")"), + "Tier (top)" = paste0(.data$Tier, " (", .data$sv_top_tier, ")") ) |> dplyr::ungroup() |> # Set unique annotation ID @@ -408,8 +411,8 @@ process_cnv_tsv <- function(x) { "PURPLE CN" = "copyNumber", "PURPLE CN Min+Maj" ) - cnv.tmp <- dplyr::select("cnv.tmp", tidyselect::all_of(c(column_selector, "Tier"))) - cnv.filtered <- dplyr::select(cnv.annotations.split$filtered, tidyselect::any_of(column_selector)) + cnv.tmp <- dplyr::select(cnv.tmp, dplyr::all_of(c(column_selector, "Tier"))) + cnv.filtered <- dplyr::select(cnv.annotations.split$filtered, dplyr::any_of(column_selector)) # Collapse selected annotations and set many genes cnv.many_genes_data <- set_many_genes_cnv(cnv.tmp) diff --git a/R/rmd.R b/R/rmd.R index 6ca8f73..dde3d6e 100644 --- a/R/rmd.R +++ b/R/rmd.R @@ -24,8 +24,8 @@ #' @param result_outdir Path to directory to write tidy JSON/TSV results. #' @param somatic_snv_vcf Path to `somatic-PASS.vcf.gz` SNV VCF. #' @param somatic_snv_summary Path to `somatic_snv_summary.json` JSON. -#' @param somatic_sv_tsv Path to `manta.tsv` TSV file. -#' @param somatic_sv_vcf Path to `manta.vcf.gz` VCF file. +#' @param somatic_sv_tsv Path to SV TSV file. +#' @param somatic_sv_vcf Path to SV VCF file. #' @param tumor_name Name of tumor sample. #' @param out_file Path to output HTML file (needs '.html' suffix) (def: `{tumor_name}_cancer_report.html`). #' @param quiet Suppress log printing during rendering. diff --git a/R/sv.R b/R/sv.R index 0c4adad..6a4ede5 100644 --- a/R/sv.R +++ b/R/sv.R @@ -297,7 +297,10 @@ set_many_transcripts_sv <- function(x) { ) } -#' @param x Path to something. +#' Process SV TSV +#' +#' @param x Path to SV TSV. +#' @return List of many things. #' #' @export process_sv <- function(x) { @@ -329,13 +332,13 @@ process_sv <- function(x) { ), start = paste(.data$chrom, base::format(.data$start, big.mark = ",", trim = TRUE), sep = ":"), Type = ifelse(is.na(.data$PURPLE_status), .data$svtype, "PURPLE_inf"), - "Record ID" = dplyr::row_number(), + "Record ID" = dplyr::row_number() ) |> dplyr::select(-c( "chrom", "PURPLE_status", "tier", - "svtype", + "svtype" )) # Split out breakpoints for merging @@ -349,7 +352,7 @@ process_sv <- function(x) { cols_to_split <- c("AF_PURPLE", "CN_PURPLE") double_cols <- split_double_col(sv.tmp, cols_to_split) sv.tmp <- sv.tmp |> - dplyr::select(-c("cols_to_split")) |> + dplyr::select(-c(dplyr::all_of(cols_to_split))) |> dplyr::bind_cols(double_cols) # Format a table for to be used as the SV Map @@ -372,7 +375,7 @@ process_sv <- function(x) { "IC_alt", "SR_PR_ref", "PURPLE AF" = "AF_PURPLE", - "PURPLE CN" = "CN_PURPLE", + "PURPLE CN" = "CN_PURPLE" ) |> dplyr::arrange(.data$`Record ID`) @@ -381,17 +384,17 @@ process_sv <- function(x) { # Split into individual annotations dplyr::mutate(annotation = strsplit(.data$annotation, ",")) |> # Convert annotation fields into columns - tidyr::unnest(.data$annotation) |> - tidyr::separate( - .data$annotation, c("Event", "Effect", "Genes", "Transcripts", "Detail", "Tier"), - sep = "\\|", convert = FALSE + tidyr::unnest("annotation") |> + tidyr::separate_wider_delim( + cols = "annotation", delim = "|", + names = c("Event", "Effect", "Genes", "Transcripts", "Detail", "Tier") ) |> # Remove gene_fusion annotations for variants where frameshift_variant&gene_fusion already exist - dplyr::group_by(dplyr::across(-.data$Effect)) |> + dplyr::group_by(dplyr::across(-"Effect")) |> dplyr::group_modify(remove_gene_fusion_dups) |> dplyr::ungroup() |> # Remove unused columns - dplyr::select(c(-.data$Event, -.data$ALT)) |> + dplyr::select(-c("Event", "ALT")) |> # Create columns, modify others dplyr::mutate( "Annotation ID" = dplyr::row_number(), @@ -432,7 +435,7 @@ process_sv <- function(x) { "PURPLE CN" = "CN_PURPLE", # Dropped after ops for non-map outputs "Top Tier", - "Type", + "Type" ) # Create and set many transcript values @@ -464,8 +467,8 @@ process_sv <- function(x) { #' @return A ggplot2 plot object. #' #' @examples -#' x <- system.file("extdata/umccrise/sv/manta.tsv", package = "gpgr") -#' d <- process_sv(x)$unmelted +#' x <- system.file("extdata/sash/sv.prioritised.tsv", package = "gpgr") +#' d <- process_sv(x)$map #' plot_bnd_sr_pr_tot_lines(d) #' @export plot_bnd_sr_pr_tot_lines <- function(d, @@ -527,8 +530,8 @@ plot_bnd_sr_pr_tot_lines <- function(d, #' @return A ggplot2 plot object. #' #' @examples -#' x <- system.file("extdata/umccrise/sv/manta.tsv", package = "gpgr") -#' d <- process_sv(x)$unmelted +#' x <- system.file("extdata/sash/sv.prioritised.tsv", package = "gpgr") +#' d <- process_sv(x)$map #' plot_bnd_sr_pr_tot_hist(d, "a title") #' @export plot_bnd_sr_pr_tot_hist <- function(d, diff --git a/man/cancer_rmd.Rd b/man/cancer_rmd.Rd index c361f4f..8845848 100644 --- a/man/cancer_rmd.Rd +++ b/man/cancer_rmd.Rd @@ -71,9 +71,9 @@ cancer_rmd( \item{somatic_snv_summary}{Path to \code{somatic_snv_summary.json} JSON.} -\item{somatic_sv_tsv}{Path to \code{manta.tsv} TSV file.} +\item{somatic_sv_tsv}{Path to SV TSV file.} -\item{somatic_sv_vcf}{Path to \code{manta.vcf.gz} VCF file.} +\item{somatic_sv_vcf}{Path to SV VCF file.} \item{result_outdir}{Path to directory to write tidy JSON/TSV results.} diff --git a/man/get_oncokb_genes.Rd b/man/get_oncokb_genes.Rd new file mode 100644 index 0000000..21bd062 --- /dev/null +++ b/man/get_oncokb_genes.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/oncokb.R +\name{get_oncokb_genes} +\alias{get_oncokb_genes} +\title{Get OncoKB Genes From Somewhere} +\usage{ +get_oncokb_genes(x, oncokb_genes) +} +\arguments{ +\item{x}{Path to something.} + +\item{oncokb_genes}{A tibble of something.} +} +\value{ +A vector I think. +} +\description{ +Get OncoKB Genes From Somewhere +} diff --git a/man/plot_bnd_sr_pr_tot_hist.Rd b/man/plot_bnd_sr_pr_tot_hist.Rd index c060067..19538fd 100644 --- a/man/plot_bnd_sr_pr_tot_hist.Rd +++ b/man/plot_bnd_sr_pr_tot_hist.Rd @@ -25,7 +25,7 @@ Plots histograms for the number of split reads (\code{SR}), paired end reads (\c sum (\code{tot}) across all BNDs. Observations where the SR or PR value is 0 (NA) are not shown. } \examples{ -x <- system.file("extdata/umccrise/sv/manta.tsv", package = "gpgr") -d <- process_sv(x)$unmelted +x <- system.file("extdata/sash/sv.prioritised.tsv", package = "gpgr") +d <- process_sv(x)$map plot_bnd_sr_pr_tot_hist(d, "a title") } diff --git a/man/plot_bnd_sr_pr_tot_lines.Rd b/man/plot_bnd_sr_pr_tot_lines.Rd index 82e8e51..b0cae9d 100644 --- a/man/plot_bnd_sr_pr_tot_lines.Rd +++ b/man/plot_bnd_sr_pr_tot_lines.Rd @@ -25,7 +25,7 @@ Plots the number of split reads (\code{SR}), paired end reads (\code{PR}), and t sum (\code{tot}) across all BNDs, sorted by \code{tot}. } \examples{ -x <- system.file("extdata/umccrise/sv/manta.tsv", package = "gpgr") -d <- process_sv(x)$unmelted +x <- system.file("extdata/sash/sv.prioritised.tsv", package = "gpgr") +d <- process_sv(x)$map plot_bnd_sr_pr_tot_lines(d) } diff --git a/man/process_cnv_tsv.Rd b/man/process_cnv_tsv.Rd new file mode 100644 index 0000000..1b5d772 --- /dev/null +++ b/man/process_cnv_tsv.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/purple.R +\name{process_cnv_tsv} +\alias{process_cnv_tsv} +\title{Process CNV TSV} +\usage{ +process_cnv_tsv(x) +} +\arguments{ +\item{x}{Path to something.} +} +\value{ +List of many things. +} +\description{ +Process CNV TSV +} diff --git a/man/process_sv.Rd b/man/process_sv.Rd new file mode 100644 index 0000000..38986b5 --- /dev/null +++ b/man/process_sv.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sv.R +\name{process_sv} +\alias{process_sv} +\title{Process SV TSV} +\usage{ +process_sv(x) +} +\arguments{ +\item{x}{Path to SV TSV.} +} +\value{ +List of many things. +} +\description{ +Process SV TSV +} diff --git a/man/read_oncokb.Rd b/man/read_oncokb.Rd new file mode 100644 index 0000000..43313e0 --- /dev/null +++ b/man/read_oncokb.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/oncokb.R +\name{read_oncokb} +\alias{read_oncokb} +\title{Read OncoKB} +\usage{ +read_oncokb(x) +} +\arguments{ +\item{x}{Path to something.} +} +\value{ +Vector of genes. +} +\description{ +Read OncoKB +} diff --git a/tests/testthat/test-roxytest-testexamples-purple.R b/tests/testthat/test-roxytest-testexamples-purple.R index 3f83ec0..528f853 100644 --- a/tests/testthat/test-roxytest-testexamples-purple.R +++ b/tests/testthat/test-roxytest-testexamples-purple.R @@ -19,7 +19,7 @@ test_that("Function purple_cnv_som_gene_process() @ L60", { }) -test_that("Function purple_cnv_som_read() @ L444", { +test_that("Function purple_cnv_som_read() @ L449", { x <- system.file("extdata/purple/purple.cnv.somatic.tsv", package = "gpgr") (p <- purple_cnv_som_read(x)) @@ -27,7 +27,7 @@ test_that("Function purple_cnv_som_read() @ L444", { }) -test_that("Function purple_cnv_som_process() @ L477", { +test_that("Function purple_cnv_som_process() @ L482", { x <- system.file("extdata/purple/purple.cnv.somatic.tsv", package = "gpgr") (pp <- purple_cnv_som_process(x)) @@ -35,7 +35,7 @@ test_that("Function purple_cnv_som_process() @ L477", { }) -test_that("Function purple_qc_read() @ L545", { +test_that("Function purple_qc_read() @ L550", { x <- system.file("extdata/purple/purple.qc", package = "gpgr") (q <- purple_qc_read(x)) @@ -43,7 +43,7 @@ test_that("Function purple_qc_read() @ L545", { }) -test_that("Function purple_purity_read() @ L603", { +test_that("Function purple_purity_read() @ L608", { x <- system.file("extdata/purple/purple.purity.tsv", package = "gpgr") (p <- purple_purity_read(x))