From 856814843657b29a0465ea25bcf82f30a1698b59 Mon Sep 17 00:00:00 2001 From: Jeff Mandell Date: Tue, 2 Jul 2024 23:26:30 -0400 Subject: [PATCH] Add lift_bed() utility; muffle low mutation count warning for bootstrapped MP; bump version --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS.md | 5 +++ R/get_TCGA_project_MAF.R | 2 +- R/internal_read_maf.R | 4 +- R/lift_bed.R | 69 +++++++++++++++++++++++++++++++++++ R/plot_signature_effects.R | 7 ++-- R/run_mutational_patterns.R | 11 +++++- man/lift_bed.Rd | 28 ++++++++++++++ man/plot_signature_effects.Rd | 7 ++-- 10 files changed, 126 insertions(+), 10 deletions(-) create mode 100644 R/lift_bed.R create mode 100644 man/lift_bed.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 0a64b305..d2800de3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: cancereffectsizeR Type: Package Title: Calculate Cancer Effect Size -Version: 2.9.0 +Version: 2.9.1 Authors@R: c(person("Vincent L.", "Cannataro", email = "cannatarov@emmanuel.edu", role = c("aut"), comment = c(ORCID = "0000-0002-6364-7747")), person("Jeff", "Mandell", email = "jeff.mandell@yale.edu", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index 5c18f501..9bc0b8a8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -32,6 +32,7 @@ export(get_gene_rates) export(get_sample_info) export(get_signature_weights) export(get_trinuc_rates) +export(lift_bed) export(list_ces_covariates) export(list_ces_refsets) export(list_ces_signature_sets) diff --git a/NEWS.md b/NEWS.md index 24ce88c6..1e30bbea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,11 @@ # cancereffectsizeR 3.0.0 Patch releases (as in, x.y.1 → x.y.2) have minor bug fixes or small improvements that do not significantly affect the numerical output of cancer effect analyses. Minor/major updates may change some outputs due to bug fixes or methodological tweaks, as described in these version notes.

+# cancereffectsizeR 2.9.1 +* Added lift_bed() function to ease conversion of BED intervals between genome builds. +* Added an initial version of an epistatic effect plotting function, plot_epistasis(). +* Various minor improvements. + # cancereffectsizeR 2.9.0 * plot_signature_effects() visualizes the relative contributions of mutational signatures to mutation and selection. * Change to how mutational_signature_effects() calculates cohort-averaged signature effect shares. See the function's updated documentation for clarification of how outputs are calculated. diff --git a/R/get_TCGA_project_MAF.R b/R/get_TCGA_project_MAF.R index 32199af1..62ee55cc 100644 --- a/R/get_TCGA_project_MAF.R +++ b/R/get_TCGA_project_MAF.R @@ -154,7 +154,7 @@ get_TCGA_project_MAF = function(project = NULL, filename = NULL, test_run = FALS num_files = ifelse(test_run, '5', '100000') response = httr::GET(files_endpt, query = list(filters = filters, - fields = "file_name,md5sum,release.version", size = num_files, format = 'JSON')) + fields = "file_name,md5sum", size = num_files, format = 'JSON')) if (response$status_code != 200) { msg = paste0("Could not get list of ", project, " cases (GDC API query failed with status code ", diff --git a/R/internal_read_maf.R b/R/internal_read_maf.R index a808714e..3b1cd471 100644 --- a/R/internal_read_maf.R +++ b/R/internal_read_maf.R @@ -224,7 +224,9 @@ read_in_maf = function(maf, refset_env, chr_col = "Chromosome", start_col = "Sta maf[within_sample_dup == FALSE & problem == 'duplicate_record' & is_tcga_patient == TRUE, problem := 'duplicate_from_TCGA_sample_merge'] maf[, c('within_sample_dup', 'is_tcga_patient') := NULL] } - maf[, Tumor_Sample_Barcode := NULL] + if(! 'Tumor_Sample_Barcode' %in% more_cols && ! identical(more_cols, 'all')) { + maf[, Tumor_Sample_Barcode := NULL] + } } diff --git a/R/lift_bed.R b/R/lift_bed.R new file mode 100644 index 00000000..ef77ab06 --- /dev/null +++ b/R/lift_bed.R @@ -0,0 +1,69 @@ +#' Convert BED intervals between genome builds +#' +#' Use this utility to convert BED intervals between genome coordinate systems using liftOver. Only +#' the chr/start/end fields of the input BED are used (strand is ignored). The output GRanges +#' will have no associated seqinfo. +#' +#' A warning is given if the lifted intervals are less than 95\% of the size of the original +#' intervals. When the BED input represents sequencing target intervals, most of the input +#' intervals will usually lift successfully. +#' +#' @param bed Pathname of a BED file, or a GRanges (typically loaded from a BED file with \code{rtracklayer::import.bed()}). +#' @param chain A UCSC-style chain file, or a Chain object (such as from \code{rtracklayer::import.chain()}). +#' @param outfile If not NULL, the returned GRanges will be saved to the specified path using \code{rtracklayer::export.chain()}. +#' @return GRanges representing lifted intervals from input \code{bed}. +#' @export +lift_bed = function(bed, chain, outfile = NULL) { + if(rlang::is_scalar_character(bed)) { + if(! file.exists(bed)) { + stop("Specified BED file does not exist.") + } + bed = rtracklayer::import.bed(bed) + } else if(! is(bed, 'GRanges')) { + stop('Input bed should be the path to a BED file or a GRanges object.') + } + bed = BiocGenerics::unstrand(bed) + + if(rlang::is_scalar_character(chain)) { + if(! file.exists(chain)) { + stop('Specified chain file does not exist.') + } + chain = rtracklayer::import.chain(chain) + } else if (! is(chain, 'Chain')) { + stop('Input chain should be the path to a UCSC-style chain file or a Chain object.') + } + names(chain) = sub("^chr", "", names(chain)) + seqlevelsStyle(bed) = 'NCBI' + lifted = sort(reduce(unlist(rtracklayer::liftOver(bed, chain)))) + seqlevelsStyle(lifted) = 'NCBI' + prop = sum(width(lifted)) / sum(width(reduce(bed))) + if(prop < .95) { + percent = round(prop * 100, 1) + msg = paste0('The lifted intervals cover ', percent, '% of the width of the original intervals.', + ' (For BED files representing sequencing target regions, typically most intervals', + ' will successfully lift between genome builds. If the percentage is very low,', + ' make sure you have the correct genome build for the input file.)') + warning(pretty_message(msg, emit = F)) + } + if(! is.null(outfile)) { + if(! rlang::is_scalar_character(outfile)) { + stop('outfile should be NULL or a scalar character indicating a valid file path.') + } + if(! outfile %like% '\\.bed(\\.gz)?$') { + stop('outfile must end in .bed or .bed.gz') + } + if(! dir.exists(dirname(outfile))) { + stop('Directory specified in outfile does not exist.') + } + if(file.exists(outfile)) { + stop('Specified outfile already exists.') + } + rtracklayer::export.bed(lifted, outfile) + message('Lifted BED intervals saved to ', outfile, '.') + } + if(is.null(outfile)) { + return(lifted) + } else { + return(invisible(lifted)) + } +} diff --git a/R/plot_signature_effects.R b/R/plot_signature_effects.R index 0f04181e..50cf1fd1 100644 --- a/R/plot_signature_effects.R +++ b/R/plot_signature_effects.R @@ -18,10 +18,11 @@ #' \item Add a "color" column to manually specify colors for each group. #' } #' Alternatively, setting \code{signature_groupings = "cannataro"} applies the same signature -#' grouping and color palette as -#' \href{https://academic.oup.com/mbe/article/39/5/msac084/6570859}{Cannataro et al. 2022}. +#' groupings and color palette as +#' \href{https://academic.oup.com/mbe/article/39/5/msac084/6570859}{Cannataro et al. 2022}. You can use +#' Cannataro signature groupings with a different color palette by specifying \code{viridis_option}. #' @param viridis_option A viridis color mapping, specified with a single letter ('A' to 'H'). By -#' default, map 'G' (mako) is used. +#' default, map 'G' (mako) unless using Cannataro signature groupings. #' @param num_sig_groups How many groups of signatures to display. Groups are ordered by their #' highest effect shares, and the rest get lumped into an "other signatures" group. #' @export diff --git a/R/run_mutational_patterns.R b/R/run_mutational_patterns.R index 29fd4746..c6d7d57a 100644 --- a/R/run_mutational_patterns.R +++ b/R/run_mutational_patterns.R @@ -38,7 +38,16 @@ run_mutational_patterns = function(tumor_trinuc_counts, signatures_df, signature signatures_output = tryCatch( { if(bootstrap_mutations) { - do.call(MutationalPatterns::fit_to_signatures_bootstrapped, args) + withCallingHandlers( + { + do.call(MutationalPatterns::fit_to_signatures_bootstrapped, args) + }, + warning = function(w) { + if (conditionMessage(w) %like% "At least one of your samples has less than") { + invokeRestart("muffleWarning") + } + } + ) } else { do.call(MutationalPatterns::fit_to_signatures_strict, args)$fit_res } diff --git a/man/lift_bed.Rd b/man/lift_bed.Rd new file mode 100644 index 00000000..cb03643d --- /dev/null +++ b/man/lift_bed.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lift_bed.R +\name{lift_bed} +\alias{lift_bed} +\title{Convert BED intervals between genome builds} +\usage{ +lift_bed(bed, chain, outfile = NULL) +} +\arguments{ +\item{bed}{Pathname of a BED file, or a GRanges (typically loaded from a BED file with \code{rtracklayer::import.bed()}).} + +\item{chain}{A UCSC-style chain file, or a Chain object (such as from \code{rtracklayer::import.chain()}).} + +\item{outfile}{If not NULL, the returned GRanges will be saved to the specified path using \code{rtracklayer::export.chain()}.} +} +\value{ +GRanges representing lifted intervals from input \code{bed}. +} +\description{ +Use this utility to convert BED intervals between genome coordinate systems using liftOver. Only +the chr/start/end fields of the input BED are used (strand is ignored). The output GRanges +will have no associated seqinfo. +} +\details{ +A warning is given if the lifted intervals are less than 95\% of the size of the original +intervals. When the BED input represents sequencing target intervals, most of the input +intervals will usually lift successfully. +} diff --git a/man/plot_signature_effects.Rd b/man/plot_signature_effects.Rd index a6d5da04..e86d91b5 100644 --- a/man/plot_signature_effects.Rd +++ b/man/plot_signature_effects.Rd @@ -28,11 +28,12 @@ from a separate run of mutational_signature_effects().} \item Add a "color" column to manually specify colors for each group. } Alternatively, setting \code{signature_groupings = "cannataro"} applies the same signature -grouping and color palette as -\href{https://academic.oup.com/mbe/article/39/5/msac084/6570859}{Cannataro et al. 2022}.} +groupings and color palette as +\href{https://academic.oup.com/mbe/article/39/5/msac084/6570859}{Cannataro et al. 2022}. You can use +Cannataro signature groupings with a different color palette by specifying \code{viridis_option}.} \item{viridis_option}{A viridis color mapping, specified with a single letter ('A' to 'H'). By -default, map 'G' (mako) is used.} +default, map 'G' (mako) unless using Cannataro signature groupings.} \item{num_sig_groups}{How many groups of signatures to display. Groups are ordered by their highest effect shares, and the rest get lumped into an "other signatures" group.}