Skip to content

Commit

Permalink
Add lift_bed() utility; muffle low mutation count warning for bootstr…
Browse files Browse the repository at this point in the history
…apped MP; bump version
  • Loading branch information
jeff-mandell committed Jul 3, 2024
1 parent 0555aa7 commit 8568148
Show file tree
Hide file tree
Showing 10 changed files with 126 additions and 10 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: cancereffectsizeR
Type: Package
Title: Calculate Cancer Effect Size
Version: 2.9.0
Version: 2.9.1
Authors@R: c(person("Vincent L.", "Cannataro", email = "[email protected]", role = c("aut"),
comment = c(ORCID = "0000-0002-6364-7747")),
person("Jeff", "Mandell", email = "[email protected]", role = c("aut", "cre"),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export(get_gene_rates)
export(get_sample_info)
export(get_signature_weights)
export(get_trinuc_rates)
export(lift_bed)
export(list_ces_covariates)
export(list_ces_refsets)
export(list_ces_signature_sets)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# <font style = "opacity:0">cancereffectsizeR 3.0.0</font>
Patch releases (as in, x.y.1 &#8594; x.y.2) have minor bug fixes or small improvements that do not significantly affect the numerical output of cancer effect analyses. Minor/major updates may change some outputs due to bug fixes or methodological tweaks, as described in these version notes.<br><br>

# cancereffectsizeR 2.9.1
* Added lift_bed() function to ease conversion of BED intervals between genome builds.
* Added an initial version of an epistatic effect plotting function, plot_epistasis().
* Various minor improvements.

# cancereffectsizeR 2.9.0
* plot_signature_effects() visualizes the relative contributions of mutational signatures to mutation and selection.
* Change to how mutational_signature_effects() calculates cohort-averaged signature effect shares. See the function's updated documentation for clarification of how outputs are calculated.
Expand Down
2 changes: 1 addition & 1 deletion R/get_TCGA_project_MAF.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ get_TCGA_project_MAF = function(project = NULL, filename = NULL, test_run = FALS
num_files = ifelse(test_run, '5', '100000')
response = httr::GET(files_endpt,
query = list(filters = filters,
fields = "file_name,md5sum,release.version", size = num_files, format = 'JSON'))
fields = "file_name,md5sum", size = num_files, format = 'JSON'))

if (response$status_code != 200) {
msg = paste0("Could not get list of ", project, " cases (GDC API query failed with status code ",
Expand Down
4 changes: 3 additions & 1 deletion R/internal_read_maf.R
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,9 @@ read_in_maf = function(maf, refset_env, chr_col = "Chromosome", start_col = "Sta
maf[within_sample_dup == FALSE & problem == 'duplicate_record' & is_tcga_patient == TRUE, problem := 'duplicate_from_TCGA_sample_merge']
maf[, c('within_sample_dup', 'is_tcga_patient') := NULL]
}
maf[, Tumor_Sample_Barcode := NULL]
if(! 'Tumor_Sample_Barcode' %in% more_cols && ! identical(more_cols, 'all')) {
maf[, Tumor_Sample_Barcode := NULL]
}
}


Expand Down
69 changes: 69 additions & 0 deletions R/lift_bed.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#' Convert BED intervals between genome builds
#'
#' Use this utility to convert BED intervals between genome coordinate systems using liftOver. Only
#' the chr/start/end fields of the input BED are used (strand is ignored). The output GRanges
#' will have no associated seqinfo. Lifted intervals are merged (reduced) and sorted, and
#' sequence names are converted to NCBI style.
#'
#' A warning is given if the lifted intervals are less than 95\% of the size of the original
#' intervals. When the BED input represents sequencing target intervals, most of the input
#' intervals will usually lift successfully.
#'
#' @param bed Pathname of a BED file, or a GRanges (typically loaded from a BED file with \code{rtracklayer::import.bed()}).
#' @param chain A UCSC-style chain file, or a Chain object (such as from \code{rtracklayer::import.chain()}).
#' @param outfile If not NULL, the returned GRanges will be saved to the specified path using
#'   \code{rtracklayer::export.bed()}. The path must end in .bed or .bed.gz, its directory must
#'   exist, and the file must not already exist.
#' @return GRanges representing lifted intervals from input \code{bed}. Returned invisibly when
#'   \code{outfile} is specified.
#' @export
lift_bed = function(bed, chain, outfile = NULL) {
  # Validate outfile up front so that a bad output path fails fast, before chain import and liftOver.
  if(! is.null(outfile)) {
    if(! rlang::is_scalar_character(outfile)) {
      stop('outfile should be NULL or a scalar character indicating a valid file path.')
    }
    if(! outfile %like% '\\.bed(\\.gz)?$') {
      stop('outfile must end in .bed or .bed.gz')
    }
    if(! dir.exists(dirname(outfile))) {
      stop('Directory specified in outfile does not exist.')
    }
    if(file.exists(outfile)) {
      stop('Specified outfile already exists.')
    }
  }

  if(rlang::is_scalar_character(bed)) {
    if(! file.exists(bed)) {
      stop("Specified BED file does not exist.")
    }
    bed = rtracklayer::import.bed(bed)
  } else if(! is(bed, 'GRanges')) {
    stop('Input bed should be the path to a BED file or a GRanges object.')
  }
  # Strand is ignored: only chr/start/end are lifted.
  bed = BiocGenerics::unstrand(bed)

  if(rlang::is_scalar_character(chain)) {
    if(! file.exists(chain)) {
      stop('Specified chain file does not exist.')
    }
    chain = rtracklayer::import.chain(chain)
  } else if (! is(chain, 'Chain')) {
    stop('Input chain should be the path to a UCSC-style chain file or a Chain object.')
  }

  # Put chain names and BED seqlevels on the same naming scheme (no "chr" prefix) so they match.
  names(chain) = sub("^chr", "", names(chain))
  seqlevelsStyle(bed) = 'NCBI'
  lifted = sort(reduce(unlist(rtracklayer::liftOver(bed, chain))))
  seqlevelsStyle(lifted) = 'NCBI'

  # Sum widths as doubles: integer sums overflow to NA once total width exceeds
  # .Machine$integer.max (possible with genome-scale interval sets).
  lifted_width = sum(as.numeric(width(lifted)))
  original_width = sum(as.numeric(width(reduce(bed))))

  # Skip the coverage check when there is nothing to compare against (avoids 0/0 = NaN in if()).
  if(original_width > 0) {
    prop = lifted_width / original_width
    if(prop < .95) {
      percent = round(prop * 100, 1)
      msg = paste0('The lifted intervals cover ', percent, '% of the width of the original intervals.',
                   ' (For BED files representing sequencing target regions, typically most intervals',
                   ' will successfully lift between genome builds. If the percentage is very low,',
                   ' make sure you have the correct genome build for the input file.)')
      warning(pretty_message(msg, emit = FALSE))
    }
  }

  if(is.null(outfile)) {
    return(lifted)
  }
  rtracklayer::export.bed(lifted, outfile)
  message('Lifted BED intervals saved to ', outfile, '.')
  return(invisible(lifted))
}
7 changes: 4 additions & 3 deletions R/plot_signature_effects.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@
#' \item Add a "color" column to manually specify colors for each group.
#' }
#' Alternatively, setting \code{signature_groupings = "cannataro"} applies the same signature
#' grouping and color palette as
#' \href{https://academic.oup.com/mbe/article/39/5/msac084/6570859}{Cannataro et al. 2022}.
#' groupings and color palette as
#' \href{https://academic.oup.com/mbe/article/39/5/msac084/6570859}{Cannataro et al. 2022}. You can use
#' Cannataro signature groupings with a different color palette by specifying \code{viridis_option}.
#' @param viridis_option A viridis color mapping, specified with a single letter ('A' to 'H'). By
#' default, map 'G' (mako) is used.
#' default, map 'G' (mako) is used, unless using Cannataro signature groupings.
#' @param num_sig_groups How many groups of signatures to display. Groups are ordered by their
#' highest effect shares, and the rest get lumped into an "other signatures" group.
#' @export
Expand Down
11 changes: 10 additions & 1 deletion R/run_mutational_patterns.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,16 @@ run_mutational_patterns = function(tumor_trinuc_counts, signatures_df, signature
signatures_output = tryCatch(
{
if(bootstrap_mutations) {
do.call(MutationalPatterns::fit_to_signatures_bootstrapped, args)
withCallingHandlers(
{
do.call(MutationalPatterns::fit_to_signatures_bootstrapped, args)
},
warning = function(w) {
if (conditionMessage(w) %like% "At least one of your samples has less than") {
invokeRestart("muffleWarning")
}
}
)
} else {
do.call(MutationalPatterns::fit_to_signatures_strict, args)$fit_res
}
Expand Down
28 changes: 28 additions & 0 deletions man/lift_bed.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/plot_signature_effects.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 8568148

Please sign in to comment.