Skip to content
This repository has been archived by the owner on Jun 1, 2020. It is now read-only.

Commit

Permalink
Extend documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
juliangehring committed Apr 21, 2015
1 parent 5f5cce7 commit ca3e84d
Show file tree
Hide file tree
Showing 21 changed files with 732 additions and 566 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/vignettes/SomaticSignatures-vignette.R
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: SomaticSignatures
Type: Package
Title: Somatic Signatures
Version: 2.4.1
Version: 2.5.2
Author: Julian Gehring (EMBL Heidelberg)
Maintainer: Julian Gehring <[email protected]>
Description: The SomaticSignatures package identifies mutational signatures of single nucleotide variants (SNVs). It provides an infrastructure related to the methodology described in Nik-Zainal (2012, Cell), with flexibility in the matrix decomposition algorithms.
Expand Down
4 changes: 3 additions & 1 deletion R/correlate.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
## The following functions for correlating signatures are currently neither exported nor used in the package ##

retreiveSignatures <- function(vr, group, signatures, method = c("cospos", "cosine", "spearman", "pearson")) {

method = match.arg(method)
Expand Down Expand Up @@ -34,7 +36,7 @@ correlateSignatures <- function(m, s, method = c("cospos", "cosine", "spearman",
return(cc)
}


cossim <- function(x, y) {
res = crossprod(x, y)/sqrt(crossprod(x) * crossprod(y))
return(res)
Expand Down
4 changes: 4 additions & 0 deletions R/decomposition.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ nmfDecomposition <- function(x, r, ..., includeFit = FALSE) {
w = w[ ,ord]
h = h[ ,ord]

## name signatures: S1, ..., Sn
sig_names = paste0("S", 1:r)
colnames(w) = colnames(h) = sig_names
v = fitted(y)
Expand All @@ -36,6 +37,8 @@ kmeansDecomposition <- function(x, r, ..., includeFit = FALSE) {
h = matrix(0, n_samples, r)
h[ cbind(1:n_samples, as.vector(y$cluster)) ] = 1
stopifnot(all(rowSums(h) == 1))

## name signatures: S1, ..., Sn
sig_names = paste0("S", 1:r)
colnames(w) = colnames(h) = sig_names
v = fitted(y)
Expand All @@ -58,6 +61,7 @@ pcaDecomposition <- function(x, r, ..., includeFit = FALSE) {
h = loadings(y) ## samples x k
v = fitted(y)

## name signatures: S1, ..., Sn
sig_names = paste0("S", 1:r)
colnames(w) = colnames(h) = sig_names

Expand Down
4 changes: 2 additions & 2 deletions R/granges-utils.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
ucsc <- function(x) {
    ## switch the sequence level style to UCSC, silencing any messages
    suppressMessages(seqlevelsStyle(x) <- "UCSC") ## '<-' needed
    ## reset the genome annotation:
    ## avoid mismatches in 'genome' slots for overlaps
    genome(x) <- NA
    x
}


ncbi <- function(x) {
    ## switch the sequence level style to NCBI, silencing any messages
    suppressMessages(seqlevelsStyle(x) <- "NCBI") ## '<-' needed
    ## reset the genome annotation:
    ## avoid mismatches in 'genome' slots for overlaps
    genome(x) <- NA
    x
}

Expand Down
1 change: 1 addition & 0 deletions R/motif-matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ motifMatrix <- function(vr, group = "sampleNames", normalize = TRUE) {
stop(sprintf("Column '%s' not present in input object.", group))
}

## form the matrix
group_string = paste0(group, " ~ motif")
df$motif = factor(constructMotif(df$alteration, df$context))
y = t(acast(df, group_string, value.var = "motif", fun.aggregate = length))
Expand Down
1 change: 1 addition & 0 deletions R/mutational-signatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ findSignatures <- function(x, r, decomposition = nmfDecomposition, ...) {

dc = decomposition(x, r, ...)

## check returned object
required_names = c("m", "w", "h", "v")
if(any(!(required_names %in% names(dc)))) {
msg = paste0("The decomposition function must return a list with names: ",
Expand Down
9 changes: 5 additions & 4 deletions R/number-signatures.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
assessNumberSignatures <- function(m, nSigs, decomposition = nmfDecomposition, ...,
nReplicates = 1) {

## compute fit statistics for
## - outer :: number of signatures
## - inner :: replicates
dev = lapply(nSigs, function(r, m, decomposition, ...) {
d = lapply(1:nReplicates, function(i) {
dev = assessOneSignature(m, r, decomposition, ...)
Expand All @@ -10,6 +13,7 @@ assessNumberSignatures <- function(m, nSigs, decomposition = nmfDecomposition, .
return(do.call(rbind, d))
}, m, decomposition, ...)

## merge results to data frame
gof = do.call(rbind, dev)

return(gof)
Expand All @@ -22,8 +26,7 @@ plotNumberSignatures <- function(gof) {
measure.vars = c("RSS", "ExplainedVariance"), variable.name = "stat")

p = ggplot(m, aes_string(x = "NumberSignatures", y = "value", group = "NumberSignatures"))
p = p + stat_summary(fun.y = mean, fun.ymin = min,
fun.ymax = max, colour = "red", size = 1.2)
p = p + stat_summary(fun.y = mean, colour = "red", size = 2.5, geom = "point")
p = p + geom_point(color = "black", shape = 3)
p = p + facet_wrap(~stat, nrow = 2, scales = "free")
p = p + theme_bw() + xlab("Number of Signatures") + ylab("Statistic")
Expand All @@ -32,8 +35,6 @@ plotNumberSignatures <- function(gof) {
}


## Goodness-of-fit functions ##

assessOneSignature <- function(m, n, decomposition, ...) {

sigs = identifySignatures(m, n, decomposition, ...)
Expand Down
3 changes: 0 additions & 3 deletions R/rainfall.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@ mutationDistance <- function(x) {
idx_change = start(seqnames(x)) ## where does a new chr begin
dist = diff(c(1, start(x))) ## to keep the same length
dist[idx_change] = start(x[idx_change]) ## for new chr: distance to start
#stopifnot(all(dist > 0)) ## TODO
#idx_same = (as(dist, "Rle") == 0)
#dist[idx_same] = dist[idx_same - 1]
x$distance = dist

return(x)
Expand Down
53 changes: 39 additions & 14 deletions R/signature-plots.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
plotSpectrum <- function(x, colorby = c("sample", "alteration")) {

colorby = match.arg(colorby)

## reused part of 'meltSignatures'
w_df = melt(x, varnames = c("motif", "sample"))
w_df$alteration = sub("([ACGTN])([ACGTN]) .+", "\\1>\\2", w_df$motif)
w_df$context = sub("[ACGTN][ACGTN] (.+)", "\\1", w_df$motif)
Expand All @@ -9,7 +11,7 @@ plotSpectrum <- function(x, colorby = c("sample", "alteration")) {
p = p + geom_bar(aes_string(x = "context", y = "value", fill = colorby),
stat = "identity", position = "identity")
p = p + facet_grid(sample ~ alteration)
p = p + .theme_ss
p = p + theme_ss() + theme_small_axis()
p = p + theme(legend.position = "none")
p = p + scale_fill_brewer(palette = "Set3")
p = p + xlab("Motif") + ylab("Contribution")
Expand Down Expand Up @@ -42,12 +44,12 @@ plotFittedSpectrum <- function(s, colorby = c("sample", "alteration")) {

plotSignatureMap <- function(s) {

w_df = .meltSignatures(signatures(s))
w_df = meltSignatures(signatures(s))

p = ggplot(w_df)
p = p + geom_tile(aes_string(y = "motif", x = "signature", fill = "value"))
p = p + scale_fill_gradient2(name = "")
p = p + .theme_ss
p = p + theme_ss() + theme_small_axis(x = FALSE)
p = p + xlab("Signature") + ylab("Motif")

return(p)
Expand All @@ -63,13 +65,13 @@ plotSignatures <- function(s, normalize = FALSE, percent = FALSE) {
h = h * 100
}
}
w_df = .meltSignatures(h)
w_df = meltSignatures(h)

p = ggplot(w_df)
p = p + geom_bar(aes_string(x = "context", y = "value", fill = "alteration"),
stat = "identity", position = "identity")
p = p + facet_grid(signature ~ alteration)
p = p + .theme_ss
p = p + theme_ss() + theme_small_axis()
p = p + theme(legend.position = "none")
p = p + scale_fill_brewer(palette = "Set3")
p = p + xlab("Motif") + ylab("Contribution")
Expand All @@ -83,10 +85,16 @@ plotSampleMap <- function(s) {
h_df = melt(samples(s), varnames = c("sample", "signature"))
h_df$signature = factor(h_df$signature)

## lower zlim depending on data:
## - 0 for NMF-like methods
## - min(value) for others
zmin = min(h_df$value, na.rm = TRUE)
zmin = ifelse(zmin >= 0, 0, zmin)

p = ggplot(h_df)
p = p + geom_tile(aes_string(y = "sample", x = "signature", fill = "value"), color = "black")
p = p + scale_fill_gradient2(name = "Contribution", limits = c(0, NA)) ## for NMF
p = p + .theme_ss
p = p + scale_fill_gradient2(name = "Contribution", limits = c(zmin, NA))
p = p + theme_ss()
p = p + xlab("Signature") + ylab("Sample")

return(p)
Expand All @@ -106,22 +114,39 @@ plotSamples <- function(s, normalize = FALSE, percent = FALSE) {
w_df$signature = factor(w_df$signature)

p = ggplot(w_df)
p = p + geom_bar(aes_string(x = "sample", y = "value", fill = "signature"), color = "black", size = 0.3, stat = "identity", position = "stack")
p = p + .theme_ss
p = p + geom_bar(aes_string(x = "sample", y = "value", fill = "signature"),
color = "black", size = 0.3, stat = "identity", position = "stack")
p = p + theme_ss()
p = p + scale_fill_brewer(palette = "Set3")
p = p + xlab("Sample") + ylab("Signature Contribution")

return(p)
}


.theme_ss <- theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
axis.text.y = element_text(hjust = 0.5),
axis.text = element_text(size = 6, family = "mono"))
theme_ss <- function() {
    ## package-wide plot theme: 'theme_bw' with the x-axis tick labels
    ## rotated by 90 degrees and the y-axis tick labels centred
    axis_tweaks <- theme(
        axis.text.x = element_text(angle = 90, vjust = 0.5),
        axis.text.y = element_text(hjust = 0.5)
    )

    theme_bw() + axis_tweaks
}

theme_small_axis <- function(x = TRUE, y = TRUE, size = 6, family = "mono") {
    ## decrease the x/y-axis label size on top of 'theme_ss'
    ## - x, y :: whether to restyle the tick labels of the respective axis
    ## - size, family :: text size and font family of the restyled labels
    small_text <- element_text(size = size, family = family)

    res <- theme_ss()
    if (x) {
        res <- res + theme(axis.text.x = small_text)
    }
    if (y) {
        res <- res + theme(axis.text.y = small_text)
    }

    res
}


.meltSignatures <- function(x, vars = c("motif", "signature")) {
meltSignatures <- function(x, vars = c("motif", "signature")) {

w_df = melt(x, varnames = vars)
w_df$alteration = sub("([ACGTN])([ACGTN]) .+", "\\1>\\2", w_df$motif)
Expand Down
3 changes: 0 additions & 3 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,3 @@ showSome <- function(x, name, indent="") {
)
return(res)
}


dna_bases <- c("A", "C", "G", "T")
1 change: 1 addition & 0 deletions R/vignette-utils.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
scaSNVRanges <- function(chrs = hsAutosomes()) {

## explicit scoping here, since SCA package is optional
sca_all = SomaticCancerAlterations::scaLoadDatasets()
sca_metadata = SomaticCancerAlterations::scaMetadata()

Expand Down
17 changes: 12 additions & 5 deletions man/mutation-context.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

\description{

Extract the sequence context surrounding a SNV from a genomic
Extract the sequence context surrounding SNVs from a genomic
reference.

}
Expand All @@ -25,9 +25,9 @@ mutationContextH5vc(vc, ms, unify = TRUE)
[required]. For 'mutationContextMutect', an object as returned by
the 'readMutect' function.}

\item{ref}{A 'BSgenome' or 'FaFile' object representing the reference
sequence [required]. More generally, any object with a defined
'getSeq' method can be used.}
\item{ref}{A 'BSgenome', 'FaFile' or 'TwoBitFile' object representing
the reference sequence [required]. More generally, any object with
a defined 'getSeq' method can be used.}

\item{k}{The 'k'-mer size of the context, including the variant
position [integer, default: 3]. The variant will be located at the
Expand Down Expand Up @@ -57,7 +57,13 @@ mutationContextH5vc(vc, ms, unify = TRUE)

The somatic motif of a SNV, composed of (a) the base change and
(b) the sequence context surrounding the variant, is extracted from a
reference sequence with the 'mutationContext' function.
genomic sequence with the 'mutationContext' function.

Different types of classes that represent the genomic sequence can be
used together with the 'mutationContext' function: 'BSgenome',
'FaFile' and 'TwoBitFile' objects are supported through
Bioconductor by default. See the vignette for examples discussing an
analysis with non-reference genomes.

For mutect variant calls, all relevant information is already
contained in the results and somatic motifs can be constructed by using
Expand Down Expand Up @@ -87,6 +93,7 @@ mutationContextH5vc(vc, ms, unify = TRUE)

'mutationSpectrum' from the 'h5vc' package for 'mutationContextH5vc'

'showMethods("getSeq")' for genomic references that can be used
}

\examples{
Expand Down
12 changes: 6 additions & 6 deletions man/number-signatures.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ plotNumberSignatures(gof)
are used as summary statistics which can generally be applied to all
decomposition approaches.

The 'plotNumberSignature' function visualizes the results of the
'assessNumberSignatures' step. Statistics of the indivdual replicates
are shown as black crosses, whereas the mean and the range across the
replicates are depicted in red.
The 'plotNumberSignatures' function visualizes the results of the
'assessNumberSignatures' analysis. Statistics of the individual
replicates are shown as gray crosses, whereas the mean across the
replicates is depicted in red.

In practice, these summary statistics should not be trusted blindly,
but rather interpreted together with biological knowledge and
Expand All @@ -73,8 +73,8 @@ plotNumberSignatures(gof)

\value{

- assessNumberSignatures: A data frame with the RSS and KL statistic
for each run
- assessNumberSignatures: A data frame with the RSS and explained
variance for each run

- plotNumberSignatures: A ggplot object

Expand Down
10 changes: 10 additions & 0 deletions man/sca-vranges.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@

}

\note{

While 'scaSNVRanges' is provided for convenient access to the
data of the 'SomaticCancerAlterations' package, we encourage you to
develop an understanding of the underlying data and its conversion
to a 'VRanges' object.

}


\value{

A 'VRanges' object with somatic SNV calls.
Expand Down
Loading

0 comments on commit ca3e84d

Please sign in to comment.