diff --git a/DESCRIPTION b/DESCRIPTION index 1c2aa48..f71c1fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: CancerEvolutionVisualization Title: Publication Quality Phylogenetic Tree Plots Version: 2.1.0 -Date: 2024-07-19 +Date: 2024-07-31 Authors@R: c( person("Paul Boutros", role = "cre", email = "PBoutros@mednet.ucla.edu"), person("Adriana Salcedo", role = "aut"), diff --git a/NAMESPACE b/NAMESPACE index 8b85eb5..51d41bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,5 +17,4 @@ export(create.ccf.heatmap) export(create.cluster.heatmap) export(create.ccf.summary.heatmap) - export(create.clone.genome.distribution.plot) diff --git a/NEWS b/NEWS index c0dcb8a..e198410 100644 --- a/NEWS +++ b/NEWS @@ -1,8 +1,9 @@ -CancerEvolutionVisualization 2.1.0 2024-07-19 (Helena Winata, Dan Knight) +CancerEvolutionVisualization 2.1.0 2024-07-31 (Helena Winata, Dan Knight) ADDED * Optional "spread" column to control node/branch spacing * Plotting functions to visualize the distribution of clones across the genome. 
+* Documentation for heatmaps and clone-genome distribution plot UPDATE * Fixed angle calculation bug where child angles do not follow diff --git a/R/Untitled.R b/R/Untitled.R deleted file mode 100644 index e69de29..0000000 diff --git a/R/create.ccf.summary.heatmap.R b/R/create.ccf.summary.heatmap.R index 394815d..ee384c4 100644 --- a/R/create.ccf.summary.heatmap.R +++ b/R/create.ccf.summary.heatmap.R @@ -1,6 +1,6 @@ create.ccf.summary.heatmap <- function( DF, - ccf.thres = 0, + ccf.thres = NULL, median.col = 'median.ccf.per.sample', clone.order = NULL, sample.order = NULL, @@ -24,7 +24,10 @@ create.ccf.summary.heatmap <- function( value = median.col, x.axis = 'clone.id' ); - arr[arr <= ccf.thres] <- 0; + + if (!is.null(ccf.thres)) { + arr[arr <= ccf.thres] <- 0; + } clone.df <- aggregate(CCF ~ clone.id, data = DF[DF$CCF > 0, ], FUN = length); sample.df <- aggregate(CCF ~ ID, data = DF[DF$CCF > 0, ], FUN = length); @@ -41,6 +44,7 @@ create.ccf.summary.heatmap <- function( data = clone.df, xaxis.cex = 0, xlab.label = NULL, + xaxis.tck = 0, ylab.label = 'SNV per clone', ylab.cex = subplot.ylab.cex, yaxis.cex = subplot.yaxis.cex, @@ -57,6 +61,7 @@ create.ccf.summary.heatmap <- function( xaxis.fontface = subplot.xaxis.fontface, xlimits = c( - max(sample.df$nsnv) * 0.05, max(sample.df$nsnv) * 1.05), yaxis.cex = 0, + yaxis.tck = 0, ylab.label = NULL, plot.horizontal = TRUE ); diff --git a/R/create.clone.genome.distribution.densityplot.R b/R/create.clone.genome.distribution.densityplot.R index 4bf68bf..e45b7d3 100644 --- a/R/create.clone.genome.distribution.densityplot.R +++ b/R/create.clone.genome.distribution.densityplot.R @@ -25,11 +25,10 @@ create.clone.genome.distribution.densityplot <- function( )); } -calculate.density.and.scale <- function(cluster.df, total.nsnv) { +calculate.density.and.scale <- function(cluster.df) { density <- density(x = cluster.df$genome.pos, bw = 'nrd', adjust = 0.05, na.rm = TRUE); density.df <- as.data.frame(density[c('x','y')]); 
density.df$clone.id <- unique(cluster.df$clone.id); - # density.df$scaled.y <- density.df$y * nrow(cluster.df) / total.nsnv; density.df$count <- nrow(cluster.df) / sum(density.df$y) * density.df$y; return(density.df) diff --git a/R/create.clone.genome.distribution.plot.R b/R/create.clone.genome.distribution.plot.R index ff0ef7d..2360d86 100644 --- a/R/create.clone.genome.distribution.plot.R +++ b/R/create.clone.genome.distribution.plot.R @@ -2,8 +2,8 @@ create.clone.genome.distribution.plot <- function( snv.df, genome.build = 'GRCh37', clone.order = NULL, - cluster.colours = NULL, - save.plt.dir = NULL, + clone.colours = NULL, + filename = NULL, multi.sample = FALSE, ... ) { @@ -15,16 +15,29 @@ create.clone.genome.distribution.plot <- function( if (is.null(clone.order)) { clone.order <- sort(unique(snv.df$clone.id)); } - if (multi.sample) { # if multi-sample is true, check for sample ids in 'ID' column - if (is.null(snv.df$ID)) { - stop('ID column must contain sample ID if multi.sample is TRUE') + + if (!is.null(filename)) { + save.plt <- filename; } + + if (multi.sample) { + # if multi-sample is true, check for sample ids in 'ID' column + if (is.null(snv.df$ID)) { + stop('ID column must contain sample ID if multi.sample is TRUE'); + } + # filename must be a directory + if (!dir.exists(save.plt)) { + stop('filename must be a directory if multi.sample is TRUE'); + } } else { + if (dir.exists(save.plt)) { + stop('filename must be a path (not a directory) if multi.sample is FALSE'); + } snv.df$ID <- 'all'; } - if (is.null(cluster.colours)) { - cluster.colours <- get.colours(clone.order, return.names = TRUE); + if (is.null(clone.colours)) { + clone.colours <- get.colours(clone.order, return.names = TRUE); } snv.df$clone.id <- factor(snv.df$clone.id, levels = clone.order); genome.pos.df <- get.genome.pos(snv.df, genome.build); @@ -36,17 +49,18 @@ create.clone.genome.distribution.plot <- function( for (s in unique(snv.df$ID)) { # Iterate through each sample 
------------------------------------------------------------- - sample.df <- droplevels(snv.df[snv.df$ID == s, ]) print(paste('Plotting clone distribution across the genome for sample:', s)); + + sample.df <- droplevels(snv.df[snv.df$ID == s, ]) + if (multi.sample & !is.null(filename)) { + save.plt <- file.path(save.plt, paste0(s, '.png')); + } + plt <- create.clone.genome.distribution.plot.per.sample( sample.df, - cluster.colours[levels(sample.df$clone.id)], + clone.colours[levels(sample.df$clone.id)], chr.info, - save.plt = ifelse( - is.null(save.plt.dir), - NULL, - file.path(save.plt.dir, paste0(s, '_clone-genome-dist.png')) - ), + save.plt = ifelse(is.null(filename), NULL, save.plt), ... ); } @@ -54,7 +68,7 @@ create.clone.genome.distribution.plot <- function( create.clone.genome.distribution.plot.per.sample <- function( sample.df, - cluster.colours, + clone.colours, chr.info, save.plt = NULL, width = 18, @@ -75,26 +89,25 @@ create.clone.genome.distribution.plot.per.sample <- function( # calculate densities for each cluster -------------------------------------------------------- density.list <- list(); - for (c in unique(sample.df$clone.id)) { - if (sum(sample.df$clone.id == c) <= 1) { - warning(paste('Skipping clone', c, 'in sample', unique(sample.df$ID), 'since there is only one SNV')); + for (k in unique(sample.df$clone.id)) { + if (sum(sample.df$clone.id == k) <= 1) { + warning(paste('Skipping clone', k, 'in sample', unique(sample.df$ID), 'since there is only one SNV')); next; } - density.list[[c]] <- calculate.density.and.scale( - cluster.df = sample.df[sample.df$clone.id == c, ], - total.nsnv = nrow(sample.df) + density.list[[k]] <- calculate.density.and.scale( + cluster.df = sample.df[sample.df$clone.id == k, ] ); } density.df <- do.call(rbind, density.list); # get plot legend ----------------------------------------------------------------------------- - cluster.colours <- cluster.colours[levels(sample.df$clone.id)]; + clone.colours <- 
clone.colours[levels(sample.df$clone.id)]; cluster.legend <- BoutrosLab.plotting.general::legend.grob( list( legend = list( title = 'Clones', - labels = names(cluster.colours), - colours = c(cluster.colours), + labels = names(clone.colours), + colours = c(clone.colours), border = 'black' ) ), @@ -104,7 +117,7 @@ create.clone.genome.distribution.plot.per.sample <- function( ); # create individual plot ---------------------------------------------------------------------- - sample.df$colour <- cluster.colours[sample.df$clone.id]; + sample.df$colour <- clone.colours[sample.df$clone.id]; scatter.plt <- create.clone.genome.distribution.scatterplot( scatter.df = sample.df, nsnv = nrow(sample.df), @@ -122,7 +135,7 @@ create.clone.genome.distribution.plot.per.sample <- function( density.plt <- create.clone.genome.distribution.densityplot( density.df, - cluster.colours, + clone.colours, chr.info = chr.info, xaxis.tck = xaxis.tck, yaxis.tck = yaxis.tck, diff --git a/R/create.cluster.heatmap.R b/R/create.cluster.heatmap.R index 5befe11..fe525fe 100644 --- a/R/create.cluster.heatmap.R +++ b/R/create.cluster.heatmap.R @@ -1,8 +1,8 @@ create.cluster.heatmap <- function( DF, clone.colours = NULL, - plt.height = 6, - plt.width = 11, + height = 6, + width = 11, xaxis.col = NULL, legend.size = 3, legend.title.cex = 1.2, @@ -11,6 +11,7 @@ create.cluster.heatmap <- function( xlab.cex = 1.2, xaxis.cex = 1, xaxis.fontface = 'bold', + y.spacing = 1, colour.scheme = c('white', 'blue'), ... 
) { @@ -85,7 +86,9 @@ create.cluster.heatmap <- function( legend = list(right = list( fun = legend.clone )), - height = plt.height, - width = plt.width + y.spacing = y.spacing, + right.legend.padding = 0.5, + height = height, + width = width )); } diff --git a/man/GRCh37.Rd b/man/GRCh37.Rd new file mode 100644 index 0000000..2fca90e --- /dev/null +++ b/man/GRCh37.Rd @@ -0,0 +1,6 @@ +\docType{data} +\name{GRCh37} +\alias{GRCh37} +\title{GRCh37 Chromosome Information} +\description{Chromosome information for the GRCh37 genome build. Used for plotting.} +\format{data.frame} \ No newline at end of file diff --git a/man/GRCh38.Rd b/man/GRCh38.Rd new file mode 100644 index 0000000..bf1b20c --- /dev/null +++ b/man/GRCh38.Rd @@ -0,0 +1,6 @@ +\docType{data} +\name{GRCh38} +\alias{GRCh38} +\title{GRCh38 Chromosome Information} +\description{Chromosome information for the GRCh38 genome build. Used for plotting.} +\format{data.frame} \ No newline at end of file diff --git a/man/create.ccf.heatmap.Rd b/man/create.ccf.heatmap.Rd new file mode 100644 index 0000000..e5eb77b --- /dev/null +++ b/man/create.ccf.heatmap.Rd @@ -0,0 +1,34 @@ +\name{create.ccf.heatmap} +\alias{create.ccf.heatmap} +\title{CCF Heatmap} +\description{ +Creates a heatmap of cancer cell fraction (CCF) distribution across tumour samples. The function is a wrapper around \code{BoutrosLab.plotting.general::create.heatmap()} with some changes in the default parameters. All parameter descriptions are the same as in \code{BoutrosLab.plotting.general::create.heatmap()} except for \code{ccf.thres}. +} +\usage{ +create.ccf.heatmap( + x, + ccf.thres = NULL, + cluster.dimensions = 'both', + clustering.method = 'complete', + distance.method = 'euclidean', + xaxis.lab = '', + xlab.label = 'Mutations', + print.colour.key = FALSE, + colour.scheme = c('white', 'blue'), + ... 
+ ) +} +\arguments{ + \item{x}{Either a data-frame or a matrix from which the heatmap is to be created} + \item{ccf.thres}{CCF threshold to be applied to the heatmap. Values below the threshold will be set to 0. Defaults to \code{NULL}} + \item{cluster.dimensions}{Defaults to \dQuote{both}.} + \item{clustering.method}{Defaults to \dQuote{complete}.} + \item{distance.method}{Defaults to \dQuote{euclidean}.} + \item{xaxis.lab}{Defaults to an empty string.} + \item{xlab.label}{Defaults to \dQuote{Mutations}.} + \item{print.colour.key}{Defaults to \code{FALSE}.} + \item{colour.scheme}{Defaults to \code{c('white', 'blue')}.} + \item{...}{Pass through argument. See BoutrosLab.plotting.general::create.heatmap() for further details.} +} +\value{A `grob` object of the heatmap.} +\author{Helena Winata} diff --git a/man/create.ccf.summary.heatmap.Rd b/man/create.ccf.summary.heatmap.Rd new file mode 100644 index 0000000..d400aa3 --- /dev/null +++ b/man/create.ccf.summary.heatmap.Rd @@ -0,0 +1,52 @@ +\name{create.ccf.summary.heatmap} +\alias{create.ccf.summary.heatmap} +\title{CCF Summary Heatmap} +\description{ +Creates a heatmap of cancer cell fraction (CCF) distribution across tumour samples with clone IDs as a covariate beneath the heatmap. Subplot parameters control the appearance of the heatmap and barplots. See \code{BoutrosLab.plotting.general::create.barplot()} or \code{BoutrosLab.plotting.general::create.heatmap()} for parameter descriptions. Legend parameters are passed to \code{BoutrosLab.plotting.general::legend.grob()}. 
+} + +\usage{ +create.ccf.summary.heatmap( + DF, + ccf.thres = NULL, + median.col = 'median.ccf.per.sample', + clone.order = NULL, + sample.order = NULL, + hm.col.scheme = c('white', 'blue'), + subplot.xlab.cex = 1.2, + subplot.xaxis.cex = 1, + subplot.xaxis.fontface = 'bold', + subplot.xaxis.rot = 90, + subplot.ylab.cex = 1.2, + subplot.yaxis.cex = 1, + subplot.yaxis.fontface = 'bold', + hm.xaxis.rot = 90, + legend.size = 3, + legend.title.cex = 1.2, + legend.label.cex = 1, + ... + ); +} +\arguments{ + \item{DF}{A data-frame with the following column names: 'ID', 'SNV.id', 'clone.id', 'CCF'.} + \item{ccf.thres}{CCF threshold to be applied to the heatmap. Values less than or equal to the threshold are set to 0. If \code{NULL} (the default), no threshold is applied.} + \item{median.col}{Defaults to \dQuote{median.ccf.per.sample}} + \item{clone.order}{Defaults to \code{NULL}} + \item{sample.order}{Defaults to \code{NULL}} + \item{hm.col.scheme}{Heatmap colour scheme. Defaults to \code{c('white', 'blue')}} + \item{subplot.xlab.cex}{Subplot parameter. Defaults to 1.2} + \item{subplot.xaxis.cex}{Subplot parameter. Defaults to 1} + \item{subplot.xaxis.fontface}{Subplot parameter. Defaults to \dQuote{bold}} + \item{subplot.xaxis.rot}{Subplot parameter. Defaults to 90} + \item{subplot.ylab.cex}{Subplot parameter. Defaults to 1.2} + \item{subplot.yaxis.cex}{Subplot parameter. Defaults to 1} + \item{subplot.yaxis.fontface}{Subplot parameter. Defaults to \dQuote{bold}} + \item{hm.xaxis.rot}{Subplot parameter. Defaults to 90} + \item{legend.size}{Legend parameter. Defaults to 3} + \item{legend.title.cex}{Legend parameter. Defaults to 1.2} + \item{legend.label.cex}{Legend parameter. Defaults to 1} + \item{...}{Pass through argument. 
See BoutrosLab.plotting.general::create.multipanelplot() for further details.} + +} +\value{A `grob` object of the summary plot.} +\author{Helena Winata} diff --git a/man/create.clone.genome.distribution.plot.Rd b/man/create.clone.genome.distribution.plot.Rd new file mode 100644 index 0000000..40dded4 --- /dev/null +++ b/man/create.clone.genome.distribution.plot.Rd @@ -0,0 +1,32 @@ +\name{create.clone.genome.distribution.plot} +\alias{create.clone.genome.distribution.plot} +\title{Create Clone Genome Distribution Plot} +\description{ +This function creates a plot showing the distribution of clones across the genome. It generates a scatter plot of the SNVs colored by clone ID and a density plot showing the density of each clone across the genome. The function can handle both single and multi-sample inputs. +} +\usage{ +create.clone.genome.distribution.plot( + snv.df, + genome.build = 'GRCh37', + clone.order = NULL, + clone.colours = NULL, + filename = NULL, + multi.sample = FALSE, + ... + ) +} +\arguments{ + \item{snv.df}{A data frame containing the SNV data. It must have columns 'chr', 'pos', and 'clone.id'. If \code{multi.sample = TRUE}, it must also have a column 'ID' specifying the sample ID for each SNV.} + \item{genome.build}{The genome build to use. Defaults to \dQuote{GRCh37}.} + \item{clone.order}{The order in which to plot the clones. If \code{NULL}, clones will be sorted alphabetically.} + \item{clone.colours}{A named vector specifying the color to use for each clone. If \code{NULL}, colors will be automatically assigned.} + \item{filename}{Directory or filepath to save the plot in. If \code{multi.sample = TRUE}, this must be a directory. If \code{multi.sample = FALSE}, this must be a filepath. If \code{NULL}, the plot will not be saved.} + \item{multi.sample}{Logical indicating whether the input data contains multiple samples. 
Defaults to \code{FALSE}.} + \item{...}{Additional arguments to be passed to \code{BoutrosLab.plotting.general::create.multipanelplot()}.} +} +\details{ +This function preprocesses the input data frame, extracts chromosome information, and iterates over each sample to create a clone genome distribution plot. For each sample, it calculates the density of each clone across the genome and creates a scatter plot of the SNVs colored by clone ID and a density plot showing the density of each clone. +} +\value{A `grob` object.} +\author{Helena Winata, Selina Wu} + diff --git a/man/create.cluster.heatmap.Rd b/man/create.cluster.heatmap.Rd new file mode 100644 index 0000000..e484cc3 --- /dev/null +++ b/man/create.cluster.heatmap.Rd @@ -0,0 +1,45 @@ +\name{create.cluster.heatmap} +\alias{create.cluster.heatmap} +\title{CCF Cluster Heatmap} +\description{ +Creates a heatmap of cancer cell fraction (CCF) distribution across tumour samples with clone IDs as a covariate beneath the heatmap. +} +\usage{ +create.cluster.heatmap( + DF, + clone.colours = NULL, + height = 6, + width = 11, + xaxis.col = NULL, + legend.size = 3, + legend.title.cex = 1.2, + legend.label.cex = 1, + filename = NULL, + xlab.cex = 1.2, + xaxis.cex = 1, + xaxis.fontface = 'bold', + y.spacing = 1, + colour.scheme = c('white', 'blue'), + ... + ); +} +\arguments{ + \item{DF}{A data-frame with the following column names: 'ID', 'SNV.id', 'clone.id', 'CCF'.} + \item{clone.colours}{Named list to provide a colour scheme for the clone ID covariate bar. If NULL, colours will be randomly generated. Defaults to \code{NULL}.} + \item{height}{Defaults to 6} + \item{width}{Defaults to 11} + \item{xaxis.col}{Column in DF to extract x-axis labels from. Defaults to \code{NULL}.} + \item{legend.size}{Width of the legend boxes in 'character' units. Defaults to 3} + \item{legend.title.cex}{Size of titles in the legends. Defaults to 1.2} + \item{legend.label.cex}{Size of text labels in the legends. 
Defaults to 1} + \item{filename}{Filename for tiff output, or if NULL returns the trellis object itself. Defaults to \code{NULL}.} + \item{xlab.cex}{Defaults to 1.2} + \item{xaxis.cex}{Defaults to 1} + \item{xaxis.fontface}{Defaults to \dQuote{bold}.} + \item{y.spacing}{Spacing between heatmap and clone covariate bar. Defaults to 1} + \item{colour.scheme}{Colour scheme for the heatmap. Defaults to \code{c('white', 'blue')}.} + \item{...}{Pass through argument. See BoutrosLab.plotting.general::create.heatmap() for further details.} +} +\value{A `grob` object of the heatmap.} +\author{Helena Winata} +\seealso{\code{\link{create.ccf.heatmap}}} \ No newline at end of file