Skip to content

Commit

Permalink
Merge branch 'main' into danknight-missing-edge
Browse files Browse the repository at this point in the history
  • Loading branch information
dan-knight authored Jul 31, 2024
2 parents ab951df + a3386ac commit c1ae02e
Show file tree
Hide file tree
Showing 14 changed files with 232 additions and 37 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: CancerEvolutionVisualization
Title: Publication Quality Phylogenetic Tree Plots
Version: 2.1.0
Date: 2024-07-19
Date: 2024-07-31
Authors@R: c(
person("Paul Boutros", role = "cre", email = "[email protected]"),
person("Adriana Salcedo", role = "aut"),
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,4 @@ export(create.ccf.heatmap)
export(create.cluster.heatmap)
export(create.ccf.summary.heatmap)


export(create.clone.genome.distribution.plot)
3 changes: 2 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
CancerEvolutionVisualization 2.1.0 2024-07-19 (Helena Winata, Dan Knight)
CancerEvolutionVisualization 2.1.0 2024-07-31 (Helena Winata, Dan Knight)

ADDED
* Optional "spread" column to control node/branch spacing
* Plotting functions to visualize the distribution of clones across the genome.
* Documentation for heatmaps and clone-genome distribution plot

UPDATE
* Fixed angle calculation bug where child angles do not follow
Expand Down
Empty file removed R/Untitled.R
Empty file.
9 changes: 7 additions & 2 deletions R/create.ccf.summary.heatmap.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
create.ccf.summary.heatmap <- function(
DF,
ccf.thres = 0,
ccf.thres = NULL,
median.col = 'median.ccf.per.sample',
clone.order = NULL,
sample.order = NULL,
Expand All @@ -24,7 +24,10 @@ create.ccf.summary.heatmap <- function(
value = median.col,
x.axis = 'clone.id'
);
arr[arr <= ccf.thres] <- 0;

if (!is.null(ccf.thres)) {
arr[arr <= ccf.thres] <- 0;
}

clone.df <- aggregate(CCF ~ clone.id, data = DF[DF$CCF > 0, ], FUN = length);
sample.df <- aggregate(CCF ~ ID, data = DF[DF$CCF > 0, ], FUN = length);
Expand All @@ -41,6 +44,7 @@ create.ccf.summary.heatmap <- function(
data = clone.df,
xaxis.cex = 0,
xlab.label = NULL,
xaxis.tck = 0,
ylab.label = 'SNV per clone',
ylab.cex = subplot.ylab.cex,
yaxis.cex = subplot.yaxis.cex,
Expand All @@ -57,6 +61,7 @@ create.ccf.summary.heatmap <- function(
xaxis.fontface = subplot.xaxis.fontface,
xlimits = c( - max(sample.df$nsnv) * 0.05, max(sample.df$nsnv) * 1.05),
yaxis.cex = 0,
yaxis.tck = 0,
ylab.label = NULL,
plot.horizontal = TRUE
);
Expand Down
3 changes: 1 addition & 2 deletions R/create.clone.genome.distribution.densityplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@ create.clone.genome.distribution.densityplot <- function(
));
}

calculate.density.and.scale <- function(cluster.df, total.nsnv) {
calculate.density.and.scale <- function(cluster.df) {
density <- density(x = cluster.df$genome.pos, bw = 'nrd', adjust = 0.05, na.rm = TRUE);
density.df <- as.data.frame(density[c('x','y')]);
density.df$clone.id <- unique(cluster.df$clone.id);
# density.df$scaled.y <- density.df$y * nrow(cluster.df) / total.nsnv;
density.df$count <- nrow(cluster.df) / sum(density.df$y) * density.df$y;

return(density.df)
Expand Down
65 changes: 39 additions & 26 deletions R/create.clone.genome.distribution.plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ create.clone.genome.distribution.plot <- function(
snv.df,
genome.build = 'GRCh37',
clone.order = NULL,
cluster.colours = NULL,
save.plt.dir = NULL,
clone.colours = NULL,
filename = NULL,
multi.sample = FALSE,
...
) {
Expand All @@ -15,16 +15,29 @@ create.clone.genome.distribution.plot <- function(
if (is.null(clone.order)) {
clone.order <- sort(unique(snv.df$clone.id));
}
if (multi.sample) { # if multi-sample is true, check for sample ids in 'ID' column
if (is.null(snv.df$ID)) {
stop('ID column must contain sample ID if multi.sample is TRUE')

if (!is.null(filename)) {
save.plt <- filename;
}

if (multi.sample) {
# if multi-sample is true, check for sample ids in 'ID' column
if (is.null(snv.df$ID)) {
stop('ID column must contain sample ID if multi.sample is TRUE');
}
# filename must be a directory
if (!dir.exists(save.plt)) {
stop('filename must be a directory if multi.sample is TRUE');
}
} else {
if (dir.exists(save.plt)) {
stop('filename must be a path (not a directory) if multi.sample is FALSE');
}
snv.df$ID <- 'all';
}

if (is.null(cluster.colours)) {
cluster.colours <- get.colours(clone.order, return.names = TRUE);
if (is.null(clone.colours)) {
clone.colours <- get.colours(clone.order, return.names = TRUE);
}
snv.df$clone.id <- factor(snv.df$clone.id, levels = clone.order);
genome.pos.df <- get.genome.pos(snv.df, genome.build);
Expand All @@ -36,25 +49,26 @@ create.clone.genome.distribution.plot <- function(

for (s in unique(snv.df$ID)) {
# Iterate through each sample -------------------------------------------------------------
sample.df <- droplevels(snv.df[snv.df$ID == s, ])
print(paste('Plotting clone distribution across the genome for sample:', s));

sample.df <- droplevels(snv.df[snv.df$ID == s, ])
if (multi.sample & !is.null(filename)) {
save.plt <- file.path(save.plt, paste0(s, '.png'));
}

plt <- create.clone.genome.distribution.plot.per.sample(
sample.df,
cluster.colours[levels(sample.df$clone.id)],
clone.colours[levels(sample.df$clone.id)],
chr.info,
save.plt = ifelse(
is.null(save.plt.dir),
NULL,
file.path(save.plt.dir, paste0(s, '_clone-genome-dist.png'))
),
save.plt = ifelse(is.null(filename), NULL, save.plt),
...
);
}
}

create.clone.genome.distribution.plot.per.sample <- function(
sample.df,
cluster.colours,
clone.colours,
chr.info,
save.plt = NULL,
width = 18,
Expand All @@ -75,26 +89,25 @@ create.clone.genome.distribution.plot.per.sample <- function(

# calculate densities for each cluster --------------------------------------------------------
density.list <- list();
for (c in unique(sample.df$clone.id)) {
if (sum(sample.df$clone.id == c) <= 1) {
warning(paste('Skipping clone', c, 'in sample', unique(sample.df$ID), 'since there is only one SNV'));
for (k in unique(sample.df$clone.id)) {
if (sum(sample.df$clone.id == k) <= 1) {
warning(paste('Skipping clone', k, 'in sample', unique(sample.df$ID), 'since there is only one SNV'));
next;
}
density.list[[c]] <- calculate.density.and.scale(
cluster.df = sample.df[sample.df$clone.id == c, ],
total.nsnv = nrow(sample.df)
density.list[[k]] <- calculate.density.and.scale(
cluster.df = sample.df[sample.df$clone.id == k, ]
);
}
density.df <- do.call(rbind, density.list);

# get plot legend -----------------------------------------------------------------------------
cluster.colours <- cluster.colours[levels(sample.df$clone.id)];
clone.colours <- clone.colours[levels(sample.df$clone.id)];
cluster.legend <- BoutrosLab.plotting.general::legend.grob(
list(
legend = list(
title = 'Clones',
labels = names(cluster.colours),
colours = c(cluster.colours),
labels = names(clone.colours),
colours = c(clone.colours),
border = 'black'
)
),
Expand All @@ -104,7 +117,7 @@ create.clone.genome.distribution.plot.per.sample <- function(
);

# create individual plot ----------------------------------------------------------------------
sample.df$colour <- cluster.colours[sample.df$clone.id];
sample.df$colour <- clone.colours[sample.df$clone.id];
scatter.plt <- create.clone.genome.distribution.scatterplot(
scatter.df = sample.df,
nsnv = nrow(sample.df),
Expand All @@ -122,7 +135,7 @@ create.clone.genome.distribution.plot.per.sample <- function(

density.plt <- create.clone.genome.distribution.densityplot(
density.df,
cluster.colours,
clone.colours,
chr.info = chr.info,
xaxis.tck = xaxis.tck,
yaxis.tck = yaxis.tck,
Expand Down
11 changes: 7 additions & 4 deletions R/create.cluster.heatmap.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
create.cluster.heatmap <- function(
DF,
clone.colours = NULL,
plt.height = 6,
plt.width = 11,
height = 6,
width = 11,
xaxis.col = NULL,
legend.size = 3,
legend.title.cex = 1.2,
Expand All @@ -11,6 +11,7 @@ create.cluster.heatmap <- function(
xlab.cex = 1.2,
xaxis.cex = 1,
xaxis.fontface = 'bold',
y.spacing = 1,
colour.scheme = c('white', 'blue'),
...
) {
Expand Down Expand Up @@ -85,7 +86,9 @@ create.cluster.heatmap <- function(
legend = list(right = list(
fun = legend.clone
)),
height = plt.height,
width = plt.width
y.spacing = y.spacing,
right.legend.padding = 0.5,
height = height,
width = width
));
}
6 changes: 6 additions & 0 deletions man/GRCh37.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
\docType{data}
\name{GRCh37}
\alias{GRCh37}
\title{GRCh37 Chromosome Information}
\description{Chromosome information for the GRCh37 genome build. Used for plotting.}
\format{data.frame}
6 changes: 6 additions & 0 deletions man/GRCh38.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
\docType{data}
\name{GRCh38}
\alias{GRCh38}
\title{GRCh38 Chromosome Information}
\description{Chromosome information for the GRCh38 genome build. Used for plotting.}
\format{data.frame}
34 changes: 34 additions & 0 deletions man/create.ccf.heatmap.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
\name{create.ccf.heatmap}
\alias{create.ccf.heatmap}
\title{Cancer Cell Fraction (CCF) Heatmap}
\description{
Creates a heatmap of cancer cell fraction (CCF) distribution across tumour samples. The function is a wrapper around \code{BoutrosLab.plotting.general::create.heatmap()} with some changes in the default parameters. All parameter descriptions are the same as in \code{BoutrosLab.plotting.general::create.heatmap()} except for \code{ccf.thres}.
}
\usage{
create.ccf.heatmap(
x,
ccf.thres = NULL,
cluster.dimensions = 'both',
clustering.method = 'complete',
distance.method = 'euclidean',
xaxis.lab = '',
xlab.label = 'Mutations',
print.colour.key = FALSE,
colour.scheme = c('white', 'blue'),
...
)
}
\arguments{
\item{x}{Either a data-frame or a matrix from which the heatmap is to be created}
\item{ccf.thres}{CCF threshold to be applied to the heatmap. Values below the threshold will be set to 0. Defaults to \code{NULL}}
\item{cluster.dimensions}{Defaults to \dQuote{both}.}
\item{clustering.method}{Defaults to \dQuote{complete}.}
\item{distance.method}{Defaults to \dQuote{euclidean}.}
\item{xaxis.lab}{Defaults to an empty string.}
\item{xlab.label}{Defaults to \dQuote{Mutations}.}
\item{print.colour.key}{Defaults to \code{FALSE}.}
\item{colour.scheme}{Defaults to \code{c('white', 'blue')}.}
\item{...}{Pass through argument. See BoutrosLab.plotting.general::create.heatmap() for further details.}
}
\value{A \code{grob} object of the heatmap.}
\author{Helena Winata}
52 changes: 52 additions & 0 deletions man/create.ccf.summary.heatmap.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
\name{create.ccf.summary.heatmap}
\alias{create.ccf.summary.heatmap}
\title{Cancer Cell Fraction (CCF) Summary Heatmap}
\description{
Creates a heatmap of cancer cell fraction (CCF) distribution across tumour samples with clone IDs as a covariate beneath the heatmap. Subplot parameters control the appearance of the heatmap and barplots. See \code{BoutrosLab.plotting.general::create.barplot()} or \code{BoutrosLab.plotting.general::create.heatmap()} for parameter descriptions. Legend parameters are passed to \code{BoutrosLab.plotting.general::legend.grob()}.
}

\usage{
create.ccf.summary.heatmap(
DF,
ccf.thres = NULL,
median.col = 'median.ccf.per.sample',
clone.order = NULL,
sample.order = NULL,
hm.col.scheme = c('white', 'blue'),
subplot.xlab.cex = 1.2,
subplot.xaxis.cex = 1,
subplot.xaxis.fontface = 'bold',
subplot.xaxis.rot = 90,
subplot.ylab.cex = 1.2,
subplot.yaxis.cex = 1,
subplot.yaxis.fontface = 'bold',
hm.xaxis.rot = 90,
legend.size = 3,
legend.title.cex = 1.2,
legend.label.cex = 1,
...
);
}
\arguments{
\item{DF}{A data-frame with the following column names: 'ID', 'SNV.id', 'clone.id', 'CCF'.}
\item{ccf.thres}{CCF threshold to be applied to the heatmap. Values less than or equal to the threshold will be set to 0. If \code{NULL}, no threshold is applied. Defaults to \code{NULL}.}
\item{median.col}{Defaults to \dQuote{median.ccf.per.sample}}
\item{clone.order}{Defaults to \code{NULL}}
\item{sample.order}{Defaults to \code{NULL}}
\item{hm.col.scheme}{Heatmap colour scheme. Defaults to \code{c('white', 'blue')}}
\item{subplot.xlab.cex}{Subplot parameter. Defaults to 1.2}
\item{subplot.xaxis.cex}{Subplot parameter. Defaults to 1}
\item{subplot.xaxis.fontface}{Subplot parameter. Defaults to \dQuote{bold}}
\item{subplot.xaxis.rot}{Subplot parameter. Defaults to 90}
\item{subplot.ylab.cex}{Subplot parameter. Defaults to 1.2}
\item{subplot.yaxis.cex}{Subplot parameter. Defaults to 1}
\item{subplot.yaxis.fontface}{Subplot parameter. Defaults to \dQuote{bold}}
\item{hm.xaxis.rot}{Subplot parameter. Defaults to 90}
\item{legend.size}{Legend parameter. Defaults to 3}
\item{legend.title.cex}{Legend parameter. Defaults to 1.2}
\item{legend.label.cex}{Legend parameter. Defaults to 1}
\item{...}{Pass through argument. See BoutrosLab.plotting.general::create.multipanelplot() for further details.}

}
\value{A \code{grob} object of the summary plot.}
\author{Helena Winata}
32 changes: 32 additions & 0 deletions man/create.clone.genome.distribution.plot.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
\name{create.clone.genome.distribution.plot}
\alias{create.clone.genome.distribution.plot}
\title{Create Clone Genome Distribution Plot}
\description{
This function creates a plot showing the distribution of clones across the genome. It generates a scatter plot of the SNVs colored by clone ID and a density plot showing the density of each clone across the genome. The function can handle both single and multi-sample inputs.
}
\usage{
create.clone.genome.distribution.plot(
snv.df,
genome.build = 'GRCh37',
clone.order = NULL,
clone.colours = NULL,
filename = NULL,
multi.sample = FALSE,
...
)
}
\arguments{
\item{snv.df}{A data frame containing the SNV data. It must have columns 'chr', 'pos', and 'clone.id'. If \code{multi.sample = TRUE}, it must also have a column 'ID' specifying the sample ID for each SNV.}
\item{genome.build}{The genome build to use. Defaults to \dQuote{GRCh37}.}
\item{clone.order}{The order in which to plot the clones. If \code{NULL}, clones will be sorted alphabetically.}
\item{clone.colours}{A named vector specifying the color to use for each clone. If \code{NULL}, colors will be automatically assigned.}
\item{filename}{Directory or filepath to save the plot in. If \code{multi.sample = TRUE}, this must be a directory. If \code{multi.sample = FALSE}, this must be a filepath. If \code{NULL}, the plot will not be saved.}
\item{multi.sample}{Logical indicating whether the input data contains multiple samples. Defaults to \code{FALSE}.}
\item{...}{Additional arguments to be passed to \code{BoutrosLab.plotting.general::create.multipanelplot()}.}
}
\details{
This function preprocesses the input data frame, extracts chromosome information, and iterates over each sample to create a clone genome distribution plot. For each sample, it calculates the density of each clone across the genome and creates a scatter plot of the SNVs colored by clone ID and a density plot showing the density of each clone.
}
\value{A `grob` object.}
\author{Helena Winata, Selina Wu}

Loading

0 comments on commit c1ae02e

Please sign in to comment.