Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hwinata add documentation #129

Merged
merged 14 commits into from
Jul 30, 2024
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: CancerEvolutionVisualization
Title: Publication Quality Phylogenetic Tree Plots
Version: 2.1.0
Date: 2024-05-07
Date: 2024-07-30
Authors@R: c(
person("Paul Boutros", role = "cre", email = "[email protected]"),
person("Adriana Salcedo", role = "aut"),
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,4 @@ export(create.ccf.heatmap)
export(create.cluster.heatmap)
export(create.ccf.summary.heatmap)


export(create.clone.genome.distribution.plot)
3 changes: 2 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
CancerEvolutionVisualization 2.1.0 2024-05-07 (Helena Winata, Dan Knight)
CancerEvolutionVisualization 2.1.0 2024-07-30 (Helena Winata, Dan Knight)

ADDED
* Optional "spread" column to control node/branch spacing
* Plotting functions to visualize the distribution of clones across the genome.
* Documentation for heatmaps and clone-genome distribution plot

UPDATE
* Fixed angle calculation bug where child angles do not follow
Expand Down
Empty file removed R/Untitled.R
Empty file.
9 changes: 7 additions & 2 deletions R/create.ccf.summary.heatmap.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
create.ccf.summary.heatmap <- function(
DF,
ccf.thres = 0,
ccf.thres = NULL,
median.col = 'median.ccf.per.sample',
clone.order = NULL,
sample.order = NULL,
Expand All @@ -24,7 +24,10 @@ create.ccf.summary.heatmap <- function(
value = median.col,
x.axis = 'clone.id'
);
arr[arr <= ccf.thres] <- 0;

if (!is.null(ccf.thres)) {
arr[arr <= ccf.thres] <- 0;
}

clone.df <- aggregate(CCF ~ clone.id, data = DF[DF$CCF > 0, ], FUN = length);
sample.df <- aggregate(CCF ~ ID, data = DF[DF$CCF > 0, ], FUN = length);
Expand All @@ -41,6 +44,7 @@ create.ccf.summary.heatmap <- function(
data = clone.df,
xaxis.cex = 0,
xlab.label = NULL,
xaxis.tck = 0,
ylab.label = 'SNV per clone',
ylab.cex = subplot.ylab.cex,
yaxis.cex = subplot.yaxis.cex,
Expand All @@ -57,6 +61,7 @@ create.ccf.summary.heatmap <- function(
xaxis.fontface = subplot.xaxis.fontface,
xlimits = c( - max(sample.df$nsnv) * 0.05, max(sample.df$nsnv) * 1.05),
yaxis.cex = 0,
yaxis.tck = 0,
ylab.label = NULL,
plot.horizontal = TRUE
);
Expand Down
3 changes: 1 addition & 2 deletions R/create.clone.genome.distribution.densityplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@ create.clone.genome.distribution.densityplot <- function(
));
}

calculate.density.and.scale <- function(cluster.df, total.nsnv) {
calculate.density.and.scale <- function(cluster.df) {
density <- density(x = cluster.df$genome.pos, bw = 'nrd', adjust = 0.05, na.rm = TRUE);
density.df <- as.data.frame(density[c('x','y')]);
density.df$clone.id <- unique(cluster.df$clone.id);
# density.df$scaled.y <- density.df$y * nrow(cluster.df) / total.nsnv;
density.df$count <- nrow(cluster.df) / sum(density.df$y) * density.df$y;

return(density.df)
Expand Down
65 changes: 39 additions & 26 deletions R/create.clone.genome.distribution.plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ create.clone.genome.distribution.plot <- function(
snv.df,
genome.build = 'GRCh37',
clone.order = NULL,
cluster.colours = NULL,
save.plt.dir = NULL,
clone.colours = NULL,
filename = NULL,
multi.sample = FALSE,
...
) {
Expand All @@ -15,16 +15,29 @@ create.clone.genome.distribution.plot <- function(
if (is.null(clone.order)) {
clone.order <- sort(unique(snv.df$clone.id));
}
if (multi.sample) { # if multi-sample is true, check for sample ids in 'ID' column
if (is.null(snv.df$ID)) {
stop('ID column must contain sample ID if multi.sample is TRUE')

if (!is.null(filename)) {
save.plt <- filename;
}

if (multi.sample) {
# if multi-sample is true, check for sample ids in 'ID' column
if (is.null(snv.df$ID)) {
stop('ID column must contain sample ID if multi.sample is TRUE');
}
# filename must be a directory
if (!dir.exists(save.plt)) {
stop('filename must be a directory if multi.sample is TRUE');
}
} else {
if (dir.exists(save.plt)) {
stop('filename must be a path (not a directory) if multi.sample is FALSE');
}
snv.df$ID <- 'all';
}

if (is.null(cluster.colours)) {
cluster.colours <- get.colours(clone.order, return.names = TRUE);
if (is.null(clone.colours)) {
clone.colours <- get.colours(clone.order, return.names = TRUE);
}
snv.df$clone.id <- factor(snv.df$clone.id, levels = clone.order);
genome.pos.df <- get.genome.pos(snv.df, genome.build);
Expand All @@ -36,25 +49,26 @@ create.clone.genome.distribution.plot <- function(

for (s in unique(snv.df$ID)) {
# Iterate through each sample -------------------------------------------------------------
sample.df <- droplevels(snv.df[snv.df$ID == s, ])
print(paste('Plotting clone distribution across the genome for sample:', s));

sample.df <- droplevels(snv.df[snv.df$ID == s, ])
if (multi.sample & !is.null(filename)) {
save.plt <- file.path(save.plt, paste0(s, '.png'));
}

plt <- create.clone.genome.distribution.plot.per.sample(
sample.df,
cluster.colours[levels(sample.df$clone.id)],
clone.colours[levels(sample.df$clone.id)],
chr.info,
save.plt = ifelse(
is.null(save.plt.dir),
NULL,
file.path(save.plt.dir, paste0(s, '_clone-genome-dist.png'))
),
save.plt = ifelse(is.null(filename), NULL, save.plt),
...
);
}
}

create.clone.genome.distribution.plot.per.sample <- function(
sample.df,
cluster.colours,
clone.colours,
chr.info,
save.plt = NULL,
width = 18,
Expand All @@ -75,26 +89,25 @@ create.clone.genome.distribution.plot.per.sample <- function(

# calculate densities for each cluster --------------------------------------------------------
density.list <- list();
for (c in unique(sample.df$clone.id)) {
if (sum(sample.df$clone.id == c) <= 1) {
warning(paste('Skipping clone', c, 'in sample', unique(sample.df$ID), 'since there is only one SNV'));
for (k in unique(sample.df$clone.id)) {
if (sum(sample.df$clone.id == k) <= 1) {
warning(paste('Skipping clone', k, 'in sample', unique(sample.df$ID), 'since there is only one SNV'));
next;
}
density.list[[c]] <- calculate.density.and.scale(
cluster.df = sample.df[sample.df$clone.id == c, ],
total.nsnv = nrow(sample.df)
density.list[[k]] <- calculate.density.and.scale(
cluster.df = sample.df[sample.df$clone.id == k, ]
);
}
density.df <- do.call(rbind, density.list);

# get plot legend -----------------------------------------------------------------------------
cluster.colours <- cluster.colours[levels(sample.df$clone.id)];
clone.colours <- clone.colours[levels(sample.df$clone.id)];
cluster.legend <- BoutrosLab.plotting.general::legend.grob(
list(
legend = list(
title = 'Clones',
labels = names(cluster.colours),
colours = c(cluster.colours),
labels = names(clone.colours),
colours = c(clone.colours),
border = 'black'
)
),
Expand All @@ -104,7 +117,7 @@ create.clone.genome.distribution.plot.per.sample <- function(
);

# create individual plot ----------------------------------------------------------------------
sample.df$colour <- cluster.colours[sample.df$clone.id];
sample.df$colour <- clone.colours[sample.df$clone.id];
scatter.plt <- create.clone.genome.distribution.scatterplot(
scatter.df = sample.df,
nsnv = nrow(sample.df),
Expand All @@ -122,7 +135,7 @@ create.clone.genome.distribution.plot.per.sample <- function(

density.plt <- create.clone.genome.distribution.densityplot(
density.df,
cluster.colours,
clone.colours,
chr.info = chr.info,
xaxis.tck = xaxis.tck,
yaxis.tck = yaxis.tck,
Expand Down
11 changes: 7 additions & 4 deletions R/create.cluster.heatmap.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
create.cluster.heatmap <- function(
DF,
clone.colours = NULL,
plt.height = 6,
plt.width = 11,
height = 6,
width = 11,
xaxis.col = NULL,
legend.size = 3,
legend.title.cex = 1.2,
Expand All @@ -11,6 +11,7 @@ create.cluster.heatmap <- function(
xlab.cex = 1.2,
xaxis.cex = 1,
xaxis.fontface = 'bold',
y.spacing = 1,
colour.scheme = c('white', 'blue'),
...
) {
Expand Down Expand Up @@ -85,7 +86,9 @@ create.cluster.heatmap <- function(
legend = list(right = list(
fun = legend.clone
)),
height = plt.height,
width = plt.width
y.spacing = y.spacing,
right.legend.padding = 0.5,
height = height,
width = width
));
}
6 changes: 6 additions & 0 deletions man/GRCh37.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
\docType{data}
\name{GRCh37}
\alias{GRCh37}
\title{GRCh37 Chromosome Information}
\description{Chromosome information for the GRCh37 genome build. Used for plotting.}
\format{data.frame}
6 changes: 6 additions & 0 deletions man/GRCh38.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
\docType{data}
\name{GRCh38}
\alias{GRCh38}
\title{GRCh38 Chromosome Information}
\description{Chromosome information for the GRCh38 genome build. Used for plotting.}
\format{data.frame}
34 changes: 34 additions & 0 deletions man/create.ccf.heatmap.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
\name{create.ccf.heatmap}
\alias{create.ccf.heatmap}
\title{CCF Heatmap}
\description{
Creates a heatmap of cancer cell fraction (CCF) distribution across tumour samples. The function is a wrapper around \code{BoutrosLab.plotting.general::create.heatmap()} with some changes in the default parameters. All parameter descriptions are the same as in \code{BoutrosLab.plotting.general::create.heatmap()} except for \code{ccf.thres}.
}
\usage{
create.ccf.heatmap(
x,
ccf.thres = NULL,
cluster.dimensions = 'both',
clustering.method = 'complete',
distance.method = 'euclidean',
xaxis.lab = '',
xlab.label = 'Mutations',
print.colour.key = FALSE,
colour.scheme = c('white', 'blue'),
...
)
}
\arguments{
\item{x}{Either a data-frame or a matrix from which the heatmap is to be created}
\item{ccf.thres}{CCF threshold to be applied to the heatmap. Values below the threshold will be set to 0. Defaults to \code{NULL}}
\item{cluster.dimensions}{Defaults to \dQuote{both}.}
\item{clustering.method}{Defaults to \dQuote{complete}.}
\item{distance.method}{Defaults to \dQuote{euclidean}.}
\item{xaxis.lab}{Defaults to an empty string.}
\item{xlab.label}{Defaults to \dQuote{Mutations}.}
\item{print.colour.key}{Defaults to \code{FALSE}.}
\item{colour.scheme}{Defaults to \code{c('white', 'blue')}.}
\item{...}{Pass through argument. See BoutrosLab.plotting.general::create.heatmap() for further details.}
}
\value{A \code{grob} object of the heatmap.}
\author{Helena Winata}
52 changes: 52 additions & 0 deletions man/create.ccf.summary.heatmap.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
\name{create.ccf.summary.heatmap}
\alias{create.ccf.summary.heatmap}
\title{CCF Summary Heatmap}
\description{
Creates a heatmap of cancer cell fraction (CCF) distribution across tumour samples with clone IDs as a covariate beneath the heatmap. Subplot parameters control the appearance of the heatmap and barplots. See \code{BoutrosLab.plotting.general::create.barplot()} or \code{BoutrosLab.plotting.general::create.heatmap()} for parameter descriptions. Legend parameters are passed to \code{BoutrosLab.plotting.general::legend.grob()}.
}

\usage{
create.ccf.summary.heatmap(
DF,
ccf.thres = NULL,
median.col = 'median.ccf.per.sample',
clone.order = NULL,
sample.order = NULL,
hm.col.scheme = c('white', 'blue'),
subplot.xlab.cex = 1.2,
subplot.xaxis.cex = 1,
subplot.xaxis.fontface = 'bold',
subplot.xaxis.rot = 90,
subplot.ylab.cex = 1.2,
subplot.yaxis.cex = 1,
subplot.yaxis.fontface = 'bold',
hm.xaxis.rot = 90,
legend.size = 3,
legend.title.cex = 1.2,
legend.label.cex = 1,
...
);
}
\arguments{
\item{DF}{A data-frame with the following column names: 'ID', 'SNV.id', 'clone.id', 'CCF'.}
\item{ccf.thres}{CCF threshold to be applied to the heatmap. Values at or below the threshold will be set to 0. Defaults to \code{NULL}, in which case no threshold is applied.}
\item{median.col}{Defaults to \dQuote{median.ccf.per.sample}}
\item{clone.order}{Defaults to \code{NULL}}
\item{sample.order}{Defaults to \code{NULL}}
\item{hm.col.scheme}{Heatmap colour scheme. Defaults to \code{c('white', 'blue')}}
\item{subplot.xlab.cex}{Subplot parameter. Defaults to 1.2}
\item{subplot.xaxis.cex}{Subplot parameter. Defaults to 1}
\item{subplot.xaxis.fontface}{Subplot parameter. Defaults to \dQuote{bold}}
\item{subplot.xaxis.rot}{Subplot parameter. Defaults to 90}
\item{subplot.ylab.cex}{Subplot parameter. Defaults to 1.2}
\item{subplot.yaxis.cex}{Subplot parameter. Defaults to 1}
\item{subplot.yaxis.fontface}{Subplot parameter. Defaults to \dQuote{bold}}
\item{hm.xaxis.rot}{Subplot parameter. Defaults to 90}
\item{legend.size}{Legend parameter. Defaults to 3}
\item{legend.title.cex}{Legend parameter. Defaults to 1.2}
\item{legend.label.cex}{Legend parameter. Defaults to 1}
\item{...}{Pass through argument. See BoutrosLab.plotting.general::create.multipanelplot() for further details.}

}
\value{A \code{grob} object of the summary plot.}
\author{Helena Winata}
32 changes: 32 additions & 0 deletions man/create.clone.genome.distribution.plot.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
\name{create.clone.genome.distribution.plot}
\alias{create.clone.genome.distribution.plot}
\title{Create Clone Genome Distribution Plot}
\description{
This function creates a plot showing the distribution of clones across the genome. It generates a scatter plot of the SNVs colored by clone ID and a density plot showing the density of each clone across the genome. The function can handle both single and multi-sample inputs.
}
\usage{
create.clone.genome.distribution.plot(
snv.df,
genome.build = 'GRCh37',
clone.order = NULL,
clone.colours = NULL,
filename = NULL,
multi.sample = FALSE,
...
)
}
\arguments{
\item{snv.df}{A data frame containing the SNV data. It must have columns 'chr', 'pos', and 'clone.id'. If \code{multi.sample = TRUE}, it must also have a column 'ID' specifying the sample ID for each SNV.}
\item{genome.build}{The genome build to use. Defaults to \dQuote{GRCh37}.}
\item{clone.order}{The order in which to plot the clones. If \code{NULL}, clones will be sorted alphabetically.}
\item{clone.colours}{A named vector specifying the color to use for each clone. If \code{NULL}, colors will be automatically assigned.}
\item{filename}{Directory or filepath to save the plot in. If \code{multi.sample = TRUE}, this must be a directory. If \code{multi.sample = FALSE}, this must be a filepath. If \code{NULL}, the plot will not be saved.}
\item{multi.sample}{Logical indicating whether the input data contains multiple samples. Defaults to \code{FALSE}.}
\item{...}{Additional arguments to be passed to \code{BoutrosLab.plotting.general::create.multipanelplot()}.}
}
\details{
This function preprocesses the input data frame, extracts chromosome information, and iterates over each sample to create a clone genome distribution plot. For each sample, it calculates the density of each clone across the genome and creates a scatter plot of the SNVs colored by clone ID and a density plot showing the density of each clone.
}
\value{A \code{grob} object.}
\author{Helena Winata, Selina Wu}

Loading
Loading