diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..5ee63f5 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1 @@ +^\.covrignore$ diff --git a/.covrignore b/.covrignore new file mode 100644 index 0000000..1d7d085 --- /dev/null +++ b/.covrignore @@ -0,0 +1,3 @@ +R/init_ktplots.R +R/ktplots.R +R/correlationSpot.R diff --git a/DESCRIPTION b/DESCRIPTION index 753d180..291b4b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ktplots Title: Plot single-cell data dotplots -Version: 1.1.19 +Version: 1.1.20 Authors@R: person("Kelvin", "Tuong", email = c("z.tuong@uq.edu.au", "zkt22@cam.ac.uk", "kt16@sanger.ac.uk"), role = c("aut", "cre")) Description: Plotting tools for scData. License: MIT @@ -17,21 +17,16 @@ Imports: viridis, gtools, RColorBrewer, - purrr, - patchwork, - BiocParallel + circlize Suggests: SummarizedExperiment, SingleCellExperiment, - Seurat, - lmerTest, - car, + scater, testthat, covr, + ComplexHeatmap, plyr Depends: ggplot2 -Remotes: - zktuong/patchwork LazyData: true RoxygenNote: 7.1.2 diff --git a/NAMESPACE b/NAMESPACE index ccb5bc7..3c6225e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,23 +1,16 @@ # Generated by roxygen2: do not edit by hand export("%nin%") -export(GeomFlatViolin) -export(StackedVlnPlot) export(bottomleft_legend) export(bottomright_legend) -export(compare_cpdb) export(correlationSpot) -export(extract_max) export(geneDotPlot) -export(geom_flat_violin) export(init) export(init_ktplots) export(ktplots) -export(modify_vlnplot) -export(plot_compare_cpdb) export(plot_cpdb) export(plot_cpdb2) -export(rainCloudPlot) +export(plot_cpdb3) export(range01) export(small_axis) export(small_grid) @@ -25,7 +18,6 @@ export(small_guide) export(small_legend) export(topleft_legend) export(topright_legend) -import(BiocParallel) import(Matrix) import(RColorBrewer) import(devtools) @@ -34,7 +26,8 @@ import(ggplot2) import(ggraph) import(ggrepel) import(gtools) -import(patchwork) -import(purrr) import(reshape2) import(viridis) +importFrom(circlize,chordDiagram) +importFrom(circlize,circos.clear) +importFrom(grDevices,recordPlot) diff --git a/R/StackedVlnPlot.R b/R/StackedVlnPlot.R deleted file mode 100644 index 83ff5de..0000000 --- a/R/StackedVlnPlot.R +++ /dev/null @@ -1,74 +0,0 @@ -#' Plotting stacked violin plot -#' -## main function -#' @name StackedVlnPlot -#' @param obj single-cell data. can be seurat/summarizedexperiment object -#' @param features genes/features to plot -#' @param pt.size size of dots -#' @param plot.margin to adjust the white space between each plot. -#' @param ... passed to Seurat::VlnPlot -#' @return StackedVlnPlot -#' @source \url{https://divingintogeneticsandgenomics.rbind.io/post/stacked-violin-plot-for-visualizing-single-cell-data-in-seurat/} -#' @examples -#' data(kidneyimmune) -#' features<- c("CD79A", "MS4A1", "CD8A", "CD8B", "LYZ", "LGALS3", "S100A8", "GNLY", "NKG7", "KLRB1", "FCGR3A", "FCER1A", "CST3") -#' StackedVlnPlot(kidneyimmune, features = features) + theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) -#' @import ggplot2 -#' @import purrr -#' @import patchwork -#' @export -StackedVlnPlot <- function(obj, features, - pt.size = 0, - plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"), - ...) { - requireNamespace('purrr') - plot_list<- purrr::map(features, function(x) modify_vlnplot(obj = obj,feature = x, ...)) - - # Add back x-axis title to bottom plot. patchwork is going to support this? - plot_list[[length(plot_list)]]<- plot_list[[length(plot_list)]] + - theme(axis.text.x=element_text(), axis.ticks.x = element_line()) - - # change the y-axis tick to only max value - ymaxs<- purrr::map_dbl(plot_list, extract_max) - plot_list<- purrr::map2(plot_list, ymaxs, function(x,y) x + - scale_y_continuous(breaks = c(y)) + - expand_limits(y = y)) - requireNamespace('patchwork') - p <- patchwork::wrap_plots(plotlist = plot_list, ncol = 1) - return(p) -} - -## remove the x-axis text and tick -#' @name misc -#' @param obj single-cell data. can be seurat/summarizedexperiment object -#' @param feature genes/features to plot -#' @param pt.size size of dots -#' @param plot.margin to adjust the white space between each plot. -#' @param ... passed to Seurat::VlnPlot -#' @export -modify_vlnplot<- function(obj, - feature, - pt.size = 0, - plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"), - ...) { - requireNamespace('Seurat') - p<- Seurat::VlnPlot(obj, features = feature, pt.size = pt.size, ... ) + - xlab("") + ylab(feature) + ggtitle("") + - theme(legend.position = "none", - axis.text.x = element_blank(), - axis.ticks.x = element_blank(), - axis.title.y = element_text(size = rel(1), angle = 0), - axis.text.y = element_text(size = rel(1)), - plot.margin = plot.margin ) - return(p) -} - -## extract the max value of the y axis -#' @name misc -#' @param p value -#' @export -extract_max<- function(p){ - ymax<- max(ggplot_build(p)$layout$panel_scales_y[[1]]$range$range) - return(ceiling(ymax)) -} - diff --git a/R/compare_cpdb.R b/R/compare_cpdb.R deleted file mode 100644 index 2d1a0b6..0000000 --- a/R/compare_cpdb.R +++ /dev/null @@ -1,602 +0,0 @@ -#' Compare cellphonedb results performed on replicates -#' -#' @param cpdb_meta data.frame containing the sample name, cellphonedb 'out' folder path (containing means.txt and pvalues.txt/relevant_interactions.txt if version 3), and single-cell object file path (.h5ad or .rds) -#' @param sample_metadata data.frame containing the sample name, and groupings. -#' @param celltypes subset celltypes for comparison -#' @param celltype_col column name in single cell object holding celtype annotation. -#' @param groupby for significance testing. only if method is t.test or wilcoxon. -#' @param formula for signfiicance testing. only if method is lme. -#' @param method one of 'wilcox.test', 't.test', 'lmer' -#' @param BPPARAM BiocParallelParam class. -#' @param version3 boolean. if cellphonedb version 3 -#' @param verbose Whether or not to print messages. -#' @param p.adjust.mode whether or not to correct all interactions, or to perform it on a celltype-celltype basis. -#' @param p.adjust.method passed to `method`in `stats::p.adjust`. -#' @param cluster whether or not to cluster the output plot -#' @param result output from compare_cpdb -#' @param contrast name of contrast to plot. only if method used was 'lmer' -#' @param groups name of groups to facet. -#' @param alpha adjusted pvalue cut off to keep in the plot. -#' @param cluster whether or not to perform a simple clustering in the final plot -#' @param max_size maximum size of circles -#' @param limits upper and lower limits for plotting the results -#' @param highlight_significant whether or not to keep only significant results in the final plot -#' @param ... passed to tests. -#' @return results for plotting -#' @examples -#' \donttest{ -#' data(covid_sample_metadata) -#' data(covid_cpdb_meta) -#' -#' file <- system.file("extdata", "covid_cpdb.tar.gz", package = "ktplots") -#' file.copy(file, ".") -#' system("tar -xzf covid_cpdb.tar.gz") -#' -#' covid_sample_metadata$individual <- rep(c("A", "B"), 6) -#' out <- compare_cpdb(cpdb_meta = covid_cpdb_meta, sample_metadata = covid_sample_metadata, -#' celltypes = c("B_cell", "CD14", "CD16", "CD4", "CD8", "DCs", "MAIT", "NK_16hi", -#' "NK_56hi", "Plasmablast", "Platelets", "Treg", "gdT", "pDC"), celltype_col = "initial_clustering", -#' groupby = "Status_on_day_collection_summary") -#' plot_compare_cpdb(out) -#' } -#' @import BiocParallel -#' @import dplyr - -#' @export -compare_cpdb <- function(cpdb_meta, sample_metadata, celltypes, celltype_col, - groupby = NULL, formula = NULL, method = c('wilcox.test', 't.test', 'lmer'), - BPPARAM = SerialParam(), version3 = FALSE, verbose = TRUE, - p.adjust.mode = c('celltype', 'all'), p.adjust.method = 'fdr', ...) { - options(warn = -1) - sample <- cpdb_meta[, 1] - cpdb_out_folder <- cpdb_meta[, 2] - expression_file <- cpdb_meta[, 3] - names(cpdb_out_folder) <- sample - names(expression_file) <- sample - method = match.arg(method) - p.adjust.mode = match.arg(p.adjust.mode) - if (verbose) { - cat("Reading cellphonedb outputs", sep = "\n") - } - means <- bplapply(cpdb_out_folder, function(x) read.delim(paste0(x, "/means.txt"), - check.names = FALSE), BPPARAM = SerialParam(progressbar = verbose)) - if (version3){ - pval_file <- '/relevant_interactions.txt' - } else { - pval_file <- '/pvalues.txt' - } - pvals <- bplapply(cpdb_out_folder, function(x) read.delim(paste0(x, pval_file), - check.names = FALSE), BPPARAM = SerialParam(progressbar = verbose)) - - if (verbose) { - cat("Reading single cell object", sep = "\n") - } - - requireNamespace('Matrix') - sces <- bplapply(expression_file, function(x) { - if (endsWith(x, ".h5ad")) { - require(reticulate) - ad = import("anndata") - adata = ad$read_h5ad(paste0(x)) - counts <- Matrix::t(adata$X) - row.names(counts) <- row.names(adata$var) - colnames(counts) <- row.names(adata$obs) - sce <- SingleCellExperiment(list(counts = counts), colData = adata$obs, - rowData = adata$var) - } else { - sce <- readRDS(x) - if (class(sce) == "Seurat") { - sce <- as.SingleCellExperiment(sce) - } - } - return(sce) - }, BPPARAM = SerialParam(progressbar = verbose)) - - # set up cell type combinations - combs <- t(combn(celltypes, 2)) - # add in self-self interactions too - combs <- rbind(combs, cbind(as.character(celltypes), as.character(celltypes))) - comb_list <- apply(combs, 1, as.list) - - get_means <- function(x) { - if (!is.na(x)) { - out <- x$means - names(out) <- paste0(x$group, ">@<", x$Var1) - if (any(is.na(out))) { - out <- out[!is.na(out)] - } - return(out) - } else { - return(NA) - } - } - - # extract interactions with plot_cpdb - if (verbose) { - cat(paste0("Extracting cpdb results across ", dim(combs)[1], " celltype pairwise combinations"), - sep = "\n") - } - - ct_sigs <- bplapply(comb_list, function(x){ - bpmapply(function(sce, mean, pval) { - s <- tryCatch(plot_cpdb(cell_type1 = paste0(x[1], '$'), - cell_type2 = paste0(x[2], '$'), - scdata = sce, - idents = celltype_col, # column name where the cell ids are located in the metadata - means = mean, - pvals = pval, - keep_significant_only = TRUE, - scale = FALSE, - return_table = TRUE, - ), error = function(e) return(NA)) - out <- suppressWarnings(tryCatch(get_means(s), error = function(e) return(NA))) - return(out) - }, sces, means, pvals, BPPARAM = BPPARAM, SIMPLIFY = FALSE) - }, BPPARAM = SerialParam(progressbar = verbose)) - - # collapse to matrix - requireNamespace("plyr") - meta <- bplapply(ct_sigs, function(y) { - z <- as.data.frame(plyr::rbind.fill(lapply(y, function(y) { - tryCatch(as.data.frame(t(y), stringsAsFactors = FALSE), error = function(e) return(NA)) - }))) - row.names(z) <- names(sces) - return(z) - }, BPPARAM = BPPARAM) - - if (verbose) { - cat("Filtering interactions", sep = "\n") - } - # remove interactions that are all empty - meta2 <- bplapply(meta, function(y) { - if (!is.logical(y)) { - f <- apply(y, 2, function(x) all(is.na(x))) - y <- y[, !f] - } else { - y <- NA - } - return(y) - }, BPPARAM = BPPARAM) - meta2 <- bplapply(meta2, function(x) { - x[is.na(x)] <- 0 - return(x) - }, BPPARAM = SerialParam(progressbar = verbose)) - - if (verbose) { - cat("Preparing data", sep = "\n") - } - # split each column into a list - res2 <- bplapply(meta2, function(x) { - tmp <- as.list(x) - tmp <- bplapply(tmp, function(z) { - z <- as.numeric(z) - names(z) <- row.names(x) - return(z) - }, BPPARAM = BPPARAM) - return(tmp) - }, BPPARAM = SerialParam(progressbar = verbose)) - - requireNamespace('reshape2') - test_fun <- function(int_score, data, col, method, ...) { - # 'C' - data$int_score <- int_score - if (class(data[, col]) == "factor") { - lvls <- levels(data[, col]) - } else { - data[, col] <- factor(data[, col]) - lvls <- levels(data[, col]) - } - - if (method == "wilcox.test") { - test <- pairwise.wilcox.test(data$int_score, data[, col], p.adjust.method = "none", ...) - } else if (method == "t.test") { - # force Welch's t-test - test <- pairwise.t.test(data$int_score, data[, col], pool.sd = FALSE, p.adjust.method = "none", ...) - } - - tmp <- reshape2::melt(test$p.value) - p <- tmp$value - names(p) <- paste0(tmp$Var1, ">:<_vs_>:<", tmp$Var2) - p <- p[!is.na(p)] - diff <- group_by(data, get(col)) %>% summarise(mean = log1p(mean(int_score, - na.rm = TRUE))) - diff <- as.data.frame(diff) - row.names(diff) <- diff[, 1] - diff <- diff[, "mean", drop = FALSE] - n <- names(p) - n <- strsplit(n, ">:<_vs_>:<") - lfc <- lapply(n, function(x) { - diff[x[1], ] - diff[x[2], ] - }) - names(lfc) <- gsub(">:<_vs_>:<", "_vs_", names(p)) - names(p) <- gsub(">:<_vs_>:<", "_vs_", names(p)) - names(p) <- paste0("P_", names(p)) - outdf <- cbind(t(data.frame(lfc)), data.frame(p)) - colnames(outdf) <- c("LFC", "pval") - outdf$contrast <- rownames(outdf) - return(outdf) - } - - if (method != 'lmer'){ - if (is.null(groupby)) { - stop("Please provide column name for contrasts to be extracted.") - } - if (verbose) { - cat(paste0("Running pairwise ", method), sep = "\n") - } - res3 <- bplapply(res2, function(x) { - tmp <- bplapply(x, function(y) test_fun(int_score = y, data = sample_metadata, - col = groupby, method = method, ...), BPPARAM = BPPARAM) - tmp <- plyr::ldply(tmp, data.frame) - return(tmp) - }, BPPARAM = SerialParam(progressbar = verbose)) - res3 <- do.call(rbind, res3) - tmpct <- as.data.frame(do.call(rbind, strsplit(res3[,1], '>@<'))[,1:2]) - tmpct[,3] <- paste0(tmpct[,1], '>@<', tmpct[,2]) - res3$celltypes <- tmpct[,3] - res3$celltypes <- gsub('>@<', '-', res3$celltypes) - res3 <- split(res3, res3$contrast) - } else if (method == 'lmer'){ - require(lmerTest) - if (!is.null(formula)) { - fullFormula <- update.formula(formula, int_score ~ ., simplify = FALSE, - ) - } else { - stop("Please provide the formula") - } - - if (verbose) { - cat("Running lmer model", sep = "\n") - } - res3_ <- bplapply(res2, function(x) { - bplapply(x, function(y) { - sample_metadata[, "int_score"] <- y - results <- suppressMessages(suppressWarnings(tryCatch(lmer(fullFormula, - data = sample_metadata, ...), error = function(e) return(NA)))) - return(results) - }, BPPARAM = BPPARAM) - }, BPPARAM = SerialParam(progressbar = verbose)) - - if (verbose) { - cat("Extracting statistics", sep = "\n") - } - - res3fitstats <- suppressWarnings(bplapply(res3_, function(fit) { - if (length(fit) > 0) { - if (suppressMessages(suppressWarnings(!is.na(fit)))) { - out <- lapply(fit, function(fit2) { - if (suppressMessages(suppressWarnings(!is.na(fit2)))) { - y <- suppressWarnings(summary(fit2)) - E <- y$coefficients[-1, 1] - if (length(E) > 1) { - names(E) <- paste0("fit_estimates_", names(E)) - } else { - names(E) <- paste0("fit_estimates_", rownames(y$coefficients)[2]) - } - P <- y$coefficients[-1, 5] - if (length(E) > 1) { - names(P) <- paste0("fit_P_", names(P)) - } else { - names(P) <- paste0("fit_P_", rownames(y$coefficients)[2]) - } - return(c(E, P)) - } else { - return(NA) - } - }) - Singular <- lapply(fit, function(fit2) { - if (suppressMessages(suppressWarnings(!is.na(fit2)))) { - out <- as.numeric(isSingular(fit2)) - names(out) <- "Singular" - return(out) - } else { - return(NA) - } - }) - Conv <- lapply(fit, function(fit2) { - if (suppressMessages(suppressWarnings(!is.na(fit2)))) { - out <- length(slot(fit2, "optinfo")$conv$lme4$messages) - names(out) <- "Conv" - return(out) - } else { - return(NA) - } - }) - anv <- lapply(fit, function(fit2) { - if (suppressMessages(suppressWarnings(!is.na(fit2)))) { - anva <- anova(fit2, ddf="Satterthwaite") - wp <- anva[, 6] - names(wp) <- paste0("anova_P_", row.names(anva)) - return(wp) - } else { - return(NA) - } - }) - return(list(anv = anv, fit = out, Singular = Singular, Conv = Conv)) - } else { - return(NA) - } - } else { - return(NA) - } - }, BPPARAM = SerialParam(progressbar = verbose))) - res3fitstats2 <- bplapply(res3fitstats, function(x) { - if (length(x) > 0) { - output <- mapply(function(anv, out, Singular, Conv) { - return(c(anv, out, Singular, Conv)) - }, x$anv, x$fit, x$Singular, x$Conv, SIMPLIFY = FALSE) - return(output) - } else { - return(NA) - } - }, BPPARAM = BPPARAM) - - res3fitstats3 <- suppressWarnings(bplapply(res3fitstats2, function(x) do.call(rbind, - x), BPPARAM = BPPARAM)) - - res3 <- suppressMessages(suppressWarnings(do.call(rbind, res3fitstats3))) - res3 <- as.data.frame(res3) - tmpct <- as.data.frame(do.call(rbind, strsplit(row.names(res3), '>@<'))[,1:2]) - tmpct[,3] <- paste0(tmpct[,1], '>@<', tmpct[,2]) - res3$celltypes <- tmpct[,3] - res3$celltypes <- gsub('>@<', '-', res3$celltypes) - } - - if (verbose) { - cat("Correcting P values", sep = "\n") - } - if (p.adjust.mode == "all") { - if (method != "lmer") { - res3 <- bplapply(res3, function(x) { - x$padj <- p.adjust(x$pval, method = p.adjust.method) - row.names(x) <- x[, 1] - x <- x[, -1, drop = FALSE] - return(x) - }, BPPARAM = SerialParam(progressbar = verbose)) - } else { - p_cols <- grep("_P_", colnames(res3), value = TRUE) - for (p in p_cols) { - res3[, gsub("_P_", "_Padj_", p)] <- p.adjust(res3[, p], method = p.adjust.method) - } - } - } else if (p.adjust.mode == "celltype") { - if (method != "lmer") { - res3 <- bplapply(res3, function(x) { - tmp <- split(x, x$celltypes) - tmp <- bplapply(tmp, function(y) { - y$padj <- p.adjust(y$pval, method = p.adjust.method) - row.names(y) <- y[, 1] - y <- y[, -1, drop = FALSE] - return(y) - }, BPPARAM = SerialParam()) - names(tmp) <- NULL - tmp <- do.call(rbind, tmp) - tmp <- as.data.frame(tmp) - # sort ffor most significant to be on top - tmp <- tmp[order(tmp$padj), ] - return(tmp) - }, BPPARAM = SerialParam(progressbar = verbose)) - } else { - celltypes <- split(res3, res3$celltypes) - ctp <- bplapply(celltypes, function(x) { - p_cols <- grep("_P_", colnames(x), value = TRUE) - for (p in p_cols) { - x[, gsub("_P_", "_Padj_", p)] <- p.adjust(x[, p], method = p.adjust.method) - } - return(x) - }, BPPARAM = SerialParam()) - names(ctp) <- NULL - res3 <- do.call(rbind, ctp) - res3 <- as.data.frame(res3) - } - } - if (verbose) { - cat("---- Done! ----|", sep = "\n") - } - return(res3) -} - -#' @export -plot_compare_cpdb <- function(result, contrast = NULL, groups = NULL, alpha = 0.05, - color_spectrum = c("#2C7BB6", "#f7f7f7", "#D7191C"), max_size = 3, limits = c(-2, - 2), highlight_significant = TRUE, cluster = FALSE) { - prepare_compare_cpdb_results <- function(res, alpha = 0.05, contrast = NULL, - groups = NULL, cluster = FALSE) { - if (class(res) == "list") { - tmp <- lapply(res, function(x) { - x <- x[x$padj < alpha, ] - out_ <- make_table1(x) - return(out_) - }) - names(tmp) <- NULL - fin <- unique(row.names(do.call(rbind, tmp))) - out <- lapply(res, function(x) { - x <- x[row.names(x) %in% fin, ] - out_ <- make_table1(x) - return(out_) - }) - if (!is.null(groups)) { - out <- mapply(function(o, g) { - o$Group = g - return(o) - }, out, as.list(groups), SIMPLIFY = FALSE) - } - out <- do.call(rbind, out) - out <- as.data.frame(out) - if (!is.null(groups)) { - out$Group <- factor(out$Group, levels = groups) - } - } else if (class(res) == "data.frame") { - if (length(contrast) > 1) { - res1 <- lapply(contrast, function(x) res %>% - filter(get(paste0("fit_Padj_", x)) < alpha & Singular > 0 & Conv > - 0) %>% - select(sym(paste0("fit_estimates_", x)))) - res2 <- lapply(contrast, function(x) res %>% - filter(get(paste0("fit_Padj_", x)) < alpha & Singular > 0 & Conv > - 0) %>% - select(sym(paste0("fit_Padj_", x)))) - res1 <- lapply(res1, function(x) { - colnames(x) <- "beta" - return(x) - }) - res2 <- lapply(res2, function(x) { - colnames(x) <- "padj" - return(x) - }) - tmp <- mapply(function(r1, r2, c) make_table2(r1, r2, contrast = c), - res1, res2, as.list(contrast), SIMPLIFY = FALSE) - fin <- unique(row.names(do.call(rbind, tmp))) - res1 <- lapply(contrast, function(x) res[row.names(res) %in% fin, - ] %>% - select(sym(paste0("fit_estimates_", x)))) - res2 <- lapply(contrast, function(x) res[row.names(res) %in% fin, - ] %>% - select(sym(paste0("fit_Padj_", x)))) - res1 <- lapply(res1, function(x) { - colnames(x) <- "beta" - return(x) - }) - res2 <- lapply(res2, function(x) { - colnames(x) <- "padj" - return(x) - }) - out <- mapply(function(r1, r2, c) make_table2(r1, r2, contrast = c), - res1, res2, as.list(contrast), SIMPLIFY = FALSE) - if (!is.null(groups)) { - out <- mapply(function(o, g) { - o$Group = g - return(o) - }, out, as.list(groups), SIMPLIFY = FALSE) - } - out <- do.call(rbind, out) - out <- as.data.frame(out) - if (!is.null(groups)) { - out$Group <- factor(out$Group, levels = groups) - } - } else { - res1 <- res %>% - filter(get(paste0("fit_Padj_", contrast)) < alpha & Singular > - 0 & Conv > 0) %>% - select(sym(paste0("fit_estimates_", contrast))) - res2 <- res %>% - filter(get(paste0("fit_Padj_", contrast)) < alpha & Singular > - 0 & Conv > 0) %>% - select(sym(paste0("fit_Padj_", contrast))) - colnames(res1) <- "beta" - colnames(res2) <- "padj" - out <- make_table2(res1, res2, contrast) - } - } - if (nrow(out) > 0) { - if (class(res) == "list") { - val.var = "LFC" - } else { - val.var = "beta" - } - if (cluster) { - order1 <- suppressMessages(reshape2::dcast(out, interaction ~ celltypes, - value.var = val.var)) - rownames(order1) <- order1$interaction - order1 <- order1[, -1] - order2 <- suppressMessages(reshape2::dcast(out, celltypes ~ interaction, - value.var = val.var)) - rownames(order2) <- order2$celltypes - order2 <- order2[, -1] - order1[is.na(order1)] <- 0 - order2[is.na(order2)] <- 0 - d1 <- dist(order1) - h1 <- hclust(d1) - d2 <- dist(order2) - h2 <- hclust(d2) - order3 <- order1[h1$order, h2$order] - out$celltypes <- factor(out$celltypes, levels = colnames(order3)) - out$interaction <- factor(out$interaction, levels = rownames(order3)) - } - - out$sig <- "no" - out$sig[out$padj < alpha] <- "yes" - return(out) - } else { - return(NULL) - } - } - - make_table1 <- function(df) { - rows <- strsplit(row.names(df), ">@<") - interaction <- lapply(rows, function(x) x[3]) - interaction <- do.call(rbind, interaction) - from <- lapply(rows, function(x) paste(x[1])) - from <- do.call(rbind, from) - to <- lapply(rows, function(x) paste(x[2])) - to <- do.call(rbind, to) - df <- as.data.frame(cbind(df[, c("LFC", "padj", "contrast", "celltypes")], - interaction = interaction, from = from, to = to)) - return(df) - } - - - make_table2 <- function(res1, res2, contrast) { - rows <- strsplit(row.names(res1), ">@<") - cellcell <- lapply(rows, function(x) paste(x[1:2], collapse = "-")) - interaction <- lapply(rows, function(x) x[3]) - cellcell <- do.call(rbind, cellcell) - interaction <- do.call(rbind, interaction) - from <- lapply(rows, function(x) paste(x[1])) - from <- do.call(rbind, from) - to <- lapply(rows, function(x) paste(x[2])) - to <- do.call(rbind, to) - res <- as.data.frame(cbind(res1, res2, contrast = rep(contrast, length(rows)), - celltypes = cellcell, interaction = interaction, from = from, to = to)) - return(res) - } - - if (class(result) == "list") { - val.var = "LFC" - } else { - val.var = "beta" - } - - dat <- prepare_compare_cpdb_results(result, alpha = alpha, contrast = contrast, - groups = groups, cluster = cluster) - if (is.null(dat)) { - stop("no significant hits.") - } else { - if (class(result) == "list") { - fcol = "LFC" - } else { - fcol = "beta" - } - p <- ggplot(dat, aes(y = interaction, x = celltypes, fill = get(fcol), colour = sig, - size = abs(get(fcol)))) + geom_point(shape = 21) + scale_size_area(limits = limits, - oob = scales::squish, max_size = max_size, name = paste0("abs(", fcol, - ")")) + theme_classic() + theme_bw() + theme(axis.line = element_blank(), - panel.border = element_rect(colour = "black", size = 0.1), panel.grid = element_line(colour = "grey", - size = 0.1), axis.ticks = element_line(colour = "black", size = 0.1), - axis.text.y = element_text(colour = "black"), axis.text.x = element_text(colour = "black", - angle = 90, vjust = 0.5, hjust = 1), legend.direction = "vertical", - legend.box = "horizontal") - if (any(grepl("Group", colnames(dat)))) { - p <- p + facet_grid(~Group) - } - p <- p + scale_fill_gradient2(low = color_spectrum[1], mid = color_spectrum[2], - high = color_spectrum[3], na.value = NA, guide = "colourbar", aesthetics = "fill", - limits = limits, oob = scales::squish, name = fcol) - if (highlight_significant) { - if (all(dat$sig == 'yes')){ - p <- p + scale_colour_manual(values = "red", na.value = NA, drop = TRUE, na.translate = FALSE) - } else { - p <- p + scale_colour_manual(values = c(NA, "red"), na.value = NA, drop = TRUE, na.translate = FALSE) - } - } else { - if (all(dat$sig == 'yes')){ - p <- p + scale_colour_manual(values = "red", na.value = NA, - drop = TRUE, na.translate = FALSE) - } else { - p <- p + scale_colour_manual(values = c("$f7f7f7", "red"), na.value = NA, - drop = TRUE, na.translate = FALSE) - } - } - return(p) - } -} \ No newline at end of file diff --git a/R/data.R b/R/data.R index e2a1f61..403bdc8 100644 --- a/R/data.R +++ b/R/data.R @@ -3,16 +3,14 @@ #' kidneyimmune - A small set of demo data from Stewart et al. 2019 Science. See \url{https://www.kidneycellatlas.org/} #' @docType data #' @usage data(kidneyimmune) -#' @format A Seurat object with the following slots filled +#' @format A SingleCellExperiment object with the following slots filled #' \describe{ #' \item{assays}{ -#' \itemize{Currently only contains one assay ("RNA" - scRNA-seq expression data) -#' \item{data - Normalized expression data}} +#' \itemize{Currently only contains "counts" and "logcounts" +#' \item{counts - Raw expression data} +#' \item{logcounts - Normalized expression data}} #' } -#' \item{meta.data}{Cell level metadata} -#' \item{active.assay}{Current default assay} -#' \item{active.ident}{Current default idents} -#' \item{version}{Seurat version used to create the object} +#' \item{colData}{Cell level metadata} #' } #' @source \url{https://www.kidneycellatlas.org/} #' @examples diff --git a/R/geom_flat_violin.R b/R/geom_flat_violin.R deleted file mode 100644 index 95eadca..0000000 --- a/R/geom_flat_violin.R +++ /dev/null @@ -1,83 +0,0 @@ -#' geom_flat_violin -#' -#' @param mapping see ggplot2::layer -#' @param data see ggplot2::layer -#' @param stat see ggplot2::layer -#' @param position see ggplot2::layer -#' @param trim see ggplot2::layer -#' @param scale see ggplot2::layer -#' @param show.legend see ggplot2::layer -#' @param inherit.aes see ggplot2::layer -#' @param ... passed to ggplot2::layer -#' @return geom_flat_violin -#' @source \url{https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R} -#' @examples -#' \donttest{ -#' data(iris) -#' ggplot(iris, aes(Species, Sepal.Length)) + -#' geom_flat_violin() + -#' coord_flip() -#' } -#' @import dplyr -#' @import ggplot2 -#' @export -geom_flat_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", - position = "dodge", trim = TRUE, scale = "area", - show.legend = NA, inherit.aes = TRUE, ...) { - - layer( - data = data, - mapping = mapping, - stat = stat, - geom = GeomFlatViolin, - position = position, - show.legend = show.legend, - inherit.aes = inherit.aes, - params = list( - trim = trim, - scale = scale, - ... - ) - ) -} -#' @name misc -#' @export -GeomFlatViolin <- ggproto("GeomFlatViolin", Geom, - setup_data = function(data, params) { - data$width <- data$width %nin% - params$width %nin% (resolution(data$x, FALSE) * 0.9) - - # ymin, ymax, xmin, and xmax define the bounding rectangle for each group - data %>% - group_by(group) %>% - mutate(ymin = min(y), - ymax = max(y), - xmin = x, - xmax = x + width / 2) - - }, - - draw_group = function(data, panel_scales, coord) { - # Find the points for the line to go all the way around - data <- transform(data, xminv = x, - xmaxv = x + violinwidth * (xmax - x)) - - # Make sure it's sorted properly to draw the outline - newdata <- rbind(plyr::arrange(transform(data, x = xminv), y), - plyr::arrange(transform(data, x = xmaxv), -y)) - - # Close the polygon: set first and last point the same - # Needed for coord_polar and such - newdata <- rbind(newdata, newdata[1,]) - - ggplot2:::ggname("geom_flat_violin", GeomPolygon$draw_panel(newdata, panel_scales, coord)) - }, - - draw_key = draw_key_polygon, - - default_aes = aes(weight = 1, colour = "grey20", fill = "white", size = 0.5, - alpha = NA, linetype = "solid"), - - required_aes = c("x", "y") -) - diff --git a/R/plot_cpdb2.R b/R/plot_cpdb2.R index 7a3cbe5..3e5e92a 100644 --- a/R/plot_cpdb2.R +++ b/R/plot_cpdb2.R @@ -23,7 +23,7 @@ #' @param version3 if is cellphonedb version3. #' @param return_df whether to just return this as a data.frame rather than plotting iot #' @param ... passes arguments plot_cpdb -#' @return ggplot dot plot object of cellphone db output +#' @return Plotting cellphonedb results as a weird chord diagram #' @examples #' \donttest{ #' } @@ -38,7 +38,7 @@ plot_cpdb2 <- function(cell_type1, cell_type2, scdata, idents, means, pvals, dec desiredInteractions = NULL, interaction_grouping = NULL, edge_group_colors = NULL, node_group_colors = NULL, version3 = FALSE, return_df = FALSE, ...) { if (class(scdata) == "Seurat") { - stop("Sorry not supported yet. Please use a SingleCellExperiment object.") + stop("Sorry not supported. Please use a SingleCellExperiment object.") } if (length(separator) > 0) { sep = separator diff --git a/R/plot_cpdb3.R b/R/plot_cpdb3.R new file mode 100644 index 0000000..4cb7992 --- /dev/null +++ b/R/plot_cpdb3.R @@ -0,0 +1,549 @@ +#' Plotting cellphonedb results as a chord diagram +#' +#' @param cell_type1 cell type 1 +#' @param cell_type2 cell type 2 +#' @param scdata single-cell data. can be seurat/summarizedexperiment object +#' @param idents vector holding the idents for each cell or column name of scdata's metadata. MUST match cpdb's columns +#' @param means object holding means.txt from cpdb output +#' @param pvals object holding pvals.txt from cpdb output +#' @param deconvoluted object holding deconvoluted.txt from cpdb output +#' @param p.adjust.method correction method. p.adjust.methods of one of these options: c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none') +#' @param keep_significant_only logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb. +#' @param split.by column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis. +#' @param scale logical. scale the expression to mean +/- SD. NULL defaults to TRUE. +#' @param standard_scale logical. scale the expression to range from 0 to 1. Default is TRUE +#' @param separator default = NULL. separator to use to split between celltypes. Unless otherwise specified, the separator will be `>@<`. Make sure the idents and split.by doesn't overlap with this. +#' @param gene_symbol_mapping default = NULL.column name for rowData in sce holding the actual gene symbols if row names aren't gene symbols +#' @param frac default = 0.1. Minimum fraction of celtypes expressing a gene in order to keep the interaction. Gene must be expressesd >= `frac` in either of the pair of celltypes in order to keep. +#' @param remove_self default = TRUE. Remove self-self arcs. +#' @param desiredInteractions default = NULL. Specific list of celltype comparisons e.g. list(c('CD4_Tcm', 'cDC1'), c('CD4_Tcm', 'cDC2')). Also accepts a dataframe where first column is celltype 1 and 2nd column is celltype 2. +#' @param version3 if is cellphonedb version3. +#' @param directional Whether links have directions. 1 means the direction is from the first column in df to the second column, -1 is the reverse, 0 is no direction, and 2 for two directional. +#' @param alpha transparency for links +#' @param edge_colors vector of colors for links +#' @param grid_colors vector of colrs for grids +#' @param show_legend whether or not to show the legend +#' @param legend.pos.x x position of legend +#' @param legend.pos.y y position of legend +#' @param ... passes arguments plot_cpdb +#' @return Plotting cellphonedb results as a CellChat inspired chord diagram +#' @examples +#' \donttest{ +#' } +#' @importFrom circlize circos.clear chordDiagram +#' @importFrom grDevices recordPlot +#' @export + +plot_cpdb3 <- function(cell_type1, cell_type2, scdata, idents, means, pvals, deconvoluted, + p.adjust.method = NULL, keep_significant_only = TRUE, split.by = NULL, standard_scale = TRUE, + separator = NULL, gene_symbol_mapping = NULL, frac = 0.1, remove_self = TRUE, + desiredInteractions = NULL, version3 = FALSE, directional = 1, + alpha = 0.5, edge_colors = NULL, grid_colors = NULL, show_legend = TRUE, legend.pos.x = 20, + legend.pos.y = 20, ...) { + if (class(scdata) == "Seurat") { + stop("Sorry not supported. Please use a SingleCellExperiment object.") + } + if (length(separator) > 0) { + sep = separator + } else { + sep = ">@<" + } + + lr_interactions = plot_cpdb(cell_type1 = cell_type1, cell_type2 = cell_type2, + scdata = scdata, idents = idents, split.by = split.by, means = means, pvals = pvals, + keep_significant_only = keep_significant_only, standard_scale = standard_scale, + return_table = TRUE, version3 = version3, ...) + requireNamespace("SummarizedExperiment") + requireNamespace("SingleCellExperiment") + if (is.null(split.by)) { + if (any(lr_interactions[, 3] > 0)) { + if (any(is.na(lr_interactions[, 3]))) { + lr_interactions <- lr_interactions[lr_interactions[, 3] > 0 & !is.na(lr_interactions[, + 3]), ] + } else { + lr_interactions <- lr_interactions[lr_interactions[, 3] > 0, ] + } + } + } + subset_clusters <- unique(unlist(lapply(as.character(lr_interactions$group), + strsplit, sep))) + sce_subset <- scdata[, SummarizedExperiment::colData(scdata)[, idents] %in% subset_clusters] + interactions <- means[, c("interacting_pair", "gene_a", "gene_b", "partner_a", + "partner_b")] + interactions$converted <- gsub("-", " ", interactions$interacting_pair) + interactions$converted <- gsub("_", "-", interactions$converted) + interactions_subset <- interactions[interactions$converted %in% lr_interactions$Var1, + ] + tm0 <- do.call(c, lapply(as.list(interactions_subset$interacting_pair), strsplit, + "_")) + if (any(lapply(tm0, length) > 2)) { + complex_id <- which(lapply(tm0, length) > 2) + interactions_subset_ = interactions_subset[complex_id, ] + simple_1 = interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_b)] + partner_1 = gsub("complex:", "", interactions_subset_$partner_b[grep("complex:", + interactions_subset_$partner_b)]) + partner_2 = gsub("complex:", "", interactions_subset_$partner_a[grep("complex:", + interactions_subset_$partner_a)]) + simple_2 = interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_a)] + for (i in seq_along(simple_1)) { + simple_1[i] <- gsub(paste0(partner_1[i], "_|_", partner_1[i]), "", simple_1[i]) + } + for (i in seq_along(simple_2)) { + simple_2[i] <- gsub(paste0(partner_2[i], "_|_", partner_2[i]), "", simple_2[i]) + } + tmpdf <- rbind(cbind(simple_1, partner_1), cbind(partner_2, simple_2)) + tmplist <- split(tmpdf, seq(nrow(tmpdf))) + for (i in seq_along(complex_id)) { + tm0[[complex_id[i]]] <- tmplist[[i]] + } + tm0 = data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2))) + colnames(tm0) <- c("id_a", "id_b") + interactions_subset <- cbind(interactions_subset, tm0) + dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b", + "id_a", "id_b")] + } else { + tm0 = data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2))) + colnames(tm0) <- c("id_a", "id_b") + interactions_subset <- cbind(interactions_subset, tm0) + dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b", + "id_a", "id_b")] + } + + # extract all the possible genes. + geneid = unique(c(interactions_subset$id_a, interactions_subset$id_b)) + # rmg = which(geneid == '') if (length(rmg) > 0){ geneid = + # geneid[-which(geneid == '')] } + if (all(!geneid %in% row.names(sce_subset))) { + geneid = unique(c(interactions_subset$gene_a, interactions_subset$gene_b)) + } + sce_subset_tmp <- sce_subset[row.names(sce_subset) %in% geneid, ] + + if (dim(sce_subset_tmp)[1] == 0) { + stop("Gene ids not found. Are you sure your single-cell object contains human gene symbols?") + } + # split to list and calculate celltype mean for each treatment group + meta <- as.data.frame(SummarizedExperiment::colData(sce_subset_tmp)) + + cellTypeMeans <- function(x) { + cm <- Matrix::rowMeans(SingleCellExperiment::counts(x)) + return(cm) + } + + cellTypeFraction <- function(x) { + cm <- Matrix::rowMeans(SingleCellExperiment::counts(x) > 0) + return(cm) + } + + if (!is.null(split.by)) { + sce_list <- list() + sce_list_alt <- list() + for (x in unique(meta[, split.by])) { + sce_list[[x]] <- list() + sce_list_alt[[x]] <- list() + } + + for (n in names(sce_list)) { + for (x in unique(meta[, idents])) { + sce_list[[n]][[x]] <- sce_subset_tmp[, meta[, idents] == x & meta[, + split.by] == n] + sce_list_alt[[n]][[x]] <- sce_subset[, meta[, idents] == x & meta[, + split.by] == n] + } + } + + sce_list2 <- lapply(sce_list, function(y) { + z <- lapply(y, cellTypeMeans) + return(z) + }) + sce_list3 <- lapply(sce_list, function(y) { + z <- lapply(y, cellTypeFraction) + return(z) + }) + + sce_list2 <- lapply(sce_list2, function(x) do.call(cbind, x)) + sce_list3 <- lapply(sce_list3, function(x) do.call(cbind, x)) + + for (n in names(sce_list2)) { + colnames(sce_list2[[n]]) <- paste0(paste0(n, "_"), colnames(sce_list2[[n]])) + colnames(sce_list3[[n]]) <- paste0(paste0(n, "_"), colnames(sce_list3[[n]])) + } + + sce_list2 <- do.call(cbind, sce_list2) + sce_list3 <- do.call(cbind, sce_list3) + } else { + sce_list <- list() + sce_list_alt <- list() + + for (x in unique(meta[, idents])) { + sce_list[[x]] <- sce_subset_tmp[, meta[, idents] == x] + sce_list_alt[[x]] <- sce_subset[, meta[, idents] == x] + } + + sce_list2 <- lapply(sce_list, cellTypeMeans) + sce_list3 <- lapply(sce_list, cellTypeFraction) + + sce_list2 <- do.call(cbind, sce_list2) + sce_list3 <- do.call(cbind, sce_list3) + } + + + keep_a <- which(dictionary$gene_a != "") + keep_b <- which(dictionary$gene_b != "") + id_a_dict <- dictionary$id_a[keep_a] + names(id_a_dict) <- dictionary$gene_a[keep_a] + id_b_dict <- dictionary$id_b[keep_b] + names(id_b_dict) <- dictionary$gene_b[keep_b] + id_dict <- c(id_a_dict, id_b_dict) + id_dict <- id_dict[!duplicated(names(id_dict))] + humanreadablename = c() + for (i in row.names(sce_list2)) { + humanreadablename = c(humanreadablename, as.character(unlist(id_dict[i]))) + } + rownames(sce_list2) <- humanreadablename + rownames(sce_list3) <- humanreadablename + + findComplex <- function(interaction) { + idxa <- which(interaction$gene_a == "") + idxb <- which(interaction$gene_b == "") + complexa <- gsub("complex:", "", interaction$partner_a[idxa]) + complexb <- gsub("complex:", "", interaction$partner_b[idxb]) + + if (length(complexa) > 0) { + if (length(complexb) > 0) { + res <- c(complexa, complexb) + } else { + res <- complexa + } + } else if (length(complexb) > 0) { + res <- complexb + } else { + res <- NULL + } + return(res) + } + + # Utility function to retrieve the mean + cellTypeExpr_complex <- function(sce_, genes, gene_symbol_mapping = NULL) { + scex <- tryCatch(sce_[genes, ], error = function(e) { + if (!is.null(gene_symbol_mapping)) { + sce_[which(rowData(sce_)[, gene_symbol_mapping] %in% genes), ] + } else { + sce_[which(rowData(sce_)[, "index"] %in% genes), ] + } + }) + + cm <- mean(Matrix::rowMeans(SingleCellExperiment::counts(scex))) + return(cm) + } + # to retrieve the fraction, we use the average fraction of all genes + # mapping to the complex + cellTypeFraction_complex <- function(sce_, genes, gene_symbol_mapping = NULL) { + scex <- tryCatch(sce_[genes, ], error = function(e) { + if (!is.null(gene_symbol_mapping)) { + sce_[which(rowData(sce_)[, gene_symbol_mapping] %in% genes), ] + } else { + sce_[which(rowData(sce_)[, "index"] %in% genes), ] + } + }) + + cm <- mean(Matrix::rowMeans(SingleCellExperiment::counts(scex) > 0)) + return(cm) + } + + decon_subset <- deconvoluted[deconvoluted$complex_name %in% findComplex(interactions_subset), + ] + if (nrow(decon_subset) > 0) { + # although multiple rows are returned, really it's the same value for + # the same complex + decon_subset <- split(decon_subset, decon_subset$complex_name) + decon_subset_expr <- lapply(decon_subset, function(x) { + x <- x[, colnames(x) %in% colnames(sce_list2)] + x <- colMeans(x) + return(x) + }) + + if (!is.null(split.by)) { + decon_subset_fraction <- lapply(decon_subset, function(x) { + x <- unique(x$gene_name) + test <- lapply(sce_list_alt, function(y) { + return(lapply(y, cellTypeFraction_complex, x, gene_symbol_mapping)) + }) + return(test) + }) + decon_subset_fraction <- lapply(decon_subset_fraction, function(x) { + y <- lapply(x, function(z) do.call(cbind, z)) + for (i in 1:length(y)) { + colnames(y[[i]]) <- paste0(names(y[i]), "_", colnames(y[[i]])) + } + y <- do.call(cbind, y) + return(y) + }) + } else { + decon_subset_fraction <- lapply(decon_subset, function(x) { + x <- unique(x$gene_name) + test <- lapply(sce_list_alt, function(y) { + return(cellTypeFraction_complex(y, x, gene_symbol_mapping)) + }) + return(do.call(cbind, test)) + }) + } + + decon_subset_expr <- do.call(rbind, decon_subset_expr) + decon_subset_fraction <- do.call(rbind, decon_subset_fraction) + row.names(decon_subset_fraction) <- row.names(decon_subset_expr) + + requireNamespace("plyr") + expr_df <- plyr::rbind.fill.matrix(sce_list2, decon_subset_expr) + row.names(expr_df) <- c(row.names(sce_list2), row.names(decon_subset_expr)) + fraction_df <- plyr::rbind.fill.matrix(sce_list3, decon_subset_fraction) + row.names(fraction_df) <- c(row.names(sce_list3), row.names(decon_subset_fraction)) + + } else { + expr_df <- sce_list2 + fraction_df <- sce_list3 + } + + + # make a big fat edgelist + if (!is.null(desiredInteractions)) { + if (class(desiredInteractions) == "list") { + desiredInteractions_ <- c(desiredInteractions, lapply(desiredInteractions, + rev)) + cell_type_grid <- as.data.frame(do.call(rbind, desiredInteractions_)) + } else if ((class(desiredInteractions) == "data.frame")) { + cell_type_grid <- desiredInteractions + } + cells_test = unique(unlist(desiredInteractions)) + } else { + cells_test <- tryCatch(unique(droplevels(meta[, idents])), error = function(e) unique(meta[, + idents])) + cell_type_grid <- expand.grid(cells_test, cells_test) + } + + if (remove_self) { + rm_idx <- which(cell_type_grid[, 1] == cell_type_grid[, 2]) + if (length(rm_idx) > 0) { + cell_type_grid <- cell_type_grid[-rm_idx, ] + } + } + + ligand <- interactions_subset$id_a + receptor <- interactions_subset$id_b + pair <- interactions_subset$interacting_pair + converted_pair <- interactions_subset$converted + producers <- as.character(cell_type_grid[, 1]) + receivers <- as.character(cell_type_grid[, 2]) + + generateDf <- function(ligand, sep, receptor, pair, converted_pair, producers, + receivers, cell_type_means, cell_type_fractions, splitted = NULL) { + if (!is.null(splitted)) { + pp <- paste0(splitted, "_", producers) + rc <- paste0(splitted, "_", receivers) + } else { + pp <- producers + rc <- receivers + } + producer_expression <- data.frame() + producer_fraction <- data.frame() + for (i in seq_along(pp)) { + for (j in seq_along(ligand)) { + if (any(grepl(paste0("^", ligand[j], "$"), row.names(cell_type_means)))) { + x <- cell_type_means[ligand[j], pp[i]] + y <- cell_type_fractions[ligand[j], pp[i]] + } else { + if (any(grepl(paste0("^", ligand[j], "$"), row.names(sce_subset)))) { + x <- cellTypeExpr_complex(sce_list_alt[[pp[i]]], ligand[j], gene_symbol_mapping) + y <- cellTypeFraction_complex(sce_list_alt[[pp[i]]], ligand[j], + gene_symbol_mapping) + } else { + x <- 0 + y <- 0 + } + } + producer_expression[ligand[j], pp[i]] <- x + producer_fraction[ligand[j], pp[i]] <- y + } + } + + receiver_expression <- data.frame() + receiver_fraction <- data.frame() + for (i in seq_along(rc)) { + for (j in seq_along(receptor)) { + if (any(grepl(paste0("^", receptor[j], "$"), row.names(cell_type_means)))) { + x <- cell_type_means[receptor[j], rc[i]] + y <- cell_type_fractions[receptor[j], rc[i]] + } else { + if (any(grepl(paste0("^", receptor[j], "$"), row.names(sce_subset)))) { + x <- cellTypeExpr_complex(sce_list_alt[[rc[i]]], receptor[j], + gene_symbol_mapping) + y <- cellTypeFraction_complex(sce_list_alt[[rc[i]]], receptor[j], + gene_symbol_mapping) + } else { + x <- 0 + y <- 0 + } + } + receiver_expression[receptor[j], rc[i]] <- x + receiver_fraction[receptor[j], rc[i]] <- y + } + } + test_df = list() + for (i in seq_along(pp)) { + px <- pp[i] + rx <- rc[i] + for (j in seq_along(pair)) { + lg <- ligand[j] + rcp <- receptor[j] + pr <- pair[j] + out = data.frame(c(lg, rcp, pr, px, rx, producer_expression[lg, px], + producer_fraction[lg, px], receiver_expression[rcp, rx], receiver_fraction[rcp, + rx])) + test_df = c(test_df, out) + } + } + df_ <- do.call(rbind, test_df) + row.names(df_) <- 1:nrow(df_) + colnames(df_) <- c("ligand", "receptor", "pair", "producer", "receiver", + "producer_expression", "producer_fraction", "receiver_expression", "receiver_fraction") + df_ <- as.data.frame(df_) + df_$from = paste0(df_$producer, sep, df_$ligand) + df_$to = paste0(df_$receiver, sep, df_$receptor) + if (!is.null(splitted)) { + df_$producer_ = df_$producer + df_$receiver_ = df_$receiver + df_$from = gsub(paste0(splitted, "_"), "", df_$from) + df_$to = gsub(paste0(splitted, "_"), "", df_$to) + df_$producer = gsub(paste0(splitted, "_"), "", df_$producer) + df_$receiver = gsub(paste0(splitted, "_"), "", df_$receiver) + df_$barcode = paste0(df_$producer_, "-", df_$receiver_, sep, converted_pair) + } else { + df_$barcode = paste0(df_$producer, "-", df_$receiver, sep, converted_pair) + } + + return(df_) + } + barcodes = paste0(lr_interactions$Var2, sep, lr_interactions$Var1) + dfx <- list() + if (!is.null(split.by)) { + for (i in unique(meta[, split.by])) { + dfx[[i]] <- generateDf(ligand, sep, receptor, pair, converted_pair, producers, + receivers, expr_df, fraction_df, i) + dfx[[i]] <- dfx[[i]][dfx[[i]]$barcode %in% barcodes, ] + } + } else { + dfx[[1]] = generateDf(ligand, sep, receptor, pair, converted_pair, producers, + receivers, expr_df, fraction_df) + dfx[[1]] <- dfx[[1]][dfx[[1]]$barcode %in% barcodes, ] + } + + scPalette <- function(n) { + colorSpace <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#F29403", "#F781BF", + "#BC9DCC", "#A65628", "#54B0E4", "#222F75", "#1B9E77", "#B2DF8A", "#E3BE00", + "#FB9A99", "#E7298A", "#910241", "#00CDD1", "#A6CEE3", "#CE1261", "#5E4FA2", + "#8CA77B", "#00441B", "#DEDC00", "#B3DE69", "#8DD3C7", "#999999") + if (n <= length(colorSpace)) { + colors <- colorSpace[1:n] + } else { + colors <- (grDevices::colorRampPalette(colorSpace))(n) + } + return(colors) + } + + chord_diagram <- function(tmp_dfx, lr_interactions, p.adjust_method, scaled, + alpha, directional, show_legend, edge_cols, grid_cols, legend.pos.x, legend.pos.y) { + + if (scaled) { + interactions_items <- lr_interactions$scaled_means + } else { + interactions_items <- lr_interactions$means + } + names(interactions_items) <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) + if (!is.null(p.adjust_method)) { + pvals_items <- lr_interactions$pvals_adj + } else { + pvals_items <- lr_interactions$pvals + } + names(pvals_items) <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) + interactions_items[is.na(pvals_items)] <- 1 + tmp_dfx$pair <- gsub("_", " - ", tmp_dfx$pair) + tmp_dfx$value <- interactions_items[tmp_dfx$barcode] + tmp_dfx$pval <- pvals_items[tmp_dfx$barcode] + if (!is.null(edge_cols)) { + if (length(edge_cols) != length(unique(tmp_dfx$pair))) { + stop(paste0("Please provide ", length(unique(tmp_dfx$pair)), " to edge_colors.")) + } else { + edge_color <- edge_cols + } + } else { + edge_color <- scPalette(length(unique(tmp_dfx$pair))) + } + + if (!is.null(grid_cols)) { + if (length(grid_cols) != length(unique(tmp_dfx$receiver))) { + stop(paste0("Please provide ", length(unique(tmp_dfx$receiver)), + " to grid_colors.")) + } else { + grid_color <- grid_cols + } + } else { + grid_color <- scPalette(length(unique(tmp_dfx$receiver))) + } + names(edge_color) <- unique(tmp_dfx$pair) + names(grid_color) <- unique(tmp_dfx$receiver) + tmp_dfx$edge_color = edge_color[tmp_dfx$pair] + tmp_dfx$edge_color <- colorspace::adjust_transparency(tmp_dfx$edge_color, + alpha = alpha) + tmp_dfx$edge_color[is.na(tmp_dfx$pval)] <- "#00000000" + tmp_dfx$grid_color = grid_color[tmp_dfx$receiver] + + tmp_dfx <- tmp_dfx[!duplicated(tmp_dfx$barcode), ] + + if (directional == 2) { + link.arr.type = "triangle" + } else { + link.arr.type = "big.arrow" + } + + cells <- unique(c(tmp_dfx$producer, tmp_dfx$receiver)) + names(cells) <- cells + + circos.clear() + chordDiagram(tmp_dfx[c("producer", "receiver", "value")], directional = directional, + direction.type = c("diffHeight", "arrows"), link.arr.type = link.arr.type, + annotationTrack = c("name", "grid"), col = tmp_dfx$edge_color, grid.col = grid_color, + group = cells) + requireNamespace("ComplexHeatmap") + if (show_legend) { + lgd <- ComplexHeatmap::Legend(at = names(edge_color), type = "grid", + legend_gp = grid::gpar(fill = edge_color), title = "interactions") + ComplexHeatmap::draw(lgd, x = unit(1, "npc") - unit(legend.pos.x, "mm"), + y = unit(legend.pos.y, "mm"), just = c("right", "bottom")) + } + + circos.clear() + + gg <- recordPlot() + return(gg) + } + gl <- list() + if (length(show_legend) > 1) { + for (i in 1:length(dfx)) { + gl[[i]] <- tryCatch(chord_diagram(dfx[[i]], lr_interactions, p.adjust.method, + standard_scale, alpha, directional, show_legend[i], edge_colors, + grid_colors, legend.pos.x, legend.pos.y), error = function(e) return(NA)) + } + } else { + for (i in 1:length(dfx)) { + gl[[i]] <- tryCatch(chord_diagram(dfx[[i]], lr_interactions, p.adjust.method, + standard_scale, alpha, directional, show_legend, edge_colors, grid_colors, + legend.pos.x, legend.pos.y), error = function(e) return(NA)) + } + } + + if (length(gl) > 1) { + return(gl) + } else { + return(gl[[1]]) + } + +} \ No newline at end of file diff --git a/R/rainCloudPlot.R b/R/rainCloudPlot.R deleted file mode 100644 index f745313..0000000 --- a/R/rainCloudPlot.R +++ /dev/null @@ -1,38 +0,0 @@ -#' rainCloudPlot -#' -#' @param data plotting data -#' @param groupby x axis -#' @param parameter y axis -#' @param default default plotting options -#' @param ... passed to geom_glat_violin -#' @source \url{https://wellcomeopenresearch.org/articles/4-63} -#' @return rainCloudPlot -#' @examples -#' \donttest{ -#' data(iris) -#' rainCloudPlot(data = iris, groupby = "Species", parameter = "Sepal.Length") + coord_flip() -#' } -#' @import dplyr -#' @import ggplot2 -#' @export -#' -rainCloudPlot <- function(data, groupby, parameter, default = TRUE, pt.size = .5, ...){ - - g <- ggplot(data, aes(x = get(groupby), y = get(parameter), fill = get(groupby))) - - if (default){ - g <- g + geom_flat_violin(position = position_nudge(x = .1, y = 0), adjust = 1.5, trim = FALSE, alpha = .5, colour = NA, ...) + - geom_point(aes(x = as.numeric(get(groupby))-.15, y = get(parameter), colour = get(groupby)), position = position_jitter(width = .05), size = pt.size, shape = 20) + - geom_boxplot(outlier.shape = NA, alpha = .5, width = .1, colour = "black") + - theme_classic() + - labs(y= parameter, x = groupby, fill = groupby, colour = groupby) - } else { - g <- g + geom_flat_violin(...)+ - geom_point(aes(x = as.numeric(get(groupby))-.15, y = get(parameter), colour = get(groupby)), position = position_jitter(width = .05), size = pt.size, shape = 20) + - geom_boxplot(outlier.shape = NA, alpha = .5, width = .1, colour = "black") + - theme_classic() + - labs(y= parameter, x = groupby, fill = groupby, colour = groupby) - } - - return(g) -} \ No newline at end of file diff --git a/README.md b/README.md index 7ed52d8..5877b4a 100644 --- a/README.md +++ b/README.md @@ -20,14 +20,11 @@ devtools::install_github('zktuong/ktplots', dependencies = TRUE) ```R library(ktplots) ``` -There is a test dataset in Seurat format to test the functions. +There is a test dataset in `SingleCellExperiment` format to test the functions. ```R -# note, you need to load Seurat to interact with it -# so maybe install Seurat if you haven't already -# if (!requireNamespace("Seurat", quietly = TRUE)) - # install.packages("Seurat") -library(Seurat) +library(SingleCellExperiment) data(kidneyimmune) +#Some functions accept Seurat objects too. ``` The data is downsampled from the [kidney cell atlas](https://kidneycellatlas.org). @@ -104,6 +101,7 @@ if ```genes``` and ```gene.family``` are both not specified, the function will t Specifying ```keep_significant_only``` will only keep those that are p<0.05 (which you can try to adjust with ```p.adjust.method```). + ### plot_cpdb2 Generates a circos-style wire/arc/chord plot for cellphonedb results. @@ -119,9 +117,8 @@ library(ktplots) data(kidneyimmune) data(cpdb_output2) -sce <- Seurat::as.SingleCellExperiment(kidneyimmune) p <- plot_cpdb2(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', - scdata = sce, + scdata = kidneyimmune, idents = 'celltype', # column name where the cell ids are located in the metadata means = means2, pvals = pvals2, @@ -195,119 +192,29 @@ test <- plot_cpdb2(cell_type1 = "CD4_Tem|CD4_Tcm|CD4_Treg", # same usage style a ``` ![plot_cpd2](exampleImages/plot_cpdb2.png) -## New feature - -### compare_cpdb/plot_compare_cpdb -These functions piggy-back on the original `plot_cpdb` function and generates the results from cellphonedb that are run separately on replicates. The goal of this function is to able to compare interactions between groups. - -I've provided example datasets from randomnly selected `Healthy` and `Severe` samples from [Stephenson et al. COVID-19 single cell data set published in Nature Medicine 2021](https://www.nature.com/articles/s41591-021-01329-2). This should get it running and for you to see how the input data folder should be organised: -```R -data(covid_sample_metadata) -data(covid_cpdb_meta) -file <- system.file("extdata", "covid_cpdb.tar.gz", package = "ktplots") -# copy and unpack wherever you want this to end up -file.copy(file, ".") -system("tar -xzf covid_cpdb.tar.gz") -``` - -It requires 1) an input table like so: -```R -> covid_cpdb_meta - sample_id cellphonedb_folder sce_file -1 MH9143325 covid_cpdb/MH9143325/out covid_cpdb/MH9143325/sce.rds -2 MH9143320 covid_cpdb/MH9143320/out covid_cpdb/MH9143320/sce.rds -3 MH9143274 covid_cpdb/MH9143274/out covid_cpdb/MH9143274/sce.rds -4 MH8919226 covid_cpdb/MH8919226/out covid_cpdb/MH8919226/sce.rds -5 MH8919227 covid_cpdb/MH8919227/out covid_cpdb/MH8919227/sce.rds -6 newcastle49 covid_cpdb/newcastle49/out covid_cpdb/newcastle49/sce.rds -7 MH9179822 covid_cpdb/MH9179822/out covid_cpdb/MH9179822/sce.rds -8 MH8919178 covid_cpdb/MH8919178/out covid_cpdb/MH8919178/sce.rds -9 MH8919177 covid_cpdb/MH8919177/out covid_cpdb/MH8919177/sce.rds -10 MH8919176 covid_cpdb/MH8919176/out covid_cpdb/MH8919176/sce.rds -11 MH8919179 covid_cpdb/MH8919179/out covid_cpdb/MH8919179/sce.rds -12 MH9179826 covid_cpdb/MH9179826/out covid_cpdb/MH9179826/sce.rds -``` -where each row is a sample, the location of the `out` folder generated by cellphonedb and a single-cell object used to generate the cellphonedb result. If cellphonedb was ran with a `.h5ad` the sce_file would be the path to the `.h5ad` file. - -and 2) a sample metadata data frame for the tests to run: -```R -> covid_sample_metadata - sample_id Status_on_day_collection_summary -MH9143325 MH9143325 Severe -MH9143320 MH9143320 Severe -MH9143274 MH9143274 Severe -MH8919226 MH8919226 Healthy -MH8919227 MH8919227 Healthy -newcastle49 newcastle49 Severe -MH9179822 MH9179822 Severe -MH8919178 MH8919178 Healthy -MH8919177 MH8919177 Healthy -MH8919176 MH8919176 Healthy -MH8919179 MH8919179 Healthy -MH9179826 MH9179826 Severe -``` - -So if I want to compare between Severe vs Healthy, I would specify the function as follows: -```R -## set up the levels -covid_sample_metadata$Status_on_day_collection_summary <- factor(covid_sample_metadata$Status_on_day_collection_summary, levels = c('Healthy', 'Severe')) - -out <- compare_cpdb(cpdb_meta = covid_cpdb_meta, - sample_metadata = covid_sample_metadata, - celltypes = c("B_cell", "CD14", "CD16", "CD4", "CD8", "DCs", "MAIT", "NK_16hi", "NK_56hi", "Plasmablast", "Platelets", "Treg", "gdT", "pDC"), # the actual celltypes you want to test - celltype_col = "initial_clustering", # the column that holds the cell type annotation in the sce object - groupby = "Status_on_day_collection_summary") # the column in the sample_metadata that holds the column where you want to do the comparison. In this example, it's Severe vs Healthy -``` -This returns a list of dataframes (for each contrast found) with which you can use to plot the results. - -`plot_compare_cpdb` is a simple function to achieve that but you can always just make a custom plotting function based on what you want. - -```R -plot_compare_cpdb(out) # red is significantly increased in Severe compared to Healthy. -``` -![plot_compare_cpdb](exampleImages/plot_compare_cpdb_example.png) - -If there are multiple contrasts and groups, you can facet the plot by specifying `groups = c('group1', 'group2')`. +### plot_cpdb3 +Generates a chord diagram inspired from [CellChat](https://github.com/sqjin/CellChat)'s way of showing the data! +Usage is similar to `plot_cpdb2` but with reduced options. Additional kwargs are passed to `plot_cpdb`. ```R -# let's mock up a new contrast like this -covid_sample_metadata$Status_on_day_collection_summary <- c(rep('Severe', 3), rep('Healthy', 2), rep('notSevere', 2), rep('Healthy', 4), 'notSevere') -out <- compare_cpdb(cpdb_meta = covid_cpdb_meta, - sample_metadata = covid_sample_metadata, - celltypes = c("B_cell", "CD14", "CD16", "CD4", "CD8", "DCs", "MAIT", "NK_16hi", "NK_56hi", "Plasmablast", "Platelets", "Treg", "gdT", "pDC"), # the actual celltypes you want to test - celltype_col = "initial_clustering", # the column that holds the cell type annotation in the sce object - groupby = "Status_on_day_collection_summary") -plot_compare_cpdb(out, alpha = .1, groups = names(out)) # there's no significant hit at 0.05 in this dummy example -``` -![plot_compare_cpdb4](exampleImages/plot_compare_cpdb_example4.png) - -The default method uses a pairwise `wilcox.test`. Alternatives are pairwise Welch's `t.test` or a linear mixed model with `lmer`. - -To run the linear mixed effect model, it expects that the input data is paired (i.e an individual with multiple samples corresponding to multiple timepoints): +library(ktplots) +data(kidneyimmune) +data(cpdb_output2) -```R -# just as a dummy example, lets say the samples are matched where there are two samples per individual -covid_sample_metadata$individual <- rep(c("A", "B", "C", "D", "E", "F"), 2) - -# actually run it -out <- compare_cpdb(cpdb_meta = covid_cpdb_meta, - sample_metadata = covid_sample_metadata, - celltypes = c("B_cell", "CD14", "CD16", "CD4", "CD8", "DCs", "MAIT", "NK_16hi", - "NK_56hi", "Plasmablast", "Platelets", "Treg", "gdT", "pDC"), - celltype_col = "initial_clustering", - groupby = "Status_on_day_collection_summary", - formula = "~ Status_on_day_collection_summary + (1|individual)", # formula passed to lmer - method = "lmer") - -plot_compare_cpdb(out, contrast = 'Status_on_day_collection_summarySevere') # use the colnames(out) to pick the right column. +p <- plot_cpdb3(cell_type1 = 'B cell', cell_type2 = 'CD4T cell|MNPd', + scdata = kidneyimmune, + idents = 'celltype', # column name where the cell ids are located in the metadata + means = means2, + pvals = pvals2, + deconvoluted = decon2, # new options from here on specific to plot_cpdb3 + keep_significant_only = TRUE, + standard_scale = TRUE, + remove_self = TRUE + ) +p ``` -![plot_compare_cpdb2](exampleImages/plot_compare_cpdb_example2.png) -Specifying `cluster = TRUE` will move the rows and columns to make it look a bit more ordered. -```R -plot_compare_cpdb(out, contrast = 'Status_on_day_collection_summarySevere', cluster = TRUE) -``` -![plot_compare_cpdb3](exampleImages/plot_compare_cpdb_example3.png) +![plot_cpdb3](exampleImages/plot_cpdb3.png) ## Other useful functions @@ -349,37 +256,6 @@ cowplot::plot_grid(pa, pb, p1, p2, ncol = 2) ``` ![plot_cpdb](exampleImages/correlationSpot_example.png) -### StackedVlnPlot -Generates a stacked violinplot like in scanpy's ```sc.pl.stacked_violin```. - -Credits to [@tangming2005](https://twitter.com/tangming2005). -```R -features <- c("CD79A", "MS4A1", "CD8A", "CD8B", "LYZ", "LGALS3", "S100A8", "GNLY", "NKG7", "KLRB1", "FCGR3A", "FCER1A", "CST3") -StackedVlnPlot(kidneyimmune, features = features) + theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) -``` -![StackedVlnPlot](exampleImages/StackedVlnPlot_example.png) -Seems like standard ggplot ```theme``` functions only work on the x-axis. Need to work out how to adjust that. - -### rainCloudPlot -Generates a raincloudplot to use boxplot, scatterplot and violin all at once! - -Adopted from [https://wellcomeopenresearch.org/articles/4-63](https://wellcomeopenresearch.org/articles/4-63) -```R -rainCloudPlot(data = kidneyimmune@meta.data, groupby = "celltype", parameter = "n_counts") + coord_flip() -``` -![rainCloudPlot](exampleImages/rainCloudPlot_example.png) - -### small_legend/small_guide/small_axis/small_grid/topright_legend/topleft_legend/bottomleft_legend/bottomright_legend -As shown in the examples above, these are some functions to quickly adjust the size and position of ggplots. -```R -# for example -g <- Seurat::DimPlot(kidneyimmune, group.by = "celltype") -g1 <- g + small_legend() + small_guide() + small_axis() + bottomleft_legend() -library(patchwork) -g + g1 -``` -![gghelperfunctions](exampleImages/gghelperfunctions_example.png) - ### Citation If you find these functions useful, please consider leaving a star, citing this repository, and/or citing the following [DOI](https://doi.org/10.5281/zenodo.5717922): diff --git a/data/kidneyimmune.RData b/data/kidneyimmune.RData index 51df78c..3e29ff8 100644 Binary files a/data/kidneyimmune.RData and b/data/kidneyimmune.RData differ diff --git a/exampleImages/plot_cpdb3.png b/exampleImages/plot_cpdb3.png new file mode 100644 index 0000000..0c44835 Binary files /dev/null and b/exampleImages/plot_cpdb3.png differ diff --git a/man/StackedVlnPlot.Rd b/man/StackedVlnPlot.Rd deleted file mode 100644 index 6ef212b..0000000 --- a/man/StackedVlnPlot.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/StackedVlnPlot.R -\name{StackedVlnPlot} -\alias{StackedVlnPlot} -\title{Plotting stacked violin plot} -\source{ -\url{https://divingintogeneticsandgenomics.rbind.io/post/stacked-violin-plot-for-visualizing-single-cell-data-in-seurat/} -} -\usage{ -StackedVlnPlot( - obj, - features, - pt.size = 0, - plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"), - ... -) -} -\arguments{ -\item{obj}{single-cell data. can be seurat/summarizedexperiment object} - -\item{features}{genes/features to plot} - -\item{pt.size}{size of dots} - -\item{plot.margin}{to adjust the white space between each plot.} - -\item{...}{passed to Seurat::VlnPlot} -} -\value{ -StackedVlnPlot -} -\description{ -Plotting stacked violin plot -} -\examples{ -data(kidneyimmune) -features<- c("CD79A", "MS4A1", "CD8A", "CD8B", "LYZ", "LGALS3", "S100A8", "GNLY", "NKG7", "KLRB1", "FCGR3A", "FCER1A", "CST3") -StackedVlnPlot(kidneyimmune, features = features) + theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) -} diff --git a/man/compare_cpdb.Rd b/man/compare_cpdb.Rd deleted file mode 100644 index 4a1d08d..0000000 --- a/man/compare_cpdb.Rd +++ /dev/null @@ -1,88 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compare_cpdb.R -\name{compare_cpdb} -\alias{compare_cpdb} -\title{Compare cellphonedb results performed on replicates} -\usage{ -compare_cpdb( - cpdb_meta, - sample_metadata, - celltypes, - celltype_col, - groupby = NULL, - formula = NULL, - method = c("wilcox.test", "t.test", "lmer"), - BPPARAM = SerialParam(), - version3 = FALSE, - verbose = TRUE, - p.adjust.mode = c("celltype", "all"), - p.adjust.method = "fdr", - ... -) -} -\arguments{ -\item{cpdb_meta}{data.frame containing the sample name, cellphonedb 'out' folder path (containing means.txt and pvalues.txt/relevant_interactions.txt if version 3), and single-cell object file path (.h5ad or .rds)} - -\item{sample_metadata}{data.frame containing the sample name, and groupings.} - -\item{celltypes}{subset celltypes for comparison} - -\item{celltype_col}{column name in single cell object holding celtype annotation.} - -\item{groupby}{for significance testing. only if method is t.test or wilcoxon.} - -\item{formula}{for signfiicance testing. only if method is lme.} - -\item{method}{one of 'wilcox.test', 't.test', 'lmer'} - -\item{BPPARAM}{BiocParallelParam class.} - -\item{version3}{boolean. if cellphonedb version 3} - -\item{verbose}{Whether or not to print messages.} - -\item{p.adjust.mode}{whether or not to correct all interactions, or to perform it on a celltype-celltype basis.} - -\item{p.adjust.method}{passed to `method`in `stats::p.adjust`.} - -\item{...}{passed to tests.} - -\item{result}{output from compare_cpdb} - -\item{contrast}{name of contrast to plot. only if method used was 'lmer'} - -\item{groups}{name of groups to facet.} - -\item{alpha}{adjusted pvalue cut off to keep in the plot.} - -\item{cluster}{whether or not to perform a simple clustering in the final plot} - -\item{max_size}{maximum size of circles} - -\item{limits}{upper and lower limits for plotting the results} - -\item{highlight_significant}{whether or not to keep only significant results in the final plot} -} -\value{ -results for plotting -} -\description{ -Compare cellphonedb results performed on replicates -} -\examples{ -\donttest{ -data(covid_sample_metadata) -data(covid_cpdb_meta) - -file <- system.file("extdata", "covid_cpdb.tar.gz", package = "ktplots") -file.copy(file, ".") -system("tar -xzf covid_cpdb.tar.gz") - -covid_sample_metadata$individual <- rep(c("A", "B"), 6) -out <- compare_cpdb(cpdb_meta = covid_cpdb_meta, sample_metadata = covid_sample_metadata, - celltypes = c("B_cell", "CD14", "CD16", "CD4", "CD8", "DCs", "MAIT", "NK_16hi", - "NK_56hi", "Plasmablast", "Platelets", "Treg", "gdT", "pDC"), celltype_col = "initial_clustering", - groupby = "Status_on_day_collection_summary") -plot_compare_cpdb(out) -} -} diff --git a/man/geom_flat_violin.Rd b/man/geom_flat_violin.Rd deleted file mode 100644 index 5dcb8e8..0000000 --- a/man/geom_flat_violin.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/geom_flat_violin.R -\name{geom_flat_violin} -\alias{geom_flat_violin} -\title{geom_flat_violin} -\source{ -\url{https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R} -} -\usage{ -geom_flat_violin( - mapping = NULL, - data = NULL, - stat = "ydensity", - position = "dodge", - trim = TRUE, - scale = "area", - show.legend = NA, - inherit.aes = TRUE, - ... -) -} -\arguments{ -\item{mapping}{see ggplot2::layer} - -\item{data}{see ggplot2::layer} - -\item{stat}{see ggplot2::layer} - -\item{position}{see ggplot2::layer} - -\item{trim}{see ggplot2::layer} - -\item{scale}{see ggplot2::layer} - -\item{show.legend}{see ggplot2::layer} - -\item{inherit.aes}{see ggplot2::layer} - -\item{...}{passed to ggplot2::layer} -} -\value{ -geom_flat_violin -} -\description{ -geom_flat_violin -} -\examples{ -\donttest{ -data(iris) -ggplot(iris, aes(Species, Sepal.Length)) + - geom_flat_violin() + - coord_flip() -} -} diff --git a/man/kidneyimmune.Rd b/man/kidneyimmune.Rd index 453f638..95f2c52 100644 --- a/man/kidneyimmune.Rd +++ b/man/kidneyimmune.Rd @@ -15,16 +15,14 @@ \alias{covid_sample_metadata} \title{kidneyimmune} \format{ -A Seurat object with the following slots filled +A SingleCellExperiment object with the following slots filled \describe{ \item{assays}{ - \itemize{Currently only contains one assay ("RNA" - scRNA-seq expression data) - \item{data - Normalized expression data}} + \itemize{Currently only contains "counts" and "logcounts" + \item{counts - Raw expression data} + \item{logcounts - Normalized expression data}} } - \item{meta.data}{Cell level metadata} - \item{active.assay}{Current default assay} - \item{active.ident}{Current default idents} - \item{version}{Seurat version used to create the object} + \item{colData}{Cell level metadata} } Data frames containing outputs after CellPhoneDB analysis diff --git a/man/misc.Rd b/man/misc.Rd index b440eac..cb4590a 100644 --- a/man/misc.Rd +++ b/man/misc.Rd @@ -1,12 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/StackedVlnPlot.R, R/geom_flat_violin.R, -% R/misc.R -\docType{data} +% Please edit documentation in R/misc.R \name{misc} \alias{misc} -\alias{modify_vlnplot} -\alias{extract_max} -\alias{GeomFlatViolin} \alias{range01} \alias{\%nin\%} \alias{small_legend} @@ -18,22 +13,7 @@ \alias{bottomleft_legend} \alias{bottomright_legend} \title{miscellaneous functions} -\format{ -An object of class \code{GeomFlatViolin} (inherits from \code{Geom}, \code{ggproto}, \code{gg}) of length 6. -} \usage{ -modify_vlnplot( - obj, - feature, - pt.size = 0, - plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"), - ... -) - -extract_max(p) - -GeomFlatViolin - range01(x) x \%nin\% y @@ -55,24 +35,14 @@ bottomleft_legend(legendmargin = margin(6, 6, 6, 6), ...) bottomright_legend(legendmargin = margin(6, 6, 6, 6), ...) } \arguments{ -\item{obj}{single-cell data. can be seurat/summarizedexperiment object} - -\item{feature}{genes/features to plot} - -\item{pt.size}{size of dots} - -\item{plot.margin}{to adjust the white space between each plot.} - -\item{...}{passed to ggplot2::theme} - -\item{p}{value} - \item{x}{a vector} \item{y}{a vector} \item{fontsize}{float/int} +\item{...}{passed to ggplot2::theme} + \item{guidesize}{float/int} \item{linethickness}{float/int} @@ -115,4 +85,3 @@ g + bottomleft_legend() g + bottomright_legend() } } -\keyword{datasets} diff --git a/man/plot_cpdb2.Rd b/man/plot_cpdb2.Rd index bba5daa..27677f5 100644 --- a/man/plot_cpdb2.Rd +++ b/man/plot_cpdb2.Rd @@ -77,7 +77,7 @@ plot_cpdb2( \item{scale}{logical. scale the expression to mean +/- SD. NULL defaults to TRUE.} } \value{ -ggplot dot plot object of cellphone db output +Plotting cellphonedb results as a weird chord diagram } \description{ Plotting cellphonedb results diff --git a/man/plot_cpdb3.Rd b/man/plot_cpdb3.Rd new file mode 100644 index 0000000..1cc51d3 --- /dev/null +++ b/man/plot_cpdb3.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_cpdb3.R +\name{plot_cpdb3} +\alias{plot_cpdb3} +\title{Plotting cellphonedb results as a chord diagram} +\usage{ +plot_cpdb3( + cell_type1, + cell_type2, + scdata, + idents, + means, + pvals, + deconvoluted, + p.adjust.method = NULL, + keep_significant_only = TRUE, + split.by = NULL, + standard_scale = TRUE, + separator = NULL, + gene_symbol_mapping = NULL, + frac = 0.1, + remove_self = TRUE, + desiredInteractions = NULL, + version3 = FALSE, + directional = 1, + alpha = 0.5, + edge_colors = NULL, + grid_colors = NULL, + show_legend = TRUE, + legend.pos.x = 20, + legend.pos.y = 20, + ... +) +} +\arguments{ +\item{cell_type1}{cell type 1} + +\item{cell_type2}{cell type 2} + +\item{scdata}{single-cell data. can be seurat/summarizedexperiment object} + +\item{idents}{vector holding the idents for each cell or column name of scdata's metadata. MUST match cpdb's columns} + +\item{means}{object holding means.txt from cpdb output} + +\item{pvals}{object holding pvals.txt from cpdb output} + +\item{deconvoluted}{object holding deconvoluted.txt from cpdb output} + +\item{p.adjust.method}{correction method. p.adjust.methods of one of these options: c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none')} + +\item{keep_significant_only}{logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb.} + +\item{split.by}{column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis.} + +\item{standard_scale}{logical. scale the expression to range from 0 to 1. Default is TRUE} + +\item{separator}{default = NULL. separator to use to split between celltypes. Unless otherwise specified, the separator will be `>@<`. Make sure the idents and split.by doesn't overlap with this.} + +\item{gene_symbol_mapping}{default = NULL.column name for rowData in sce holding the actual gene symbols if row names aren't gene symbols} + +\item{frac}{default = 0.1. Minimum fraction of celtypes expressing a gene in order to keep the interaction. Gene must be expressesd >= `frac` in either of the pair of celltypes in order to keep.} + +\item{remove_self}{default = TRUE. Remove self-self arcs.} + +\item{desiredInteractions}{default = NULL. Specific list of celltype comparisons e.g. list(c('CD4_Tcm', 'cDC1'), c('CD4_Tcm', 'cDC2')). Also accepts a dataframe where first column is celltype 1 and 2nd column is celltype 2.} + +\item{version3}{if is cellphonedb version3.} + +\item{directional}{Whether links have directions. 1 means the direction is from the first column in df to the second column, -1 is the reverse, 0 is no direction, and 2 for two directional.} + +\item{alpha}{transparency for links} + +\item{edge_colors}{vector of colors for links} + +\item{grid_colors}{vector of colrs for grids} + +\item{show_legend}{whether or not to show the legend} + +\item{legend.pos.x}{x position of legend} + +\item{legend.pos.y}{y position of legend} + +\item{...}{passes arguments plot_cpdb} + +\item{scale}{logical. scale the expression to mean +/- SD. NULL defaults to TRUE.} +} +\value{ +Plotting cellphonedb results as a CellChat inspired chord diagram +} +\description{ +Plotting cellphonedb results as a chord diagram +} +\examples{ +\donttest{ +} +} diff --git a/man/rainCloudPlot.Rd b/man/rainCloudPlot.Rd deleted file mode 100644 index 4e8b39c..0000000 --- a/man/rainCloudPlot.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rainCloudPlot.R -\name{rainCloudPlot} -\alias{rainCloudPlot} -\title{rainCloudPlot} -\source{ -\url{https://wellcomeopenresearch.org/articles/4-63} -} -\usage{ -rainCloudPlot(data, groupby, parameter, default = TRUE, pt.size = 0.5, ...) -} -\arguments{ -\item{data}{plotting data} - -\item{groupby}{x axis} - -\item{parameter}{y axis} - -\item{default}{default plotting options} - -\item{...}{passed to geom_glat_violin} -} -\value{ -rainCloudPlot -} -\description{ -rainCloudPlot -} -\examples{ -\donttest{ -data(iris) -rainCloudPlot(data = iris, groupby = "Species", parameter = "Sepal.Length") + coord_flip() -} -} diff --git a/tests/testthat/test_compare_cpdb.R b/tests/testthat/test_compare_cpdb.R deleted file mode 100644 index a8f66e0..0000000 --- a/tests/testthat/test_compare_cpdb.R +++ /dev/null @@ -1,65 +0,0 @@ -data(covid_sample_metadata) -data(covid_cpdb_meta) - -file <- system.file("extdata", "covid_cpdb.tar.gz", package = "ktplots") -file.copy(file, ".") -system("tar -xzf covid_cpdb.tar.gz") - -covid_sample_metadata$individual <- rep(c("A", "B", "C", "D", "E", "F"), 2) - -test_that("compare_cpdb works 1", { - out <- compare_cpdb(cpdb_meta = covid_cpdb_meta, sample_metadata = covid_sample_metadata, - celltypes = c("B_cell", "CD14", "CD16", "CD4", "CD8", "DCs", "MAIT", "NK_16hi", - "NK_56hi", "Plasmablast", "Platelets", "Treg", "gdT", "pDC"), celltype_col = "initial_clustering", - groupby = "Status_on_day_collection_summary") - p1 <- plot_compare_cpdb(out) - p2 <- plot_compare_cpdb(out, cluster = TRUE, group = 'test') - expect_true(nrow(out[[1]]) == 8339) - expect_true(ncol(out[[1]]) == 5) - expect_true(length(which(out[[1]]$padj < 0.05)) == 289) - expect_true(length(which(out[[1]]$pval < 0.05)) == 941) - expect_true(is.ggplot(p1)) - expect_true(is.ggplot(p2)) -}) - - -test_that("compare_cpdb works 2", { - out <- compare_cpdb(cpdb_meta = covid_cpdb_meta, sample_metadata = covid_sample_metadata, - celltypes = c("B_cell", "CD14", "CD16", "CD4", "CD8", "DCs", "MAIT", "NK_16hi", - "NK_56hi", "Plasmablast", "Platelets", "Treg", "gdT", "pDC"), celltype_col = "initial_clustering", - groupby = "Status_on_day_collection_summary", method = "t.test") - expect_true(nrow(out[[1]]) == 8339) - expect_true(ncol(out[[1]]) == 5) - expect_true(length(which(out[[1]]$padj < 0.05)) == 372) - expect_true(length(which(out[[1]]$pval < 0.05)) == 964) -}) - -test_that("compare_cpdb works 3", { - out <- compare_cpdb(cpdb_meta = covid_cpdb_meta, sample_metadata = covid_sample_metadata, - celltypes = c("B_cell", "CD14", "CD16", "CD4", "CD8", "DCs", "MAIT", "NK_16hi", - "NK_56hi", "Plasmablast", "Platelets", "Treg", "gdT", "pDC"), celltype_col = "initial_clustering", - groupby = "Status_on_day_collection_summary", formula = "~ Status_on_day_collection_summary + (1|individual)", - method = "lmer") - expect_true(nrow(out) == 8339) - p <- plot_compare_cpdb(out, contrast = 'Status_on_day_collection_summarySevere', groups = 'Severe') - expect_true(is.ggplot(p)) -}) - - -test_that("compare_cpdb works 4", { - out <- compare_cpdb(cpdb_meta = covid_cpdb_meta, sample_metadata = covid_sample_metadata, - celltypes = c("B_cell", "CD14", "CD16", "CD4", "CD8", "DCs", "MAIT", "NK_16hi", - "NK_56hi", "Plasmablast", "Platelets", "Treg", "gdT", "pDC"), celltype_col = "initial_clustering", - groupby = "Status_on_day_collection_summary", p.adjust.mode = 'all') - expect_true(length(which(out[[1]]$padj < 0.05)) == 0) - expect_true(length(which(out[[1]]$pval < 0.05)) == 941) -}) - -# test_that("compare_cpdb works 5", { -# covid_sample_metadata$Status_on_day_collection_summary <- c(rep('Severe', 3), rep('Healthy', 2), rep('notSevere', 2), rep('Healthy', 4), 'notSevere') -# out <- compare_cpdb(cpdb_meta = covid_cpdb_meta, sample_metadata = covid_sample_metadata, -# celltypes = c("B_cell", "CD14", "CD16", "CD4", "CD8", "DCs", "MAIT", "NK_16hi", -# "NK_56hi", "Plasmablast", "Platelets", "Treg", "gdT", "pDC"), celltype_col = "initial_clustering", -# groupby = "Status_on_day_collection_summary") -# expect_error(plot_compare_cpdb(out)) -# }) diff --git a/tests/testthat/test_cpdbplot1.R b/tests/testthat/test_cpdbplot1.R index c84923a..720646b 100644 --- a/tests/testthat/test_cpdbplot1.R +++ b/tests/testthat/test_cpdbplot1.R @@ -67,7 +67,7 @@ test_that("werid characters are ok", { }) test_that("plot_cpdb2 works 1",{ - sce <- Seurat::as.SingleCellExperiment(kidneyimmune) + sce <- kidneyimmune p <- plot_cpdb2(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', scdata = sce, idents = 'celltype', # column name where the cell ids are located in the metadata @@ -99,9 +99,8 @@ test_that("plot_cpdb2 works 1",{ test_that("plot_cpdb2 works 2",{ - sce <- Seurat::as.SingleCellExperiment(kidneyimmune) p <- plot_cpdb2(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', - scdata = sce, + scdata = kidneyimmune, idents = 'celltype', # column name where the cell ids are located in the metadata split.by = 'Experiment', # column name where the grouping column is. Optional. means = means, @@ -138,4 +137,47 @@ test_that("plot_cpdb2 works 2",{ expect_false(is.ggplot(p[[9]])) expect_false(is.ggplot(p[[10]])) expect_false(is.ggplot(p[[11]])) +}) + + +test_that("plot_cpdb3 works 1",{ + p <- plot_cpdb3(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', + scdata = kidneyimmune, + idents = 'celltype', # column name where the cell ids are located in the metadata + means = means2, + pvals = pvals2, + deconvoluted = decon2, # new options from here on specific to plot_cpdb2 + keep_significant_only = TRUE, + standard_scale = TRUE, + remove_self = TRUE + ) + + expect_that(class(p), equals("recordedplot")) +}) + + +test_that("plot_cpdb3 2",{ + p <- plot_cpdb3(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', + scdata = kidneyimmune, + idents = 'celltype', # column name where the cell ids are located in the metadata + split.by = 'Experiment', # column name where the grouping column is. Optional. + means = means, + pvals = pvals, + deconvoluted = decon, # new options from here on specific to plot_cpdb2 + keep_significant_only = TRUE, + standard_scale = TRUE, + remove_self = TRUE + ) + + expect_that(class(p[[1]]), equals("recordedplot")) + expect_that(class(p[[2]]), equals("recordedplot")) + expect_that(class(p[[3]]), equals("recordedplot")) + expect_that(class(p[[4]]), equals("recordedplot")) + expect_that(class(p[[5]]), equals("recordedplot")) + expect_that(class(p[[6]]), equals("recordedplot")) + expect_true(is.na(p[[7]])) + expect_that(class(p[[8]]), equals("recordedplot")) + expect_true(is.na(p[[9]])) + expect_true(is.na(p[[10]])) + expect_true(is.na(p[[11]])) }) \ No newline at end of file diff --git a/tests/testthat/test_flatviolin.R b/tests/testthat/test_flatviolin.R deleted file mode 100644 index ff9dfe5..0000000 --- a/tests/testthat/test_flatviolin.R +++ /dev/null @@ -1,5 +0,0 @@ -data(iris) -test_that('flat_violin_works', { - p <- ggplot(iris, aes(Species, Sepal.Length)) + geom_flat_violin() + coord_flip() - expect_true(is.ggplot(p)) -}) diff --git a/tests/testthat/test_miscellaneous.R b/tests/testthat/test_miscellaneous.R index 631c56a..002aa77 100644 --- a/tests/testthat/test_miscellaneous.R +++ b/tests/testthat/test_miscellaneous.R @@ -1,5 +1,5 @@ data(kidneyimmune) -g <- Seurat::DimPlot(kidneyimmune, group.by = "celltype") +g <- scater::plotReducedDim(kidneyimmune, dimred = "UMAP", colour_by = "celltype") test_that('miscellaneous works1', { g1 <- g + small_legend() + small_guide() + small_axis() + small_grid() diff --git a/tests/testthat/test_raincloud.R b/tests/testthat/test_raincloud.R deleted file mode 100644 index 8dfe5a2..0000000 --- a/tests/testthat/test_raincloud.R +++ /dev/null @@ -1,5 +0,0 @@ -data(iris) -test_that('rainCloudPlot works', { - p <- rainCloudPlot(data = iris, groupby = "Species", parameter = "Sepal.Length") + coord_flip() - expect_true(is.ggplot(p)) -}) \ No newline at end of file diff --git a/tests/testthat/test_stackedviolin.R b/tests/testthat/test_stackedviolin.R deleted file mode 100644 index 2441795..0000000 --- a/tests/testthat/test_stackedviolin.R +++ /dev/null @@ -1,7 +0,0 @@ -data(kidneyimmune) -features<- c("CD79A", "MS4A1", "CD8A", "CD8B", "LYZ", "LGALS3", "S100A8", "GNLY", "NKG7", "KLRB1", "FCGR3A", "FCER1A", "CST3") - -test_that('StackedVlnPlot works', { - p <- StackedVlnPlot(kidneyimmune, features = features) + theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) - expect_true(is.ggplot(p)) -}) \ No newline at end of file