diff --git a/.github/workflows/vignette.yml b/.github/workflows/vignette.yml new file mode 100644 index 0000000..e18653b --- /dev/null +++ b/.github/workflows/vignette.yml @@ -0,0 +1,68 @@ +on: + push: + branches: + - "master" + pull_request: + branches: + - "*" + +name: vignette + +jobs: + vignette: + defaults: + run: + shell: bash -l {0} + strategy: + matrix: + config: + - { os: macos-latest } + runs-on: ${{ matrix.config.os }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + id: install-r + + - uses: r-lib/actions/setup-pandoc@v2 + + - id: bioc + name: Check bioc version + run: | + echo "##[set-output name=mainbiocversion;]$(Rscript -e 'cat(unlist(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $1}')" + echo "##[set-output name=subbiocversion;]$(Rscript -e 'cat(unlist(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $2}')" + echo "##[set-output name=biocversion;]$(Rscript -e 'cat(as.character(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $1}')" + shell: bash -l {0} + + - name: Install pak and query dependencies + run: | + install.packages("pak", repos = "https://r-lib.github.io/p/pak/dev/") + saveRDS(pak::pkg_deps("local::.", dependencies = TRUE), ".github/r-depends.rds") + shell: Rscript {0} + + - name: Cache R packages + uses: actions/cache@v2 + with: + path: | + ${{ env.R_LIBS_USER }} + !${{ env.R_LIBS_USER }}/pak + key: ${{ env.cache-version }}-${{ matrix.config.os }}-biocversion-RELEASE_${{ steps.bioc.outputs.mainbiocversion}}_${{ steps.bioc.outputs.subbiocversion}}-r-${{ matrix.config.r-version}}-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ matrix.config.os }}-biocversion-RELEASE_${{ steps.bioc.outputs.mainbiocversion}}_${{ steps.bioc.outputs.subbiocversion}}-r-${{ matrix.config.r-version}}- + + - name: Install dependencies + run: | + pak::local_install_dev_deps(upgrade = TRUE, dependencies = c("all", "Config/Needs/website")) + pak::pkg_install("pkgdown") + shell: Rscript {0} + + - name: Install package + run: R CMD INSTALL . + + - name: Build and deploy pkgdown site + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' + shell: bash -l {0} diff --git a/DESCRIPTION b/DESCRIPTION index fc71ec6..96f4e32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,26 +1,26 @@ Package: ktplots Title: Plot single-cell data dotplots -Version: 1.2.5 +Version: 2.0.0 Authors@R: person("Kelvin", "Tuong", email = c("z.tuong@uq.edu.au"), role = c("aut", "cre")) Description: Plotting tools for scData. -License: MIT +License: MIT + file LICENSE Encoding: UTF-8 biocViews: Imports: - devtools, - Matrix, - dplyr, - ggplot2, - ggraph, - ggrepel, - reshape2, - viridis, - gtools, - RColorBrewer, - purrr, - circlize, - ComplexHeatmap, - pheatmap + devtools, + Matrix, + dplyr, + ggplot2, + ggraph, + ggrepel, + reshape2, + viridis, + gtools, + RColorBrewer, + purrr, + circlize, + ComplexHeatmap, + pheatmap Suggests: SummarizedExperiment, SingleCellExperiment, @@ -28,8 +28,6 @@ Suggests: testthat, covr, plyr -Depends: - ggplot2 LazyData: true RoxygenNote: 7.2.3 Collate: diff --git a/R/combine_cpdb.R b/R/combine_cpdb.R index 0ea9136..1116bdc 100644 --- a/R/combine_cpdb.R +++ b/R/combine_cpdb.R @@ -13,19 +13,17 @@ #' @export combine_cpdb <- function(...) { output <- list(...) - anames <- c( - "id_cp_interaction", "interacting_pair", "partner_a", "partner_b", + anames <- c("id_cp_interaction", "interacting_pair", "partner_a", "partner_b", "gene_a", "gene_b", "secreted", "receptor_a", "receptor_b", "annotation_strategy", - "is_integrin" - ) - bnames <- c( - "gene_name", "uniprot", "is_complex", "protein_name", "complex_name", - "id_cp_interaction" - ) + "is_integrin") + bnames <- c("gene_name", "uniprot", "is_complex", "protein_name", "complex_name", + "id_cp_interaction") if (all(colnames(output[[1]])[1:11] == anames)) { - out <- output %>% reduce(full_join, by = anames) + out <- output %>% + reduce(full_join, by = anames) } else if (all(colnames(output[[1]])[1:6] == bnames)) { - out <- output %>% reduce(full_join, by = bnames) + out <- output %>% + reduce(full_join, by = bnames) } return(out) -} +} \ No newline at end of file diff --git a/R/correlationSpot.R b/R/correlationSpot.R index 8ce54d9..10ca3db 100644 --- a/R/correlationSpot.R +++ b/R/correlationSpot.R @@ -20,22 +20,10 @@ #' @return SpatialFeaturePlot #' @export -correlationSpot <- function( - st, - genes = NULL, - celltypes = NULL, - geneset = NULL, - mode = c("high", "low", "both"), - cutoff = 0.5, - standardize = TRUE, - dims = 1:30, - k.params = 10, - resolution = 1, - rna_slot = "SCT", - label_slot = "predictions", - by = c("image", "expression"), - average_by_cluster = FALSE, - ...) { +correlationSpot <- function(st, genes = NULL, celltypes = NULL, geneset = NULL, mode = c("high", + "low", "both"), cutoff = 0.5, standardize = TRUE, dims = 1:30, k.params = 10, + resolution = 1, rna_slot = "SCT", label_slot = "predictions", by = c("image", + "expression"), average_by_cluster = FALSE, ...) { requireNamespace("Seurat") requireNamespace("FNN") @@ -44,7 +32,7 @@ correlationSpot <- function( st1 <- Seurat::FindNeighbors(st1, reduction = "pca", dims = dims, k.param = k.params) if (average_by_cluster) { st1 <- Seurat::FindClusters(st1, verbose = FALSE, resolution = resolution) - st1 <- Seurat::AddMetaData(st1, Idents(st1), "main_cluster") + st1 <- Seurat::AddMetaData(st1, Seurat::Idents(st1), "main_cluster") } # extract SNN graph @@ -53,7 +41,8 @@ correlationSpot <- function( knn_idx <- FNN::get.knn(graph, k = k.params)$nn.index } else if (by == "image") { mat <- st1@images$slice1@coordinates[, c("row", "col")] - graph <- as.matrix(dist(mat)) + requireNamespace("stats") + graph <- as.matrix(stats::dist(mat)) knn_idx <- FNN::get.knn(graph, k = k.params)$nn.index } @@ -109,7 +98,8 @@ correlationSpot <- function( } # get the nth percentile of each column - r1 <- apply(df, 2, quantile, cutoff) + requireNamespace("stats") + r1 <- apply(df, 2, stats::quantile, cutoff) if (mode != "both") { if (mode == "high") { for (i in seq_along(r1)) { @@ -126,28 +116,29 @@ correlationSpot <- function( if (ncol(df) == 2) { df <- cbind(as.data.frame(df), main_cluster = st1@meta.data[, c("main_cluster")]) } else { + requireNamespace("stats") if (length(genes) > 1 && length(celltypes) > 1) { - dx_g <- prcomp(df[, genes]) - dx_c <- prcomp(df[, celltypes]) + dx_g <- stats::prcomp(df[, genes]) + dx_c <- stats::prcomp(df[, celltypes]) dx_g <- as.data.frame(dx_g$x)[, 1] dx_c <- as.data.frame(dx_c$x)[, 1] df <- cbind(dx_g, dx_c, main_cluster = st1@meta.data[, "main_cluster"]) } else if (length(genes) > 1 && length(celltypes) < 1) { - dx_g <- prcomp(df[, genes]) + dx_g <- stats::prcomp(df[, genes]) dx_g <- as.data.frame(dx_g$x)[, 1] df <- cbind(dx_g, main_cluster = st1@meta.data[, "main_cluster"]) } else if (length(celltypes) > 1 && length(genes) == 1) { dx_g <- df[, genes] - dx_c <- prcomp(df[, celltypes]) + dx_c <- stats::prcomp(df[, celltypes]) dx_c <- as.data.frame(dx_c$x)[, 1] df <- cbind(dx_g, dx_c, main_cluster = st1@meta.data[, c("main_cluster")]) } else if (length(celltypes) == 1 && length(genes) > 1) { dx_c <- df[, celltypes] - dx_g <- prcomp(df[, genes]) + dx_g <- stats::prcomp(df[, genes]) dx_g <- as.data.frame(dx_g$x)[, 1] df <- cbind(dx_g, dx_c, main_cluster = st1@meta.data[, c("main_cluster")]) } else if (length(celltypes) > 1 && length(genes) < 1) { - dx_c <- prcomp(df[, celltypes]) + dx_c <- stats::prcomp(df[, celltypes]) dx_c <- as.data.frame(dx_c$x)[, 1] df <- cbind(dx_c, main_cluster = st1@meta.data[, "main_cluster"]) } @@ -156,28 +147,29 @@ correlationSpot <- function( if (ncol(df) == 2) { df <- as.data.frame(df) } else { + requireNamespace("stats") if (length(genes) > 1 && length(celltypes) > 1) { - dx_g <- prcomp(df[, genes]) - dx_c <- prcomp(df[, celltypes]) + dx_g <- stats::prcomp(df[, genes]) + dx_c <- stats::prcomp(df[, celltypes]) dx_g <- as.data.frame(dx_g$x)[, 1] dx_c <- as.data.frame(dx_c$x)[, 1] df <- cbind(dx_g, dx_c) } else if (length(celltypes) > 1 && length(genes) == 1) { dx_g <- df[, genes] - dx_c <- prcomp(df[, celltypes]) + dx_c <- stats::prcomp(df[, celltypes]) dx_c <- as.data.frame(dx_c$x)[, 1] df <- cbind(dx_g, dx_c) } else if (length(celltypes) == 1 && length(genes) > 1) { dx_c <- dx_g[, celltypes] - dx_g <- prcomp(df[, genes]) + dx_g <- stats::prcomp(df[, genes]) dx_g <- as.data.frame(dx_g$x)[, 1] df <- cbind(dx_g, dx_c) } else if (length(genes) > 1 && length(celltypes) < 1) { - dx_g <- prcomp(df[, genes]) + dx_g <- stats::prcomp(df[, genes]) dx_g <- as.data.frame(dx_g$x)[, 1] df <- dx_g } else if (length(celltypes) > 1 && length(genes) < 1) { - dx_c <- prcomp(df[, celltypes]) + dx_c <- stats::prcomp(df[, celltypes]) dx_c <- as.data.frame(dx_c$x)[, 1] df <- dx_c } @@ -193,13 +185,15 @@ correlationSpot <- function( if (average_by_cluster) { nn_list <- lapply(nn_list, function(x) { + requireNamespace("stats") tmp <- x[, -which(colnames(x) %in% c("main_cluster"))] - res <- cor(tmp[, 1], tmp[, 2]) + res <- stats::cor(tmp[, 1], tmp[, 2]) return(res) }) } else { nn_list <- lapply(nn_list, function(tmp) { - res <- cor(tmp[, 1], tmp[, 2]) + requireNamespace("stats") + res <- stats::cor(tmp[, 1], tmp[, 2]) return(res) }) } @@ -219,4 +213,4 @@ correlationSpot <- function( g <- Seurat::SpatialFeaturePlot(st, features = "correlation", ...) return(g) -} +} \ No newline at end of file diff --git a/R/data.R b/R/data.R index 403bdc8..1a23f66 100644 --- a/R/data.R +++ b/R/data.R @@ -108,3 +108,53 @@ #' @rdname kidneyimmune #' @docType data "covid_sample_metadata" + +#' cpdb_output_v5 +#' +#' cpdb_output_v5 - Dataframe of CellPhoneDB output means.txt file +#' @rdname kidneyimmune +#' @docType data +#' @usage data(cpdb_output_v5) +#' @format data after CellPhoneDB v5 analysis +#' @examples +#' data(cpdb_output_v5) +"means_v5" + + +#' sce_v5 +#' +#' sce_v5 - A small dummy singlecelldata for cellphonedb v5 +#' @rdname kidneyimmune +#' @docType data +"sce_v5" + + +#' relevant_interactions_v5 +#' +#' relevant_interactions_v5 - Dataframe of CellPhoneDB output relevant_interactions.txt file +#' @rdname kidneyimmune +#' @docType data +"relevant_interactions_v5" + + +#' decon_v5 +#' +#' decon_v5 - Dataframe of CellPhoneDB output deconvoluted.txt file +#' @rdname kidneyimmune +#' @docType data +"decon_v5" + + +#' cellsign_v5 +#' +#' cellsign_v5 - Dataframe of CellPhoneDB output CellSign.txt file +#' @rdname kidneyimmune +#' @docType data +"cellsign_v5" + +#' interaction_scores_v5 +#' +#' interaction_scores_v5 - Dataframe of CellPhoneDB output interaction_scores.txt file +#' @rdname kidneyimmune +#' @docType data +"interaction_scores_v5" diff --git a/R/geneDotPlot.R b/R/geneDotPlot.R index a59bc95..243afa8 100644 --- a/R/geneDotPlot.R +++ b/R/geneDotPlot.R @@ -1,13 +1,13 @@ #' Plotting genes as dotplot #' #' @param scdata single-cell data. can be seurat/summarizedexperiment object -#' @param idents column name holding the idents for each cell +#' @param celltype_key column name holding the celltype for each cell #' @param genes genes you want to plot -#' @param split.by column name in the metadata/coldata table to split the spots by. If not provided, it will plot via idents provided. +#' @param splitby_key column name in the metadata/coldata table to split the spots by. If not provided, it will plot via celltype_key provided. #' @param pct.threshold float. required to keep gene expressed by minimum percentage of cells #' @param scale logical. scale the expression to mean +/- SD. NULL defaults to TRUE. #' @param standard_scale logical. scale the expression to range from 0 to 1. NULL defaults to FALSE. -#' @param keepLevels logical. keep the original factor of the levels of the idents (for plotting) +#' @param keepLevels logical. keep the original factor of the levels of the celltype_key (for plotting) #' @param save.plot logical. will try to save the pdf #' @param h height of plot #' @param w width of plot @@ -21,7 +21,7 @@ #' @examples #' \donttest{ #' data(kidneyimmune) -#' geneDotPlot(kidneyimmune, genes = c("CD68", "CD80", "CD86", "CD74", "CD2", "CD5"), idents = "celltype", split.by = "Project", standard_scale = TRUE) + theme(strip.text.x = element_text(angle = 45, hjust = 0)) +#' geneDotPlot(kidneyimmune, genes = c('CD68', 'CD80', 'CD86', 'CD74', 'CD2', 'CD5'), celltype_key = 'celltype', splitby_key = 'Project', standard_scale = TRUE) + theme(strip.text.x = element_text(angle = 45, hjust = 0)) #' } #' @import dplyr #' @import gtools @@ -30,9 +30,13 @@ #' @import reshape2 #' @import RColorBrewer #' @export -geneDotPlot <- function(scdata, idents, genes, split.by = NULL, pct.threshold = 0.05, scale = NULL, standard_scale = NULL, keepLevels = TRUE, save.plot = FALSE, h = 5, w = 5, filepath = NULL, filename = NULL, heat_cols = NULL, col_limits = NULL, fill = FALSE, outline_col = "black", outline_size = .2) { +geneDotPlot <- function(scdata, celltype_key, genes, splitby_key = NULL, pct.threshold = 0.05, + scale = NULL, standard_scale = NULL, keepLevels = TRUE, save.plot = FALSE, h = 5, + w = 5, filepath = NULL, filename = NULL, heat_cols = NULL, col_limits = NULL, + fill = FALSE, outline_col = "black", outline_size = 0.2) { if (class(scdata) %in% c("SingleCellExperiment", "SummarizedExperiment")) { - cat("data provided is a SingleCellExperiment/SummarizedExperiment object", sep = "\n") + cat("data provided is a SingleCellExperiment/SummarizedExperiment object", + sep = "\n") cat("extracting expression matrix", sep = "\n") requireNamespace("SummarizedExperiment") exp_mat <- SummarizedExperiment::assay(scdata) @@ -50,31 +54,38 @@ geneDotPlot <- function(scdata, idents, genes, split.by = NULL, pct.threshold = metadata <- scdata@meta.data } - cat(paste0("attempting to subset the expression matrix to the ", length(genes), " genes provided"), sep = "\n") - # expr_mat_filtered <- exp_mat[row.names(exp_mat) %in% genes, ] - # exp_mat <- as.matrix(exp_mat) - expr_mat_filtered <- exp_mat[match(rev(genes), row.names(exp_mat))[!is.na(match(rev(genes), row.names(exp_mat)))], , drop = FALSE] + cat(paste0("attempting to subset the expression matrix to the ", length(genes), + " genes provided"), sep = "\n") + # expr_mat_filtered <- exp_mat[row.names(exp_mat) %in% genes, ] exp_mat <- + # as.matrix(exp_mat) + expr_mat_filtered <- exp_mat[match(rev(genes), row.names(exp_mat))[!is.na(match(rev(genes), + row.names(exp_mat)))], , drop = FALSE] - cat(paste0("found ", dim(expr_mat_filtered)[1], " genes in the expression matrix", sep = "\n")) + cat(paste0("found ", dim(expr_mat_filtered)[1], " genes in the expression matrix", + sep = "\n")) - if (!is.null(split.by)) { - labels <- paste0(as.character(metadata[[split.by]]), "_", as.character(metadata[[idents]])) + if (!is.null(splitby_key)) { + labels <- paste0(as.character(metadata[[splitby_key]]), "_", as.character(metadata[[celltype_key]])) labels <- factor(labels) } else { - cat("no groups information provided. defaulting to idents only", sep = "\n") - labels <- factor(metadata[[idents]]) + cat("no groups information provided. defaulting to celltype_key only", sep = "\n") + labels <- factor(metadata[[celltype_key]]) } cat("preparing the final dataframe ...", sep = "\n") - quick_prep <- function(expr, label, groups. = NULL, scale. = scale, meta = metadata, id = idents, standard_scale. = standard_scale) { - expr.df <- tryCatch(data.frame(label = label, t(as.matrix(expr)), check.names = FALSE), error = function(e) { - data.frame(label = label, t(Matrix::Matrix(expr, sparse = FALSE)), check.names = FALSE) - }) + quick_prep <- function(expr, label, groups. = NULL, scale. = scale, meta = metadata, + id = celltype_key, standard_scale. = standard_scale) { + expr.df <- tryCatch(data.frame(label = label, t(as.matrix(expr)), check.names = FALSE), + error = function(e) { + data.frame(label = label, t(Matrix::Matrix(expr, sparse = FALSE)), + check.names = FALSE) + }) meanExpr <- split(expr.df, expr.df$label) meanExpr <- lapply(meanExpr, function(x) { x <- x[, -1, drop = FALSE] - x <- x %>% colMeans() + x <- x %>% + colMeans() return(x) }) @@ -93,9 +104,9 @@ geneDotPlot <- function(scdata, idents, genes, split.by = NULL, pct.threshold = if (length(scale.) < 1) { if (length(standard_scale.) > 0) { if (standard_scale.) { - meanExpr <- meanExpr_ + meanExpr <- meanExpr_ } else { - meanExpr <- meanExpr + meanExpr <- meanExpr } } else { meanExpr <- scale(meanExpr) @@ -103,13 +114,13 @@ geneDotPlot <- function(scdata, idents, genes, split.by = NULL, pct.threshold = } else { if (scale.) { if (length(standard_scale.) > 0) { - if (standard_scale.) { - meanExpr <- meanExpr_ - } else { - meanExpr <- scale(meanExpr) - } - } else { + if (standard_scale.) { + meanExpr <- meanExpr_ + } else { meanExpr <- scale(meanExpr) + } + } else { + meanExpr <- scale(meanExpr) } } else { meanExpr <- meanExpr @@ -132,7 +143,7 @@ geneDotPlot <- function(scdata, idents, genes, split.by = NULL, pct.threshold = names(pct) <- levels(label) pct <- do.call(rbind, pct) - final.pct <- pct / cellNumbers + final.pct <- pct/cellNumbers meltedMeanExpr <- reshape2::melt(meanExpr) meltedfinal.pct <- reshape2::melt(final.pct) @@ -140,17 +151,22 @@ geneDotPlot <- function(scdata, idents, genes, split.by = NULL, pct.threshold = if (!is.null(groups)) { meltedMeanExpr$Var3 <- gsub(".*_", "", meltedMeanExpr$Var1) meltedfinal.pct$Var3 <- gsub(".*_", "", meltedfinal.pct$Var1) - meltedfinal.pct <- meltedfinal.pct[order(meltedfinal.pct$Var3, meltedfinal.pct$Var2), ] - meltedMeanExpr <- meltedMeanExpr[order(meltedMeanExpr$Var3, meltedMeanExpr$Var2), ] + meltedfinal.pct <- meltedfinal.pct[order(meltedfinal.pct$Var3, meltedfinal.pct$Var2), + ] + meltedMeanExpr <- meltedMeanExpr[order(meltedMeanExpr$Var3, meltedMeanExpr$Var2), + ] meltedfinal.pct <- meltedfinal.pct[, -4] meltedMeanExpr <- meltedMeanExpr[, -4] } else { - meltedfinal.pct <- meltedfinal.pct[order(meltedfinal.pct$Var1, meltedfinal.pct$Var2), ] - meltedMeanExpr <- meltedMeanExpr[order(meltedMeanExpr$Var1, meltedMeanExpr$Var2), ] + meltedfinal.pct <- meltedfinal.pct[order(meltedfinal.pct$Var1, meltedfinal.pct$Var2), + ] + meltedMeanExpr <- meltedMeanExpr[order(meltedMeanExpr$Var1, meltedMeanExpr$Var2), + ] } df <- cbind(meltedMeanExpr, meltedfinal.pct$value) - if ((length(scale.) > 0 && scale.) | (length(scale.) < 1 && length(standard_scale.) < 1) | (length(standard_scale.) > 0 && standard_scale.)) { + if ((length(scale.) > 0 && scale.) | (length(scale.) < 1 && length(standard_scale.) < + 1) | (length(standard_scale.) > 0 && standard_scale.)) { colnames(df) <- c("celltype", "gene", "scale.mean", "pct") } else { colnames(df) <- c("celltype", "gene", "mean", "pct") @@ -175,14 +191,15 @@ geneDotPlot <- function(scdata, idents, genes, split.by = NULL, pct.threshold = return(df) } - if (!is.null(split.by)) { - plot.df <- quick_prep(expr_mat_filtered, labels, levels(droplevels(factor(metadata[[split.by]])))) + if (!is.null(splitby_key)) { + plot.df <- quick_prep(expr_mat_filtered, labels, levels(droplevels(factor(metadata[[splitby_key]])))) } else { plot.df <- quick_prep(expr_mat_filtered, labels) } if (!is.null(pct.threshold)) { - cat(paste0("setting minimum percentage of cells expressing gene to be ", pct.threshold * 100, "% of cluster/cell-type"), sep = "\n") + cat(paste0("setting minimum percentage of cells expressing gene to be ", + pct.threshold * 100, "% of cluster/cell-type"), sep = "\n") filter <- split(plot.df, plot.df$gene) remove.genes <- lapply(filter, function(x) { @@ -225,182 +242,171 @@ geneDotPlot <- function(scdata, idents, genes, split.by = NULL, pct.threshold = } # subset the plotting objects - doplot <- function(obj, group. = NULL, file_name = filename, file_path = filepath, dim_w = w, dim_h = h, limits. = col_limits, do.plot = save.plot, scale. = scale, standard_scale. = standard_scale) { + doplot <- function(obj, group. = NULL, file_name = filename, file_path = filepath, + dim_w = w, dim_h = h, limits. = col_limits, do.plot = save.plot, scale. = scale, + standard_scale. = standard_scale) { + requireNamespace("scales") if (is.null(group.)) { - if ((length(scale.) > 0 && scale.) | (length(scale.) < 1 && length(standard_scale.) < 1) | (length(standard_scale.) > 0 && standard_scale.)) { + if ((length(scale.) > 0 && scale.) | (length(scale.) < 1 && length(standard_scale.) < + 1) | (length(standard_scale.) > 0 && standard_scale.)) { g <- ggplot(obj, aes(x = 0, y = gene, size = pct, colour = scale.mean)) } else { g <- ggplot(obj, aes(x = 0, y = gene, size = pct, colour = mean)) } - g <- g + geom_point(pch = 16, na.rm = TRUE) + - scale_y_discrete(position = "left") + - scale_x_discrete(position = "bottom") + - scale_colour_gradientn(colors = heat_cols, limits = limits., na.value = "grey90", oob = scales::squish) + - scale_radius(range = c(0, 4), limits = c(0, 1)) + - theme_bw() + - theme( - axis.text.x = element_text(angle = 90, hjust = 1), - axis.title.x = element_blank(), - axis.ticks = element_blank(), - axis.title.y = element_blank(), - axis.line = element_blank(), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_blank(), - strip.background = element_blank() - ) + - facet_grid(~cell_type) + g <- g + geom_point(pch = 16, na.rm = TRUE) + scale_y_discrete(position = "left") + + scale_x_discrete(position = "bottom") + scale_colour_gradientn(colors = heat_cols, + limits = limits., na.value = "grey90", oob = scales::squish) + scale_radius(range = c(0, + 4), limits = c(0, 1)) + theme_bw() + theme(axis.text.x = element_text(angle = 90, + hjust = 1), axis.title.x = element_blank(), axis.ticks = element_blank(), + axis.title.y = element_blank(), axis.line = element_blank(), panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), panel.border = element_blank(), + strip.background = element_blank()) + facet_grid(~cell_type) } else { - if ((length(scale.) > 0 && scale.) | (length(scale.) < 1 && length(standard_scale.) < 1) | (length(standard_scale.) > 0 && standard_scale.)) { + if ((length(scale.) > 0 && scale.) | (length(scale.) < 1 && length(standard_scale.) < + 1) | (length(standard_scale.) > 0 && standard_scale.)) { g <- ggplot(obj, aes(x = group, y = gene, size = pct, colour = scale.mean)) } else { g <- ggplot(obj, aes(x = group, y = gene, size = pct, colour = mean)) } - g <- g + geom_point(pch = 16, na.rm = TRUE) + - scale_y_discrete(position = "left") + - scale_x_discrete(position = "bottom") + - scale_colour_gradientn(colors = heat_cols, limits = limits., na.value = "grey90", oob = scales::squish) + - scale_radius(range = c(0, 4), limits = c(0, 1)) + - theme_bw() + - theme( - axis.text.x = element_text(angle = 90, hjust = 1), - axis.title.x = element_blank(), - axis.ticks = element_blank(), - axis.title.y = element_blank(), - axis.line = element_blank(), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_blank(), - strip.background = element_blank() - ) + - facet_grid(~cell_type) + g <- g + geom_point(pch = 16, na.rm = TRUE) + scale_y_discrete(position = "left") + + scale_x_discrete(position = "bottom") + scale_colour_gradientn(colors = heat_cols, + limits = limits., na.value = "grey90", oob = scales::squish) + scale_radius(range = c(0, + 4), limits = c(0, 1)) + theme_bw() + theme(axis.text.x = element_text(angle = 90, + hjust = 1), axis.title.x = element_blank(), axis.ticks = element_blank(), + axis.title.y = element_blank(), axis.line = element_blank(), panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), panel.border = element_blank(), + strip.background = element_blank()) + facet_grid(~cell_type) } if (do.plot) { if (is.null(file_name) && is.null(file_path)) { out_path <- "./geneDotPlot.df" warning("no file name provided. saving plot to ", getwd(), "/geneDotPlot.pdf") - ggsave("./geneDotPlot.pdf", plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE) + ggsave("./geneDotPlot.pdf", plot = g, width = dim_w, height = dim_h, + device = "pdf", useDingbats = FALSE) } else if (!is.null(file_name) && is.null(file_path)) { cat(paste0("saving plot to ", file_name), sep = "\n") - tryCatch(ggsave(file_name, plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE), error = function(e) { - ggsave("./geneDotPlot.df", plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE) - warning("file name provided is not suitable. saving as geneDotPlot.pdf") + tryCatch(ggsave(file_name, plot = g, width = dim_w, height = dim_h, + device = "pdf", useDingbats = FALSE), error = function(e) { + ggsave("./geneDotPlot.df", plot = g, width = dim_w, height = dim_h, + device = "pdf", useDingbats = FALSE) + warning("file name provided is not suitable. saving as geneDotPlot.pdf") }) } else if (is.null(file_name) && !is.null(file_path)) { cat(paste0("saving plot to ", file_path), sep = "\n") if (grepl(".pdf", file_path)) { - ggsave(file_path, plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE) + ggsave(file_path, plot = g, width = dim_w, height = dim_h, device = "pdf", + useDingbats = FALSE) } else { - dir.create(file_path, recursive = TRUE) - ggsave(paste0(file_path, "/geneDotPlot.df"), plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE) - warning(paste0("file path provided is not suitable. saving as ", file_path, "/geneDotPlot.pdf")) + dir.create(file_path, recursive = TRUE) + ggsave(paste0(file_path, "/geneDotPlot.df"), plot = g, width = dim_w, + height = dim_h, device = "pdf", useDingbats = FALSE) + warning(paste0("file path provided is not suitable. saving as ", + file_path, "/geneDotPlot.pdf")) } } else if (!is.null(file_name) && !is.null(file_path)) { - cat(paste0("saving plot to ", paste0(file_path, "/", file_name)), sep = "\n") + cat(paste0("saving plot to ", paste0(file_path, "/", file_name)), + sep = "\n") dir.create(file_path, recursive = TRUE) - tryCatch(ggsave(paste0(file_path, "/", file_name), plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE), error = function(e) { - ggsave("./geneDotPlot.df", plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE) - warning("file path provided is not suitable. saving as geneDotPlot.pdf") + tryCatch(ggsave(paste0(file_path, "/", file_name), plot = g, width = dim_w, + height = dim_h, device = "pdf", useDingbats = FALSE), error = function(e) { + ggsave("./geneDotPlot.df", plot = g, width = dim_w, height = dim_h, + device = "pdf", useDingbats = FALSE) + warning("file path provided is not suitable. saving as geneDotPlot.pdf") }) } } return(g) } - fillplot <- function(obj, group. = NULL, file_name = filename, file_path = filepath, dim_w = w, dim_h = h, limits. = col_limits, do.plot = save.plot, scale. = scale, standard_scale. = standard_scale, outline_col. = outline_col, outline_size. = outline_size) { + fillplot <- function(obj, group. = NULL, file_name = filename, file_path = filepath, + dim_w = w, dim_h = h, limits. = col_limits, do.plot = save.plot, scale. = scale, + standard_scale. = standard_scale, outline_col. = outline_col, outline_size. = outline_size) { + requireNamespace("scales") if (is.null(group.)) { - if ((length(scale.) > 0 && scale.) | (length(scale.) < 1 && length(standard_scale.) < 1) | (length(standard_scale.) > 0 && standard_scale.)) { + if ((length(scale.) > 0 && scale.) | (length(scale.) < 1 && length(standard_scale.) < + 1) | (length(standard_scale.) > 0 && standard_scale.)) { g <- ggplot(obj, aes(x = 0, y = gene, size = pct, fill = scale.mean)) } else { g <- ggplot(obj, aes(x = 0, y = gene, size = pct, fill = mean)) } g <- g + geom_point(pch = 21, color = outline_col., stroke = outline_size.) + - scale_y_discrete(position = "left") + - scale_x_discrete(position = "bottom") + - scale_fill_gradientn(colors = heat_cols, limits = limits., na.value = "grey90", oob = scales::squish) + - scale_radius(range = c(0, 4), limits = c(0, 1)) + - theme_bw() + theme( - axis.text.x = element_text(angle = 90, hjust = 1), - axis.title.x = element_blank(), - axis.ticks = element_blank(), - axis.title.y = element_blank(), - axis.line = element_blank(), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_blank(), - strip.background = element_blank() - ) + - facet_grid(~cell_type) + scale_y_discrete(position = "left") + scale_x_discrete(position = "bottom") + + scale_fill_gradientn(colors = heat_cols, limits = limits., na.value = "grey90", + oob = scales::squish) + scale_radius(range = c(0, 4), limits = c(0, + 1)) + theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1), + axis.title.x = element_blank(), axis.ticks = element_blank(), axis.title.y = element_blank(), + axis.line = element_blank(), panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), panel.border = element_blank(), + strip.background = element_blank()) + facet_grid(~cell_type) } else { - if ((length(scale.) > 0 && scale.) | (length(scale.) < - 1 && length(standard_scale.) < 1) | (length(standard_scale.) > - 0 && standard_scale.)) { - g <- ggplot(obj, aes( - x = group, y = gene, size = pct, - fill = scale.mean - )) + if ((length(scale.) > 0 && scale.) | (length(scale.) < 1 && length(standard_scale.) < + 1) | (length(standard_scale.) > 0 && standard_scale.)) { + g <- ggplot(obj, aes(x = group, y = gene, size = pct, fill = scale.mean)) } else { - g <- ggplot(obj, aes( - x = group, y = gene, size = pct, - fill = mean - )) + g <- ggplot(obj, aes(x = group, y = gene, size = pct, fill = mean)) } - g <- g + geom_point(pch = 21, color = outline_col., stroke = outline_size.) + scale_y_discrete(position = "left") + - scale_x_discrete(position = "bottom") + - scale_fill_gradientn(colors = heat_cols, limits = limits., na.value = "grey90", oob = scales::squish) + - scale_radius(range = c(0, 4), limits = c(0, 1)) + - theme_bw() + theme( - axis.text.x = element_text(angle = 90, hjust = 1), - axis.title.x = element_blank(), - axis.ticks = element_blank(), - axis.title.y = element_blank(), - axis.line = element_blank(), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_blank(), - strip.background = element_blank() - ) + - facet_grid(~cell_type) + g <- g + geom_point(pch = 21, color = outline_col., stroke = outline_size.) + + scale_y_discrete(position = "left") + scale_x_discrete(position = "bottom") + + scale_fill_gradientn(colors = heat_cols, limits = limits., na.value = "grey90", + oob = scales::squish) + scale_radius(range = c(0, 4), limits = c(0, + 1)) + theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1), + axis.title.x = element_blank(), axis.ticks = element_blank(), axis.title.y = element_blank(), + axis.line = element_blank(), panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), panel.border = element_blank(), + strip.background = element_blank()) + facet_grid(~cell_type) } if (do.plot) { if (is.null(file_name) && is.null(file_path)) { out_path <- "./geneDotPlot.df" warning("no file name provided. saving plot to ", getwd(), "/geneDotPlot.pdf") - ggsave("./geneDotPlot.pdf", plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE) + ggsave("./geneDotPlot.pdf", plot = g, width = dim_w, height = dim_h, + device = "pdf", useDingbats = FALSE) } else if (!is.null(file_name) && is.null(file_path)) { cat(paste0("saving plot to ", file_name), sep = "\n") - tryCatch(ggsave(file_name, plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE), error = function(e) { - ggsave("./geneDotPlot.df", plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE) - warning("file name provided is not suitable. saving as geneDotPlot.pdf") + tryCatch(ggsave(file_name, plot = g, width = dim_w, height = dim_h, + device = "pdf", useDingbats = FALSE), error = function(e) { + ggsave("./geneDotPlot.df", plot = g, width = dim_w, height = dim_h, + device = "pdf", useDingbats = FALSE) + warning("file name provided is not suitable. saving as geneDotPlot.pdf") }) } else if (is.null(file_name) && !is.null(file_path)) { cat(paste0("saving plot to ", file_path), sep = "\n") if (grepl(".pdf", file_path)) { - ggsave(file_path, plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE) + ggsave(file_path, plot = g, width = dim_w, height = dim_h, device = "pdf", + useDingbats = FALSE) } else { - dir.create(file_path, recursive = TRUE) - ggsave(paste0(file_path, "/geneDotPlot.df"), plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE) - warning(paste0("file path provided is not suitable. saving as ", file_path, "/geneDotPlot.pdf")) + dir.create(file_path, recursive = TRUE) + ggsave(paste0(file_path, "/geneDotPlot.df"), plot = g, width = dim_w, + height = dim_h, device = "pdf", useDingbats = FALSE) + warning(paste0("file path provided is not suitable. saving as ", + file_path, "/geneDotPlot.pdf")) } } else if (!is.null(file_name) && !is.null(file_path)) { - cat(paste0("saving plot to ", paste0(file_path, "/", file_name)), sep = "\n") + cat(paste0("saving plot to ", paste0(file_path, "/", file_name)), + sep = "\n") dir.create(file_path, recursive = TRUE) - tryCatch(ggsave(paste0(file_path, "/", file_name), plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE), error = function(e) { - ggsave("./geneDotPlot.df", plot = g, width = dim_w, height = dim_h, device = "pdf", useDingbats = FALSE) - warning("file path provided is not suitable. saving as geneDotPlot.pdf") + tryCatch(ggsave(paste0(file_path, "/", file_name), plot = g, width = dim_w, + height = dim_h, device = "pdf", useDingbats = FALSE), error = function(e) { + ggsave("./geneDotPlot.df", plot = g, width = dim_w, height = dim_h, + device = "pdf", useDingbats = FALSE) + warning("file path provided is not suitable. saving as geneDotPlot.pdf") }) } } return(g) } if (fill) { - gg <- fillplot(plot.df.final, split.by, file_name = filename, file_path = filepath, dim_w = w, dim_h = h, limits. = col_limits, do.plot = save.plot, scale. = scale, standard_scale. = standard_scale, outline_col. = outline_col, outline_size. = outline_size) + gg <- fillplot(plot.df.final, splitby_key, file_name = filename, file_path = filepath, + dim_w = w, dim_h = h, limits. = col_limits, do.plot = save.plot, scale. = scale, + standard_scale. = standard_scale, outline_col. = outline_col, outline_size. = outline_size) } else { - gg <- doplot(plot.df.final, split.by, file_name = filename, file_path = filepath, dim_w = w, dim_h = h, limits. = col_limits, do.plot = save.plot, scale. = scale, standard_scale. = standard_scale) + gg <- doplot(plot.df.final, splitby_key, file_name = filename, file_path = filepath, + dim_w = w, dim_h = h, limits. = col_limits, do.plot = save.plot, scale. = scale, + standard_scale. = standard_scale) } gg return(gg) -} +} \ No newline at end of file diff --git a/R/init_ktplots.R b/R/init_ktplots.R index 5261cfc..880ae04 100644 --- a/R/init_ktplots.R +++ b/R/init_ktplots.R @@ -20,10 +20,9 @@ init_ktplots <- function() { init <- function(package, dependencies = FALSE) { setwd(paste0("~/Documents/GitHub/", package)) - requireNamespace("roxygen2") requireNamespace("devtools") devtools::document() setwd("..") devtools::install(package, dependencies = dependencies) setwd(package) -} +} \ No newline at end of file diff --git a/R/ktplots.R b/R/ktplots.R index caec0b4..c1c0bd8 100644 --- a/R/ktplots.R +++ b/R/ktplots.R @@ -12,4 +12,4 @@ ktplots <- function() { requireNamespace("ktplots") requireNamespace("roxygen2") init_ktplots() -} +} \ No newline at end of file diff --git a/R/misc.R b/R/misc.R index 4eed171..c6b4102 100644 --- a/R/misc.R +++ b/R/misc.R @@ -8,7 +8,7 @@ #' x <- range01(runif(100)) #' @export range01 <- function(x) { - (x - min(x)) / (max(x) - min(x)) + (x - min(x))/(max(x) - min(x)) } @@ -23,7 +23,8 @@ range01 <- function(x) { #' } #' @export "%nin%" <- function(x, y) { - if (!is.null(x)) x else y + if (!is.null(x)) + x else y } #' @import dplyr @@ -36,14 +37,11 @@ range01 <- function(x) { #' g + small_legend() #' } #' @export -small_legend <- function(fontsize = 5, keysize = .1, marginsize = c(-.1, 0, 0, 0), ...) { - small_legend_theme <- theme( - legend.title = element_text(size = fontsize), - legend.text = element_text(size = fontsize), - legend.key.size = unit(keysize, "lines"), - legend.margin = margin(marginsize[1], marginsize[2], marginsize[3], marginsize[4], unit = "cm"), - ... - ) +small_legend <- function(fontsize = 5, keysize = 0.1, marginsize = c(-0.1, 0, 0, + 0), ...) { + small_legend_theme <- theme(legend.title = element_text(size = fontsize), legend.text = element_text(size = fontsize), + legend.key.size = unit(keysize, "lines"), legend.margin = margin(marginsize[1], + marginsize[2], marginsize[3], marginsize[4], unit = "cm"), ...) return(small_legend_theme) } @@ -56,10 +54,8 @@ small_legend <- function(fontsize = 5, keysize = .1, marginsize = c(-.1, 0, 0, 0 #' } #' @export small_guide <- function(guidesize = 1, ...) { - small_guide <- guides( - shape = guide_legend(override.aes = list(size = guidesize)), - color = guide_legend(override.aes = list(size = guidesize)), ... - ) + small_guide <- guides(shape = guide_legend(override.aes = list(size = guidesize)), + color = guide_legend(override.aes = list(size = guidesize)), ...) return(small_guide) } @@ -73,14 +69,10 @@ small_guide <- function(guidesize = 1, ...) { #' } #' @export small_axis <- function(fontsize = 4, linethickness = 0.1, ...) { - axis <- theme( - text = element_text(size = fontsize), - axis.text = element_text(size = fontsize), - axis.text.x = element_text(size = fontsize), - axis.text.y = element_text(size = fontsize), - axis.line = element_line(size = linethickness), - axis.ticks = element_line(size = linethickness), ... - ) + axis <- theme(text = element_text(size = fontsize), axis.text = element_text(size = fontsize), + axis.text.x = element_text(size = fontsize), axis.text.y = element_text(size = fontsize), + axis.line = element_line(linewidth = linethickness), axis.ticks = element_line(linewidth = linethickness), + ...) return(axis) } @@ -92,8 +84,9 @@ small_axis <- function(fontsize = 4, linethickness = 0.1, ...) { #' g + small_grid() #' } #' @export -small_grid <- function(linethickness = 0.1, panelthickness = .3, ...) { - grid <- theme(panel.grid = element_line(size = linethickness), panel.border = element_rect(size = panelthickness), ...) +small_grid <- function(linethickness = 0.1, panelthickness = 0.3, ...) { + grid <- theme(panel.grid = element_line(linewidth = linethickness), panel.border = element_rect(linewidth = panelthickness), + ...) return(grid) } @@ -106,7 +99,8 @@ small_grid <- function(linethickness = 0.1, panelthickness = .3, ...) { #' } #' @export topright_legend <- function(legendmargin = margin(6, 6, 6, 6), ...) { - legend <- theme(legend.position = c(.99, .99), legend.justification = c("right", "top"), legend.box.just = "right", legend.margin = legendmargin, ...) + legend <- theme(legend.position = c(0.99, 0.99), legend.justification = c("right", + "top"), legend.box.just = "right", legend.margin = legendmargin, ...) return(legend) } @@ -119,7 +113,8 @@ topright_legend <- function(legendmargin = margin(6, 6, 6, 6), ...) { #' } #' @export topleft_legend <- function(legendmargin = margin(6, 6, 6, 6), ...) { - legend <- theme(legend.position = c(.01, .99), legend.justification = c("left", "top"), legend.box.just = "left", legend.margin = legendmargin, ...) + legend <- theme(legend.position = c(0.01, 0.99), legend.justification = c("left", + "top"), legend.box.just = "left", legend.margin = legendmargin, ...) return(legend) } @@ -132,7 +127,8 @@ topleft_legend <- function(legendmargin = margin(6, 6, 6, 6), ...) { #' } #' @export bottomleft_legend <- function(legendmargin = margin(6, 6, 6, 6), ...) { - legend <- theme(legend.position = c(.01, .01), legend.justification = c("left", "bottom"), legend.box.just = "left", legend.margin = legendmargin, ...) + legend <- theme(legend.position = c(0.01, 0.01), legend.justification = c("left", + "bottom"), legend.box.just = "left", legend.margin = legendmargin, ...) return(legend) } @@ -145,6 +141,7 @@ bottomleft_legend <- function(legendmargin = margin(6, 6, 6, 6), ...) { #' } #' @export bottomright_legend <- function(legendmargin = margin(6, 6, 6, 6), ...) { - legend <- theme(legend.position = c(.99, .01), legend.justification = c("right", "bottom"), legend.box.just = "left", legend.margin = legendmargin, ...) + legend <- theme(legend.position = c(0.99, 0.01), legend.justification = c("right", + "bottom"), legend.box.just = "left", legend.margin = legendmargin, ...) return(legend) -} +} \ No newline at end of file diff --git a/R/plot_cpdb.R b/R/plot_cpdb.R index db0070e..9e47ce4 100644 --- a/R/plot_cpdb.R +++ b/R/plot_cpdb.R @@ -1,658 +1,643 @@ -#' Plotting cellphonedb results +#' Plotting CellPhoneDB results #' -#' @param cell_type1 cell type 1 -#' @param cell_type2 cell type 2 -#' @param scdata single-cell data. can be seurat/summarizedexperiment object -#' @param idents vector holding the idents for each cell or column name of scdata's metadata. MUST match cpdb's columns -#' @param means object holding means.txt from cpdb output -#' @param pvals object holding pvals.txt from cpdb output. Use relevant_interactions.txt if degs_analysis mode. +#' @param scdata single-cell data. can be Seurat/SingleCellExperiment object +#' @param cell_type1 Name of cell type 1. Accepts regex pattern. +#' @param cell_type2 Name of cell type 2. Accepts regex pattern. +#' @param celltype_key Column name in metadata/colData storing the celltype annotations. Values in this column should match the second column of the input `meta.txt` used for CellPhoneDB. +#' @param means Data frame corresponding to `means.txt` from CellPhoneDB. +#' @param pvals Data frame corresponding to `pvalues.txt` or `relevant_interactions.txt` from CellPhoneDB. +#' @param interaction_scores Data frame corresponding to `interaction_scores.txt` from CellPhoneDB version 5 onwards. +#' @param cellsign Data frame corresponding to `CellSign.txt` from CellPhoneDB version 5 onwards. #' @param max_size max size of points. -#' @param p.adjust.method correction method. p.adjust.methods of one of these options: c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none') #' @param keep_significant_only logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb. -#' @param split.by column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis. -#' @param gene.family default = NULL. some predefined group of genes. can take one (or several) of these default options: 'chemokines', 'Th1', 'Th2', 'Th17', 'Treg', 'costimulatory', 'coinhibitory', 'niche'. Also accepts name(s) of custom gene families. -#' @param custom_gene_family default = NULL. If provided, will update the gene.family function with this custom entry. Both `gene.family` (name of the custom family) and `custom_gene_family` must be specified for this to work. Provide either a data.frame with column names as name of family and genes in rows or a named likes like : list("customfamily" = c("genea", "geneb", "genec")) -#' @param genes default = NULL. can specify custom list of genes if gene.family is NULL -#' @param scale logical. scale the expression to mean +/- SD. NULL defaults to TRUE. +#' @param splitby_key column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis. +#' @param gene_family default = NULL. some predefined group of genes. can take one (or several) of these default options: 'chemokines', 'Th1', 'Th2', 'Th17', 'Treg', 'costimulatory', 'coinhibitory', 'niche'. Also accepts name(s) of custom gene families. +#' @param custom_gene_family default = NULL. If provided, will update the gene_family function with this custom entry. Both `gene_family` (name of the custom family) and `custom_gene_family` must be specified for this to work. Provide either a data.frame with column names as name of family and genes in rows or a named likes like : list('customfamily' = c('genea', 'geneb', 'genec')) +#' @param genes default = NULL. can specify custom list of genes if gene_family is NULL #' @param standard_scale logical. scale the expression to range from 0 to 1. NULL defaults to FALSE. #' @param cluster_rows logical. whether or not to cluster the rows. #' @param col_option specify plotting colours -#' @param default_stlye default = TRUE. Show all mean values and trace significant interactions with `higlight` colour. If FALSE, significant interactions will be presented as a white ring. -#' @param noir default = FALSE. makes it b/w -#' @param highlight colour for highlighting p <0.05 +#' @param default_style default = TRUE. Show all mean values and trace significant interactions with `higlight` colour. If FALSE, significant interactions will be presented as a white ring. +#' @param highlight_col colour for highlighting p <0.05 #' @param highlight_size stroke size for highlight if p < 0.05. if NULL, scales to -log10(pval). -#' @param separator separator to use to split between celltypes. Unless otherwise specified, the separator will be `>@<`. Make sure the idents and split.by doesn't overlap with this. -#' @param special_character_search_pattern search pattern if the cell type names contains special character. NULL defaults to '/|:|\\?|\\*|\\+|[\\]|\\(|\\)'. -#' @param degs_analysis if is cellphonedb degs_analysis mode. -#' @param verbose prints cat/print statements if TRUE. +#' @param max_highlight_size max size of stroke for highlight. +#' @param special_character_regex_pattern search pattern if the cell type names contains special character. NULL defaults to '/|:|\\?|\\*|\\+|[\\]|\\(|\\)'. +#' @param degs_analysis if is CellPhoneDB degs_analysis mode. #' @param return_table whether or not to return as a table rather than to plot. #' @param exclude_interactions if provided, the interactions will be removed from the output. +#' @param min_interaction_score Filtering the interactions shown by including only those above the given interaction score. +#' @param scale_alpha_by_interaction_scores Whether or not to filter values by the interaction score. +#' @param scale_alpha_by_cellsign Whether or not to filter the transparency of interactions by the cellsign. +#' @param filter_by_cellsign Filter out interactions with a 0 value cellsign. #' @param ... passes arguments to grep for cell_type1 and cell_type2. #' @return ggplot dot plot object of cellphone db output #' @examples #' \donttest{ #' data(kidneyimmune) #' data(cpdb_output) -#' plot_cpdb('B cell', 'CD4T cell', kidneyimmune, 'celltype', means, pvals, split.by = 'Experiment', genes = c('CXCL13', 'CD274', 'CXCR5')) -#' plot_cpdb('B cell', 'CD4T cell', kidneyimmune, 'celltype', means, pvals, split.by = 'Experiment', gene.family = 'chemokines') +#' plot_cpdb(kidneyimmune, 'B cell', 'CD4T cell', 'celltype', means, pvals, splitby_key = 'Experiment', genes = c('CXCL13', 'CD274', 'CXCR5')) +#' plot_cpdb(kidneyimmune, 'B cell', 'CD4T cell', 'celltype', means, pvals, splitby_key = 'Experiment', gene_family = 'chemokines') #' } #' @include utils.R +#' @import dplyr #' @import viridis #' @import ggplot2 #' @import reshape2 #' @export -plot_cpdb <- function(cell_type1, cell_type2, scdata, idents, means, pvals, max_size = 8, - p.adjust.method = NULL, keep_significant_only = FALSE, split.by = NULL, gene.family = NULL, - custom_gene_family = NULL, genes = NULL, scale = NULL, standard_scale = NULL, cluster_rows = TRUE, - col_option = viridis::viridis(50), default_style = TRUE, noir = FALSE, highlight = "red", - highlight_size = NULL, separator = NULL, special_character_search_pattern = NULL, - degs_analysis = FALSE, verbose = FALSE, return_table = FALSE, exclude_interactions = NULL, - ...) { - requireNamespace("grDevices") - if (class(scdata) %in% c("SingleCellExperiment", "SummarizedExperiment")) { - if (verbose) { - cat("data provided is a SingleCellExperiment/SummarizedExperiment object", - sep = "\n") - cat("extracting expression matrix", sep = "\n") - } - requireNamespace("SummarizedExperiment") - requireNamespace("SingleCellExperiment") - # exp_mat <- SummarizedExperiment::assay(scdata) - metadata <- SummarizedExperiment::colData(scdata) - } else if (class(scdata) == "Seurat") { - if (verbose) { - cat("data provided is a Seurat object", sep = "\n") - cat("extracting expression matrix", sep = "\n") - } - metadata <- scdata@meta.data +plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals, + interaction_scores = NULL, cellsign = NULL, max_size = 8, keep_significant_only = TRUE, + splitby_key = NULL, gene_family = NULL, custom_gene_family = NULL, genes = NULL, + standard_scale = TRUE, cluster_rows = TRUE, col_option = viridis::viridis(50), + default_style = TRUE, highlight_col = "red", highlight_size = NULL, max_highlight_size = 2, + special_character_regex_pattern = NULL, degs_analysis = FALSE, return_table = FALSE, + exclude_interactions = NULL, min_interaction_score = 0, scale_alpha_by_interaction_scores = FALSE, + scale_alpha_by_cellsign = FALSE, filter_by_cellsign = FALSE, title = "", ...) { + requireNamespace("SingleCellExperiment") + requireNamespace("grDevices") + if (is.null(special_character_regex_pattern)) { + special_character_regex_pattern <- DEFAULT_SPEC_PAT + } + if (class(scdata) %in% c("SingleCellExperiment", "SummarizedExperiment")) { + metadata <- SingleCellExperiment::colData(scdata) + } else if (class(scdata) == "Seurat") { + metadata <- scdata@meta.data + } + means_mat <- .prep_table(means) + pvals_mat <- .prep_table(pvals) + if (!is.null(interaction_scores)) { + interaction_scores_mat <- .prep_table(interaction_scores) + } else if (!is.null(cellsign)) { + cellsign_mat <- .prep_table(cellsign) + } + col_start <- ifelse(colnames(pvals_mat)[DEFAULT_CLASS_COL] == "classification", + DEFAULT_V5_COL_START, DEFAULT_COL_START) + if (degs_analysis) { + pvals_mat[, col_start:ncol(pvals_mat)] <- 1 - pvals_mat[, col_start:ncol(pvals_mat)] + } + # ok front load a 'dictionary' here. + if (col_start == DEFAULT_V5_COL_START) { + v5tmp <- reshape2::melt(means_mat, id.vars = colnames(means_mat)[1:col_start]) + special_sep <- paste0(rep(DEFAULT_SEP, 3), collapse = "") + row.names(v5tmp) <- paste0(gsub("_", "-", v5tmp$interacting_pair), special_sep, + v5tmp$variable) + v5tmp <- v5tmp[, c("is_integrin", "directionality", "classification")] + } + cell_type1 <- .sub_pattern(cell_type = cell_type1, pattern = special_character_regex_pattern) + cell_type2 <- .sub_pattern(cell_type = cell_type2, pattern = special_character_regex_pattern) + query_list <- .prep_query_group(data = means_mat, genes = genes, gene_family = gene_family, + custom_gene_family = custom_gene_family) + query <- query_list[["query"]] + query_group <- query_list[["query_group"]] + # prepare the cell_type query + if (!is.null(splitby_key)) { + labels <- paste0(metadata[[splitby_key]], "_", metadata[[celltype_key]]) + if (is.factor(metadata[[splitby_key]]) & is.factor(metadata[[celltype_key]])) { + labels <- factor(labels, levels = paste0(levels(metadata[[splitby_key]]), + "_", rep(levels(metadata[[celltype_key]]), each = length(levels(metadata[[splitby_key]]))))) + } else { + labels <- factor(labels) } - if (length(separator) > 0) { - sep = separator + labels <- levels(labels) + groups <- factor(metadata[[splitby_key]]) + groups <- levels(groups) + if (length(groups) > 0) { + # the purpose for this step is to allow for special characters to be used + # in the celltype grepping + if (length(groups) > 1) { + labels2 <- gsub(paste0(paste0(groups, "_"), collapse = "|"), "", + labels) + } else { + labels2 <- gsub(paste0(groups, "_"), "", labels) + } + # this returns the indices from the labels + ct1 <- grep(cell_type1, labels2, ...) + ct2 <- grep(cell_type2, labels2, ...) + c_type1 <- as.list(labels[ct1]) + c_type2 <- as.list(labels[ct2]) + c_type1 <- lapply(c_type1, .sub_pattern, pattern = special_character_regex_pattern) + c_type2 <- lapply(c_type2, .sub_pattern, pattern = special_character_regex_pattern) + grp <- as.list(groups) + celltype <- list() + for (i in 1:length(c_type1)) { + celltype[[i]] <- .create_celltype_query(ctype1 = c_type1[[i]], ctype2 = c_type2, + sep = DEFAULT_SEP) + celltype[[i]] <- lapply(grp, .keep_interested_groups, ct = celltype[[i]], + sep = DEFAULT_SEP) + } + for (i in 1:length(celltype)) { + celltype[[i]] <- celltype[[i]][-which(celltype[[i]] == "")] + } + celltype <- lapply(celltype, unlist) + if (any(unlist(lapply(celltype, is.null)))) { + rm <- which(unlist(lapply(celltype, is.null))) + celltype <- celltype[-rm] + } + cell_type <- do.call(paste0, list(celltype, collapse = "|")) } else { - sep = ">@<" + labels <- metadata[[celltype_key]] + labels <- factor(labels) + labels <- levels(labels) + c_type1 <- as.list(grep(cell_type1, labels, value = TRUE, ...)) + c_type2 <- as.list(grep(cell_type2, labels, value = TRUE, ...)) + c_type1 <- lapply(c_type1, .sub_pattern, pattern = special_character_regex_pattern) + c_type2 <- lapply(c_type2, .sub_pattern, pattern = special_character_regex_pattern) + celltype <- list() + for (i in 1:length(c_type1)) { + celltype[[i]] <- .create_celltype_query(ctype1 = c_type1[[i]], ctype2 = c_type2, + sep = DEFAULT_SEP) + } + cell_type <- do.call(paste0, list(celltype, collapse = "|")) } - means_mat <- means - pvals_mat <- pvals - rownames(means_mat) <- make.names(means_mat$interacting_pair, unique = TRUE) - rownames(pvals_mat) <- make.names(pvals_mat$interacting_pair, unique = TRUE) - colnames(means_mat) <- gsub("\\|", sep, colnames(means_mat)) - rownames(means_mat) <- gsub("_", "-", rownames(means_mat)) - rownames(means_mat) <- gsub("[.]", " ", rownames(means_mat)) - colnames(pvals_mat) <- gsub("\\|", sep, colnames(pvals_mat)) - rownames(pvals_mat) <- gsub("_", "-", rownames(pvals_mat)) - rownames(pvals_mat) <- gsub("[.]", " ", rownames(pvals_mat)) - if (degs_analysis) { - pvals_mat[, 12:ncol(pvals_mat)] <- 1 - pvals_mat[, 12:ncol(pvals_mat)] + } else { + labels <- metadata[[celltype_key]] + labels <- factor(labels) + labels <- levels(labels) + c_type1 <- as.list(grep(cell_type1, labels, value = TRUE)) + c_type2 <- as.list(grep(cell_type2, labels, value = TRUE)) + c_type1 <- lapply(c_type1, .sub_pattern, pattern = special_character_regex_pattern) + c_type2 <- lapply(c_type2, .sub_pattern, pattern = special_character_regex_pattern) + celltype <- list() + for (i in 1:length(c_type1)) { + celltype[[i]] <- .create_celltype_query(ctype1 = c_type1[[i]], ctype2 = c_type2, + sep = DEFAULT_SEP) } - if (length(p.adjust.method) > 0) { - pvals_tmp <- pvals[, 12:ncol(pvals)] - pvals_adj <- matrix(p.adjust(as.vector(as.matrix(pvals_tmp)), method = p.adjust.method), - ncol = ncol(pvals_tmp)) - colnames(pvals_adj) <- colnames(pvals_tmp) - pvals <- cbind(pvals[, c(1:11)], pvals_adj) + cell_type <- do.call(paste0, list(celltype, collapse = "|")) + } + if (!is.null(gene_family) & is.null(genes)) { + if (length(gene_family) == 1) { + means_mat <- .prep_data_querygroup_celltype1(.data = means_mat, .query_group = query_group, + .gene_family = gene_family, .cell_type = cell_type, .celltype = celltype, + ...) + pvals_mat <- .prep_data_querygroup_celltype1(.data = pvals_mat, .query_group = query_group, + .gene_family = gene_family, .cell_type = cell_type, .celltype = celltype, + ...) + if (!is.null(interaction_scores)) { + interaction_scores_mat <- .prep_data_querygroup_celltype1(.data = interaction_scores_mat, + .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, + .celltype = celltype, ...) + } else if (!is.null(cellsign)) { + cellsign_mat <- .prep_data_querygroup_celltype1(.data = cellsign_mat, + .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, + .celltype = celltype, ...) + } + } else if (length(gene_family) > 1) { + means_mat <- .prep_data_querygroup_celltype2(.data = means_mat, .query_group = query_group, + .gene_family = gene_family, .cell_type = cell_type, .celltype = celltype, + ...) + pvals_mat <- .prep_data_querygroup_celltype2(.data = pvals_mat, .query_group = query_group, + .gene_family = gene_family, .cell_type = cell_type, .celltype = celltype, + ...) + if (!is.null(interaction_scores)) { + interaction_scores_mat <- .prep_data_querygroup_celltype2(.data = interaction_scores_mat, + .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, + .celltype = celltype, ...) + } else if (!is.null(cellsign)) { + cellsign_mat <- .prep_data_querygroup_celltype2(.data = cellsign_mat, + .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, + .celltype = celltype, ...) + } } - if (is.null(special_character_search_pattern)) { - pattern <- "/|:|\\?|\\*|\\+|[\\]|\\(|\\)|\\/" - } else { - pattern <- special_character_search_pattern + } else if (is.null(gene_family) & !is.null(genes) | is.null(gene_family) & is.null(genes)) { + means_mat <- .prep_data_query_celltype(.data = means_mat, .query = query, + .cell_type = cell_type, .celltype = celltype, ...) + pvals_mat <- .prep_data_query_celltype(.data = pvals_mat, .query = query, + .cell_type = cell_type, .celltype = celltype, ...) + if (!is.null(interaction_scores)) { + interaction_scores_mat <- .prep_data_query_celltype(.data = interaction_scores_mat, + .cell_type = cell_type, .celltype = celltype, ...) + } else if (!is.null(cellsign)) { + cellsign_mat <- .prep_data_query_celltype(.data = cellsign_mat, .cell_type = cell_type, + .celltype = celltype, ...) + } + } + if (length(means_mat) == 0) { + stop("Please check your options for splitby_key and your celltypes.") + } + # rearrange the columns so that it interleaves the two groups + if (!is.null(splitby_key)) { + if (length(groups) > 0) { + grp <- as.list(groups) + group_i <- lapply(grp, function(g) { + gx <- grep(g, colnames(means_mat), ...) + return(gx) + }) + group_id <- do.call(c, group_i) + means_mat <- means_mat[, as.vector(group_id), drop = FALSE] + if (dim(pvals_mat)[2] > 0) { + pvals_mat <- pvals_mat[, as.vector(group_id), drop = FALSE] + } else { + stop("No significant hits.") + } + } + } + if (keep_significant_only) { + if (dim(pvals_mat)[2] == 0) { + stop("No significant hits.") + } + } + if (cluster_rows) { + if (nrow(means_mat) > 2) { + requireNamespace("stats") + d <- stats::dist(as.data.frame(means_mat)) + h <- stats::hclust(d) + means_mat <- means_mat[h$order, , drop = FALSE] } - cell_type1 <- .sub_pattern(cell_type1, pattern) - cell_type2 <- .sub_pattern(cell_type2, pattern) - if (length(idents) > 1) { - ct1 = grep(cell_type1, idents, value = TRUE, ...) - ct2 = grep(cell_type2, idents, value = TRUE, ...) - checklabels1 <- any(idents %in% c(ct1, ct2)) + } + # scaling + if (standard_scale) { + means_mat2 <- apply(means_mat, 1, range01) + means_mat2 <- t(means_mat2) + } else { + means_mat2 <- means_mat + } + # remove rows that are entirely 0 + whichempty <- which(rowSums(means_mat2) == 0) + if (length(whichempty) > 0) { + means_mat2 <- means_mat2[whichempty, , drop = FALSE] + } + means_mat2 <- as.matrix(means_mat2) + requireNamespace("reshape2") + if (standard_scale) { + df_means <- reshape2::melt(means_mat2, value.name = "scaled_means") + } else { + df_means <- reshape2::melt(means_mat2, value.name = "means") + } + pvals_mat2 <- as.matrix(pvals_mat) + df_pvals <- reshape2::melt(pvals_mat2, value.name = "pvals") + if (!is.null(interaction_scores)) { + interaction_scores_mat2 <- as.matrix(interaction_scores_mat) + df_interaction_scores <- reshape2::melt(interaction_scores_mat2, value.name = "interaction_scores") + } else if (!is.null(cellsign)) { + cellsign_mat2 <- as.matrix(cellsign_mat) + df_cellsign <- reshape2::melt(cellsign_mat2, value.name = "cellsign") + } + # use dplyr left_join to combine df_means and the pvals column in df_pvals. + # df_means and df_pvals should have the same Var1 and Var2. non-mathc should + # fill with NA. + df <- dplyr::left_join(df_means, df_pvals, by = c("Var1", "Var2")) + if (!is.null(interaction_scores)) { + df <- dplyr::left_join(df, df_interaction_scores, by = c("Var1", "Var2")) + } else if (!is.null(cellsign)) { + df <- dplyr::left_join(df, df_cellsign, by = c("Var1", "Var2")) + } + xp <- which(df$pvals == 1) + if (length(xp) > 0) { + df$pvals[which(df$pvals == 1)] <- NA + } + if (keep_significant_only) { + # keep the entire row/ all the comparisons + df_ <- split(df, as.character(df$Var1)) + anysig <- lapply(df_, function(x) { + keep <- any(x$pvals < 0.05) + return(keep) + }) + df_ <- df_[which(unlist(anysig) == TRUE)] + names(df_) <- NULL + df <- do.call(rbind, df_) + } + df$pvals[which(df$pvals == 0)] <- 0.001 + df$pvals[which(df$pvals >= 0.05)] <- NA + if (!is.null(splitby_key)) { + if (length(groups) > 0) { + grp <- as.list(groups) + grp2 <- lapply(grp, function(i) { + x <- paste0(i, "_") + return(x) + }) + searchterm <- do.call(paste, list(grp2, collapse = "|")) + df$group <- gsub(searchterm, "", df$Var2) + } + } else { + df$group <- df$Var2 + } + if (keep_significant_only) { + if (standard_scale) { + if (length(df$scaled_means) == 0) { + stop("No significant genes found and plotting will not proceed.") + } } else { - ct1 = grep(cell_type1, metadata[[idents]], value = TRUE, ...) - ct2 = grep(cell_type2, metadata[[idents]], value = TRUE, ...) - checklabels1 <- any(metadata[[idents]] %in% c(ct1, ct2)) + if (length(df$means) == 0) { + stop("No significant genes found and plotting will not proceed.") + } } - if (!is.null(split.by)) { - if (length(idents) > 1) { - labels <- paste0(metadata[[split.by]], "_", idents) + } + row.names(df) <- paste0(df$Var1, paste0(rep(DEFAULT_SEP, 3), collapse = ""), + df$Var2) + df$Var2 <- gsub(DEFAULT_SEP, "-", df$Var2) + final_levels <- unique(df$Var2) + df$Var2 <- factor(df$Var2, unique(df$Var2)) + df$x_means_ <- df[, colnames(df_means)[3]] + df$x_means_[df[, colnames(df)[4]] < 0.05] <- NA + df$x_stroke <- df$x_means_ + df$x_stroke[!is.na(df$x_stroke)] <- 0 + df$x_stroke[is.na(df$x_stroke)] <- 2 + if (!is.null(exclude_interactions)) { + df <- df[!df$Var1 %in% c(exclude_interactions), ] + } + if (!is.null(interaction_scores)) { + df$x_means_[which(df$interaction_scores < 0)] <- NA + } else if (!is.null(cellsign)) { + df$cellsign[which(df$cellsign < 1)] <- 0.5 + } + df$significant <- ifelse(df$pvals < 0.05, "yes", NA) + if (all(is.na(df$significant))) { + df$significant <- "no" + highlight_col <- "#ffffff" + } + if (default_style) { + df$significant[is.na(df$significant)] <- "no" + } + if (col_start == DEFAULT_V5_COL_START) { + requireNamespace("tibble") + df <- dplyr::left_join(df %>% + tibble::rownames_to_column(), v5tmp %>% + tibble::rownames_to_column(), by = "rowname") + row.names(df) <- df$rowname + } + + if (return_table) { + return(df) + } else { + if (!is.null(interaction_scores)) { + requireNamespace("dplyr") + df <- df %>% + dplyr::filter(interaction_scores >= min_interaction_score) + if (scale_alpha_by_interaction_scores == TRUE) { + if (default_style) { + if (standard_scale) { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means, alpha = interaction_scores)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = means, size = means, alpha = interaction_scores)) + } } else { - labels <- paste0(metadata[[split.by]], "_", metadata[[idents]]) - } - labels <- factor(labels) - labels <- levels(labels) - groups <- factor(metadata[[split.by]]) - groups <- levels(groups) - if (length(groups) > 0) { - # the purpose for this step is to allow for special characters to - # be used in the celltype grepping - if (length(groups) > 1) { - labels2 = gsub(paste0(paste0(groups, "_"), collapse = "|"), "", labels) + if (all(df$significant == "no")) { + if (standard_scale) { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means, alpha = interaction_scores)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = means, size = means, alpha = interaction_scores)) + } + default_style <- TRUE + } else { + highlight_col <- "#FFFFFF" # enforce this + if (standard_scale) { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = highlight_size, + alpha = interaction_scores)) } else { - labels2 = gsub(paste0(groups, "_"), "", labels) + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = x_stroke, + alpha = interaction_scores)) } - # this returns the indices from the labels - ct1 = grep(cell_type1, labels2, value = TRUE, ...) - ct2 = grep(cell_type2, labels2, value = TRUE, ...) - } else { - if (length(idents) > 1) { - labels <- idents + } else { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = highlight_size, + alpha = interaction_scores)) } else { - labels <- metadata[[idents]] + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = x_stroke, alpha = interaction_scores)) } - labels <- factor(labels) - labels <- levels(labels) - ct1 = grep(cell_type1, labels, value = TRUE, ...) - ct2 = grep(cell_type2, labels, value = TRUE, ...) + } + } } - } else { - if (length(idents) > 1) { - labels <- idents + } else { + if (default_style) { + if (standard_scale) { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = means, size = means)) + } } else { - labels <- metadata[[idents]] - } - labels <- factor(labels) - labels <- levels(labels) - ct1 = grep(cell_type1, labels, value = TRUE, ...) - ct2 = grep(cell_type2, labels, value = TRUE, ...) - ct1 = paste0(ct1, collapse = "|") - ct2 = paste0(ct2, collapse = "|") - } - x1 = ct1[ct1 %in% ""] - x2 = ct2[ct2 %in% ""] - if (length(x1) > 0) { - ct1[ct1 %in% ""] <- NA - } - if (length(x2) > 0) { - ct2[ct2 %in% ""] <- NA - } - checklabels2 <- any(colnames(means_mat) %in% c(ct1, ct2)) - if (!checklabels1) { - if (length(idents) > 1) { - # relatively relaxed criteria to allow for the program to continue - options(warn = -1) - ct_1 <- grep(cell_type1, idents, value = TRUE, ...) - ct_2 <- grep(cell_type2, idents, value = TRUE, ...) - options(warn = 0) - checklabels2 <- any(idents %in% ct_1) - if (checklabels2) { - checklabels2 <- any(idents %in% ct_2) - if (!checklabels2) { - stop("Cannot find cell types.\nThe error is mismatch between cell_type2 and the single cell metadata (or idents provided).") - } + if (all(df$significant == "no")) { + if (standard_scale) { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = means, size = means)) + } + default_style <- TRUE + } else { + highlight_col <- "#FFFFFF" # enforce this + if (standard_scale) { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = highlight_size)) } else { - stop("Cannot find cell types.\nThe error is mismatch between cell_type1 and the single cell metadata (or idents provided).") + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = x_stroke)) } - } else { - options(warn = -1) - ct_1 <- grep(cell_type1, metadata[[idents]], value = TRUE, ...) - ct_2 <- grep(cell_type2, metadata[[idents]], value = TRUE, ...) - options(warn = 0) - checklabels2 <- any(metadata[[idents]] %in% ct_1) - if (checklabels2) { - checklabels2 <- any(metadata[[idents]] %in% ct_2) - if (!checklabels2) { - stop("Cannot find cell types.\nThe error is mismatch between cell_type2 and the single cell metadata (or idents provided).") - } + } else { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = highlight_size)) } else { - stop("Cannot find cell types.\nThe error is mismatch between cell_type1 and the single cell metadata (or idents provided).") + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = x_stroke)) } + } + } } - } - if (!checklabels2) { - # relatively relaxed criteria to allow for the program to continue - options(warn = -1) - ct_1 <- grep(cell_type1, colnames(means_mat), value = TRUE) - ct_2 <- grep(cell_type2, colnames(means_mat), value = TRUE) - options(warn = 0) - checklabels2 <- any(colnames(means_mat) %in% ct_1) - if (checklabels2) { - checklabels2 <- any(colnames(means_mat) %in% ct_2) - if (!checklabels2) { - stop("Cannot find cell types. The error is mismatch between cell_type2 and the cpdb metadata.") - } - } else { - stop("Cannot find cell types. The error is mismatch between cell_type1 and the cpdb metadata.") - } - } - if (checklabels1 & checklabels2) { - if (verbose) { - cat("Found cell types in the input data provided. Proceeding with plotting.", - sep = "\n") - } - } - if (is.null(gene.family) & is.null(genes)) { - if (verbose) { - cat("options genes or gene.family are not specified.\nusing entire cpdb output.\n") - } - query <- grep("", means_mat$interacting_pair) - if (verbose) { - cat("for future reference, genes or gene.family can be specified, not both.\ngene.family can be one of the following:", - sep = "\n") - print(c("chemokines", "Th1", "Th2", "Th17", "Treg", "costimulatory", - "coinhibitory", "niche")) - cat("otherwise, please provide gene(s) as a vector in the genes option", - sep = "\n") - } - } - if (!is.null(gene.family) & !is.null(genes)) { - stop("Please specify either genes or gene.family, not both") - if (verbose) { - cat("gene.family can be one of the following:", sep = "\n") - print(c("chemokines", "Th1", "Th2", "Th17", "Treg", "costimulatory", - "coinhibitory", "niche")) - cat("otherwise, please provide gene(s) as a vector in the genes option", - sep = "\n") - } - } - if (!is.null(gene.family) & is.null(genes)) { - chemokines <- grep("^CXC|CCL|CCR|CX3|XCL|XCR", means_mat$interacting_pair) - th1 <- grep("IL2|IL12|IL18|IL27|IFNG|IL10|TNF$|TNF |LTA|LTB|STAT1|CCR5|CXCR3|IL12RB1|IFNGR1|TBX21|STAT4", - means_mat$interacting_pair) - th2 <- grep("IL4|IL5|IL25|IL10|IL13|AREG|STAT6|GATA3|IL4R", means_mat$interacting_pair) - th17 <- grep("IL21|IL22|IL24|IL26|IL17A|IL17A|IL17F|IL17RA|IL10|RORC|RORA|STAT3|CCR4|CCR6|IL23RA|TGFB", - means_mat$interacting_pair) - treg <- grep("IL35|IL10|FOXP3|IL2RA|TGFB", means_mat$interacting_pair) - costimulatory <- grep("CD86|CD80|CD48|LILRB2|LILRB4|TNF|CD2|ICAM|SLAM|LT[AB]|NECTIN2|CD40|CD70|CD27|CD28|CD58|TSLP|PVR|CD44|CD55|CD[1-9]", - means_mat$interacting_pair) - coinhibitory <- grep("SIRP|CD47|ICOS|TIGIT|CTLA4|PDCD1|CD274|LAG3|HAVCR|VSIR", - means_mat$interacting_pair) - niche <- grep("CSF", means_mat$interacting_pair) - query_group <- list(chemokines = chemokines, chemokine = chemokines, th1 = th1, - th2 = th2, th17 = th17, treg = treg, costimulatory = costimulatory, coinhibitory = coinhibitory, - costimulation = costimulatory, coinhibition = coinhibitory, niche = niche) - - if (!is.null(custom_gene_family)){ - cgf <- as.list(custom_gene_family) - cgf <- lapply(cgf, function(x) grep(paste(x, collapse = "|"), means_mat$interacting_pair)) - query_group <- c(query_group, cgf) - } - } else if (is.null(gene.family) & !is.null(genes)) { - query <- grep(paste(genes, collapse = "|"), means_mat$interacting_pair) - } - if (!is.null(split.by)) { - if (length(idents) > 1) { - labels <- paste0(metadata[[split.by]], "_", idents) - } else { - labels <- paste0(metadata[[split.by]], "_", metadata[[idents]]) - } - chk1 = class(metadata[[split.by]]) - chk2 = class(metadata[[idents]]) - if (chk1 == "factor" & chk2 == "factor") { - labels <- factor(labels, levels = paste0(levels(metadata[[split.by]]), - "_", rep(levels(metadata[[idents]]), each = length(levels(metadata[[split.by]]))))) + } + } else if (!is.null(cellsign)) { + if (filter_by_cellsign == TRUE) { + requireNamespace("dplyr") + df <- df %>% + dplyr::filter(cellsign >= 1) + } + if (scale_alpha_by_cellsign == TRUE) { + if (default_style) { + if (standard_scale) { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means, alpha = cellsign)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = means, size = means, alpha = cellsign)) + } } else { - labels <- factor(labels) - } - labels <- levels(labels) - groups <- factor(metadata[[split.by]]) - groups <- levels(groups) - if (length(groups) > 0) { - # the purpose for this step is to allow for special characters to - # be used in the celltype grepping - if (length(groups) > 1) { - labels2 = gsub(paste0(paste0(groups, "_"), collapse = "|"), "", labels) + if (all(df$significant == "no")) { + if (standard_scale) { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means, alpha = cellsign)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = means, size = means, alpha = cellsign)) + } + default_style <- TRUE + } else { + highlight_col <- "#FFFFFF" # enforce this + if (standard_scale) { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = highlight_size, + alpha = cellsign)) } else { - labels2 = gsub(paste0(groups, "_"), "", labels) - } - # this returns the indices from the labels - ct1 = grep(cell_type1, labels2, ...) - ct2 = grep(cell_type2, labels2, ...) - c_type1 <- as.list(labels[ct1]) - c_type2 <- as.list(labels[ct2]) - c_type1 <- lapply(c_type1, .sub_pattern, pattern) - c_type2 <- lapply(c_type2, .sub_pattern, pattern) - grp <- as.list(groups) - celltype = list() - for (i in 1:length(c_type1)) { - celltype[[i]] <- .create_celltype_query(c_type1[[i]], c_type2, sep) - celltype[[i]] <- lapply(grp, .keep_interested_groups, celltype[[i]], - sep) - } - for (i in 1:length(celltype)) { - celltype[[i]] <- celltype[[i]][-which(celltype[[i]] == "")] + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = x_stroke, + alpha = cellsign)) } - celltype <- lapply(celltype, unlist) - if (any(unlist(lapply(celltype, is.null)))) { - rm <- which(unlist(lapply(celltype, is.null))) - celltype <- celltype[-rm] - } - cell_type <- do.call(paste0, list(celltype, collapse = "|")) - } else { - if (length(idents) > 1) { - labels <- idents + } else { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = highlight_size, + alpha = cellsign)) } else { - labels <- metadata[[idents]] + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = x_stroke, alpha = cellsign)) } - labels <- factor(labels) - labels <- levels(labels) - c_type1 = as.list(grep(cell_type1, labels, value = TRUE, ...)) - c_type2 = as.list(grep(cell_type2, labels, value = TRUE, ...)) - c_type1 <- lapply(c_type1, .sub_pattern, pattern) - c_type2 <- lapply(c_type2, .sub_pattern, pattern) - celltype = list() - for (i in 1:length(c_type1)) { - celltype[[i]] <- .create_celltype_query(c_type1[[i]], c_type2, sep) - } - cell_type <- do.call(paste0, list(celltype, collapse = "|")) + } + } } - } else { - if (length(idents) > 1) { - labels <- idents + } else { + if (default_style) { + if (standard_scale) { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = means, size = means)) + } } else { - labels <- metadata[[idents]] - } - labels <- factor(labels) - labels <- levels(labels) - c_type1 = as.list(grep(cell_type1, labels, value = TRUE)) - c_type2 = as.list(grep(cell_type2, labels, value = TRUE)) - c_type1 <- lapply(c_type1, .sub_pattern, pattern) - c_type2 <- lapply(c_type2, .sub_pattern, pattern) - celltype = list() - for (i in 1:length(c_type1)) { - celltype[[i]] <- .create_celltype_query(c_type1[[i]], c_type2, sep) - } - cell_type <- do.call(paste0, list(celltype, collapse = "|")) - } - if (!is.null(gene.family) & is.null(genes)) { - if (length(gene.family) == 1){ - means_mat <- suppressWarnings(tryCatch(means_mat[query_group[[tolower(gene.family)]], - grep(cell_type, colnames(means_mat), useBytes = TRUE, ...), drop = FALSE], - error = function(e) { - colidx <- lapply(celltype, function(z) grep(z, colnames(means_mat), - useBytes = TRUE, ...)) - colidx <- unique(do.call(c, colidx)) - tmpm <- means_mat[query_group[[tolower(gene.family)]], colidx, drop = FALSE] - return(tmpm) - })) - pvals_mat <- suppressWarnings(tryCatch(pvals_mat[query_group[[tolower(gene.family)]], - grep(cell_type, colnames(pvals_mat), useBytes = TRUE, ...), drop = FALSE], - error = function(e) { - colidx <- lapply(celltype, function(z) grep(z, colnames(pvals_mat), - useBytes = TRUE, ...)) - colidx <- unique(do.call(c, colidx)) - tmpm <- pvals_mat[query_group[[tolower(gene.family)]], colidx, drop = FALSE] - return(tmpm) - })) - } else if (length(gene.family) > 1){ - means_mat <- suppressWarnings(tryCatch(means_mat[unlist(query_group[c(tolower(gene.family))], use.names = FALSE), - grep(cell_type, colnames(means_mat), useBytes = TRUE, ...), drop = FALSE], - error = function(e) { - colidx <- lapply(celltype, function(z) grep(z, colnames(means_mat), - useBytes = TRUE, ...)) - colidx <- unique(do.call(c, colidx)) - tmpm <- means_mat[unlist(query_group[c(tolower(gene.family))], use.names = FALSE), colidx, drop = FALSE] - return(tmpm) - })) - pvals_mat <- suppressWarnings(tryCatch(pvals_mat[unlist(query_group[c(tolower(gene.family))], use.names = FALSE), - grep(cell_type, colnames(pvals_mat), useBytes = TRUE, ...), drop = FALSE], - error = function(e) { - colidx <- lapply(celltype, function(z) grep(z, colnames(pvals_mat), - useBytes = TRUE, ...)) - colidx <- unique(do.call(c, colidx)) - tmpm <- pvals_mat[unlist(query_group[c(tolower(gene.family))], use.names = FALSE), colidx, drop = FALSE] - return(tmpm) - })) - } - } else if (is.null(gene.family) & !is.null(genes) | is.null(gene.family) & is.null(genes)) { - means_mat <- suppressWarnings(tryCatch(means_mat[query, grep(cell_type, colnames(means_mat), - useBytes = TRUE, ...), drop = FALSE], error = function(e) { - colidx <- lapply(celltype, function(z) grep(z, colnames(means_mat), useBytes = TRUE, - ...)) - colidx <- unique(do.call(c, colidx)) - tmpm <- means_mat[query, colidx, drop = FALSE] - return(tmpm) - })) - pvals_mat <- suppressWarnings(tryCatch(pvals_mat[query, grep(cell_type, colnames(pvals_mat), - useBytes = TRUE, ...), drop = FALSE], error = function(e) { - colidx <- lapply(celltype, function(z) grep(z, colnames(pvals_mat), useBytes = TRUE, - ...)) - colidx <- unique(do.call(c, colidx)) - tmpm <- pvals_mat[query, colidx, drop = FALSE] - return(tmpm) - })) - } - if (length(means_mat) == 0) { - stop("Please check your options for split.by and your celltypes.") - } - # rearrange the columns so that it interleaves the two groups - if (!is.null(split.by)) { - if (length(groups) > 0) { - grp <- as.list(groups) - group_i <- lapply(grp, function(g) { - gx <- grep(g, colnames(means_mat), ...) - return(gx) - }) - group_id <- do.call(c, group_i) - means_mat <- means_mat[, as.vector(group_id), drop = FALSE] - if (dim(pvals_mat)[2] > 0) { - pvals_mat <- pvals_mat[, as.vector(group_id), drop = FALSE] + if (all(df$significant == "no")) { + if (standard_scale) { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = means, size = means)) + } + default_style <- TRUE + } else { + highlight_col <- "#FFFFFF" # enforce this + if (standard_scale) { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = highlight_size)) } else { - stop("No significant hits.") + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = x_stroke)) } + } else { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = highlight_size)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = x_stroke)) + } + } + } } - } - if (keep_significant_only) { - if (dim(pvals_mat)[2] == 0) { - stop("No significant hits.") - } - } - if (cluster_rows) { - if (nrow(means_mat) > 2) { - d <- dist(as.data.frame(means_mat)) - h <- hclust(d) - means_mat <- means_mat[h$order, , drop = FALSE] - pvals_mat <- pvals_mat[h$order, , drop = FALSE] - } - } - # scaling - if (length(standard_scale) > 0) { + } + } else { + if (default_style) { if (standard_scale) { - means_mat_ <- apply(means_mat, 1, range01) - means_mat_ <- t(means_mat_) + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, fill = scaled_means, + size = scaled_means)) } else { - means_mat_ <- means_mat + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, fill = means, + size = means)) } - } - if (ncol(means_mat) > 1) { - if (length(scale) < 1) { - if (length(standard_scale) > 0) { - if (standard_scale) { - means_mat2 <- means_mat_ - } else { - means_mat2 <- means_mat - } - } else { - means_mat2 <- t(scale(t(means_mat))) - } + } else { + if (all(df$significant == "no")) { + if (standard_scale) { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, + fill = means, size = means)) + } + default_style <- TRUE } else { - if (scale) { - if (length(standard_scale) > 0) { - if (standard_scale) { - means_mat2 <- means_mat_ - } else { - means_mat2 <- t(scale(t(means_mat))) - } - } else { - means_mat2 <- t(scale(t(means_mat))) - } - } else { - means_mat2 <- means_mat - } + highlight_col <- "#FFFFFF" # enforce this + if (standard_scale) { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = highlight_size)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = x_stroke)) + } + } else { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = highlight_size)) + } else { + g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = x_stroke)) + } + } } - } else { - standard_scale = FALSE - scale = FALSE - means_mat2 <- means_mat - } - pvals_mat2 <- as.matrix(pvals_mat) - means_mat2 <- as.matrix(means_mat2) - xx <- which(means_mat == 0) - if (length(xx) > 0) { - means_mat2[which(means_mat == 0)] <- NA + } } - # remove rows that are entirely NA - pvals_mat2 <- pvals_mat2[rowSums(is.na(means_mat2)) != ncol(means_mat2), , drop = FALSE] - means_mat2 <- means_mat2[rowSums(is.na(means_mat2)) != ncol(means_mat2), , drop = FALSE] - if ((length(standard_scale) > 0 && standard_scale) | (length(scale) > 0 && scale) | - (length(scale) < 1 && length(standard_scale) < 1)) { - df_means <- melt(means_mat2, value.name = "scaled_means") - } else { - df_means <- melt(means_mat2, value.name = "means") - } - if (length(p.adjust.method) > 0) { - df_pvals <- melt(pvals_mat2, value.name = "pvals_adj") - df <- data.frame(cbind(df_means, pvals_adj = df_pvals$pvals_adj)) - xp <- which(df$pvals_adj == 1) - if (length(xp) > 0) { - df$pvals_adj[which(df$pvals_adj == 1)] <- NA - } - if (keep_significant_only) { - # keep the entire row/ all the comparisons - df_ <- split(df, as.character(df$Var1)) - anysig <- lapply(df_, function(x) { - keep <- any(x$pvals_adj < 0.05) - return(keep) - }) - df_ <- df_[which(unlist(anysig))] - names(df_) <- NULL - df <- do.call(rbind, df_) - } - df$pvals_adj[which(df$pvals_adj == 0)] <- 0.001 - df$pvals_adj[which(df$pvals_adj >= 0.05)] <- NA + g <- g + geom_point(pch = 21, na.rm = TRUE) + theme_bw() + theme(axis.text.x = element_text(angle = 45, + hjust = 0, color = "#000000"), axis.text.y = element_text(color = "#000000"), + axis.title.x = element_blank(), axis.title.y = element_blank(), legend.direction = "vertical", + legend.box = "horizontal") + scale_x_discrete(position = "top") + scale_radius(range = c(0, + max_size)) + scale_linewidth(range = c(0, max_highlight_size)) + if (default_style) { + g <- g + scale_colour_manual(values = c(yes = highlight_col, no = "#ffffff"), + na.value = NA, na.translate = FALSE) + guides(fill = guide_colourbar(barwidth = 4, + label = TRUE, ticks = TRUE, draw.ulim = TRUE, draw.llim = TRUE, order = 1), + size = guide_legend(reverse = TRUE, order = 2), stroke = guide_legend(reverse = TRUE, + order = 3)) + if (length(col_option) == 1) { + g <- g + scale_fill_gradientn(colors = (grDevices::colorRampPalette(c("white", + col_option)))(100), na.value = "white") + } else { + g <- g + scale_fill_gradientn(colors = c("white", (grDevices::colorRampPalette(col_option))(99)), + na.value = "white") + } } else { - df_pvals <- melt(pvals_mat2, value.name = "pvals") - df <- data.frame(cbind(df_means, pvals = df_pvals$pvals)) - xp <- which(df$pvals == 1) - if (length(xp) > 0) { - df$pvals[which(df$pvals == 1)] <- NA - } - if (keep_significant_only) { - # keep the entire row/ all the comparisons - df_ <- split(df, as.character(df$Var1)) - anysig <- lapply(df_, function(x) { - keep <- any(x$pvals < 0.05) - return(keep) - }) - df_ <- df_[which(unlist(anysig))] - names(df_) <- NULL - df <- do.call(rbind, df_) - } - df$pvals[which(df$pvals == 0)] <- 0.001 - df$pvals[which(df$pvals >= 0.05)] <- NA + g <- g + scale_fill_manual(values = highlight_col, na.value = "#ffffff", + na.translate = TRUE) + guides(colour = guide_colourbar(barwidth = 4, + label = TRUE, ticks = TRUE, draw.ulim = TRUE, draw.llim = TRUE, order = 1), + size = guide_legend(reverse = TRUE, order = 2), stroke = guide_legend(reverse = TRUE, + order = 3)) + df2 <- df + if (standard_scale) { + df2$scaled_means[df$pvals < 0.05] <- NA + g <- g + geom_point(aes(x = Var2, y = Var1, colour = scaled_means, + size = scaled_means), data = df2, inherit_aes = FALSE, na_rm = TRUE) + } else { + df2$means[df$pvals < 0.05] <- NA + g <- g + geom_point(aes(x = Var2, y = Var1, colour = means, size = means), + data = df2, inherit_aes = FALSE, na_rm = TRUE) + } + if (length(col_option) == 1) { + g <- g + scale_colour_gradientn(colors = (grDevices::colorRampPalette(c("white", + col_option)))(100), na.value = "white") + } else { + g <- g + scale_colour_gradientn(colors = c("white", (grDevices::colorRampPalette(col_option))(99)), + na.value = "white") + } } - if (!is.null(split.by)) { - if (length(groups) > 0) { - grp <- as.list(groups) - grp2 <- lapply(grp, function(i) { - x <- paste0(i, "_") - return(x) - }) - searchterm <- do.call(paste, list(grp2, collapse = "|")) - df$group <- gsub(searchterm, "", df$Var2) - } - } else { - df$group <- df$Var2 + if (!is.null(interaction_scores) & (scale_alpha_by_interaction_scores == + TRUE)) { + g <- g + scale_alpha_continuous(breaks = c(0, 25, 50, 75, 100)) } - if (keep_significant_only) { - if ((length(standard_scale) > 0 && standard_scale) | (length(scale) > 0 && - scale) | (length(scale) < 1 && length(standard_scale) < 1)) { - if (length(df$scaled_means) == 0) { - stop("No significant genes found and plotting will not proceed.") - } - } else { - if (length(df$means) == 0) { - stop("No significant genes found and plotting will not proceed.") - } - } + if (!is.null(cellsign) & (scale_alpha_by_cellsign == TRUE)) { + g <- g + scale_alpha_continuous(breaks = c(0, 1)) } - - df$Var2 <- gsub(sep, "-", df$Var2) - final_levels = unique(df$Var2) - df$Var2 <- factor(df$Var2, unique(df$Var2)) - df$x_means_ <- df[, colnames(df_means)[3]] - df$x_means_[df[, colnames(df)[4]] < 0.05] <- NA - df$x_stroke = df$x_means_ - df$x_stroke[!is.na(df$x_stroke)] <- 0 - df$x_stroke[is.na(df$x_stroke)] <- 2 - if (!is.null(exclude_interactions)) { - df <- df[!df$Var1 %in% c(exclude_interactions),] + if (!is.null(highlight_size)) { + g <- g + guides(stroke = "none") } - if (return_table) { - return(df) - } else { - if (default_style) { - if ((length(standard_scale) > 0 && standard_scale) | (length(scale) > - 0 && scale) | (length(scale) < 1 && length(standard_scale) < 1)) { - if (length(p.adjust.method) > 0 && p.adjust.method != "none") { - g <- ggplot(df, aes(x = Var2, y = Var1, color = -log10(pvals_adj), - fill = scaled_means, size = scaled_means)) - } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = -log10(pvals), - fill = scaled_means, size = scaled_means)) - } - } else { - if (length(p.adjust.method) > 0 && p.adjust.method != "none") { - g <- ggplot(df, aes(x = Var2, y = Var1, color = -log10(pvals_adj), - fill = means, size = means)) - } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = -log10(pvals), - fill = means, size = means)) - } - } - if (!is.null(highlight_size)) { - g <- g + geom_point(pch = 21, na.rm = TRUE, stroke = highlight_size) - } else { - if (length(p.adjust.method) > 0 && p.adjust.method != "none") { - s = -log10(df$pvals_adj) - s[is.na(s)] <- 0 - g <- g + geom_point(pch = 21, na.rm = TRUE, stroke = s) - } else { - s = -log10(df$pvals) - s[is.na(s)] <- 0 - g <- g + geom_point(pch = 21, na.rm = TRUE, stroke = s) - } - } - g <- g + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 0, - color = "#000000"), axis.text.y = element_text(color = "#000000"), - axis.ticks = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()) + - scale_x_discrete(position = "top") + scale_color_gradientn(colors = highlight, - na.value = "white") + scale_radius(range = c(0, max_size)) - if (noir) { - g <- g + scale_fill_gradient(low = "white", high = "#131313", na.value = "white") - } else { - if (length(col_option) == 1) { - g <- g + scale_fill_gradientn(colors = grDevices::colorRampPalette(c("white", - col_option))(100), na.value = "white") - } else { - g <- g + scale_fill_gradientn(colors = c("white", grDevices::colorRampPalette(col_option)(99)), - na.value = "white") - } - } - } else { - if ((length(standard_scale) > 0 && standard_scale) | (length(scale) > - 0 && scale) | (length(scale) < 1 && length(standard_scale) < 1)) { - g <- ggplot(df, aes(x = Var2, y = Var1, size = scaled_means, color = scaled_means)) - df2 <- df %>% - filter(is.na(x_means_)) - g <- g + geom_point(pch = 16, na.rm = TRUE) - g <- g + geom_point(data = df2, aes(x = Var2, y = Var1, size = scaled_means, - fill = x_means_, stroke = x_stroke), pch = 21, na.rm = TRUE) - } else { - g <- ggplot(df, aes(x = Var2, y = Var1, size = means, color = means)) - df2 <- df %>% - filter(is.na(x_means_)) - g <- g + geom_point(pch = 16, na.rm = TRUE) - g <- g + geom_point(data = df2, aes(x = Var2, y = Var1, size = scaled_means, - fill = x_means_, stroke = x_stroke), pch = 21, na.rm = TRUE) - } - g <- g + theme_bw() + scale_fill_gradientn(colors = col_option, na.value = "white", - guide = FALSE) + scale_colour_gradientn(colors = col_option) + theme(axis.text.x = element_text(angle = 45, - hjust = 0, color = "#000000"), axis.text.y = element_text(color = "#000000"), - axis.ticks = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()) + - scale_x_discrete(position = "top") + scale_radius(range = c(0, max_size)) - } - if (!is.null(gene.family) & is.null(genes)) { - if (length(gene.family) > 1){ - gene.family <- paste(gene.family, collapse = ', ') - } - g <- g + ggtitle(gene.family) - } - return(g) + if (title != "") { + g <- g + ggtitle(title) + } else if (!is.null(gene_family) & is.null(genes)) { + if (length(gene_family) > 1) { + gene_family <- paste(gene_family, collapse = ", ") + } + g <- g + ggtitle(gene_family) } -} + return(g) + } +} \ No newline at end of file diff --git a/R/plot_cpdb2.R b/R/plot_cpdb2.R index 2bd206f..25cddcf 100644 --- a/R/plot_cpdb2.R +++ b/R/plot_cpdb2.R @@ -1,30 +1,27 @@ -#' Plotting cellphonedb results +#' Plotting CellPhoneDB results #' +#' @param scdata single-cell data. Must be SingleCellExperiment object #' @param cell_type1 cell type 1 #' @param cell_type2 cell type 2 -#' @param scdata single-cell data. can be seurat/summarizedexperiment object -#' @param idents vector holding the idents for each cell or column name of scdata's metadata. MUST match cpdb's columns +#' @param celltype_key column name of scdata's metadata. MUST match cpdb's columns #' @param means object holding means.txt from cpdb output #' @param pvals object holding pvals.txt from cpdb output #' @param deconvoluted object holding deconvoluted.txt from cpdb output -#' @param p.adjust.method correction method. p.adjust.methods of one of these options: c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none') #' @param keep_significant_only logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb. -#' @param split.by column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis. -#' @param scale logical. scale the expression to mean +/- SD. NULL defaults to TRUE. +#' @param splitby_key column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis. #' @param standard_scale logical. scale the expression to range from 0 to 1. Default is TRUE -#' @param separator default = NULL. separator to use to split between celltypes. Unless otherwise specified, the separator will be `>@<`. Make sure the idents and split.by doesn't overlap with this. #' @param gene_symbol_mapping default = NULL.column name for rowData in sce holding the actual gene symbols if row names aren't gene symbols #' @param frac default = 0.1. Minimum fraction of celtypes expressing a gene in order to keep the interaction. Gene must be expressesd >= `frac` in either of the pair of celltypes in order to keep. #' @param remove_self default = TRUE. Remove self-self arcs. #' @param desiredInteractions default = NULL. Specific list of celltype comparisons e.g. list(c('CD4_Tcm', 'cDC1'), c('CD4_Tcm', 'cDC2')). Also accepts a dataframe where first column is celltype 1 and 2nd column is celltype 2. -#' @param interaction_grouping default = NULL. dataframe specifying groupings of cellphonedb interactions. First column must be cellphonedb's interacting_pair column. second column is whatever grouping you want. -#' @param edge_group_colors default = NULL. vector for colour mapping for edge groups. only used if split.by is specified. +#' @param interaction_grouping default = NULL. dataframe specifying groupings of CellPhoneDB interactions. First column must be CellPhoneDB's interacting_pair column. second column is whatever grouping you want. +#' @param edge_group_colors default = NULL. vector for colour mapping for edge groups. only used if splitby_key is specified. #' @param node_group_colors default = NULL. vector for colour mapping for node labels. -#' @param degs_analysis if is cellphonedb degs_analysis mode. +#' @param degs_analysis if is CellPhoneDB degs_analysis mode. #' @param return_df whether to just return this as a data.frame rather than plotting iot #' @param plot_score_as_thickness logical. Whether to scale the thickness of the edges to the interaction score and scale alpha to -log10(significance). Default is TRUE. FALSE will be opposite behaviour #' @param ... passes arguments plot_cpdb -#' @return Plotting cellphonedb results as a weird chord diagram +#' @return Plotting CellPhoneDB results as a weird chord diagram #' @examples #' \donttest{ #' @@ -35,68 +32,50 @@ #' @import ggrepel #' @export -plot_cpdb2 <- function( - cell_type1, cell_type2, scdata, idents, means, pvals, deconvoluted, - p.adjust.method = NULL, keep_significant_only = TRUE, split.by = NULL, standard_scale = TRUE, - separator = NULL, gene_symbol_mapping = NULL, frac = 0.1, remove_self = TRUE, - desiredInteractions = NULL, interaction_grouping = NULL, edge_group_colors = NULL, - node_group_colors = NULL, degs_analysis = FALSE, return_df = FALSE, plot_score_as_thickness = TRUE, - ...) { +plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals, + deconvoluted, keep_significant_only = TRUE, splitby_key = NULL, standard_scale = TRUE, + gene_symbol_mapping = NULL, frac = 0.1, remove_self = TRUE, desiredInteractions = NULL, + interaction_grouping = NULL, edge_group_colors = NULL, node_group_colors = NULL, + degs_analysis = FALSE, return_df = FALSE, plot_score_as_thickness = TRUE, ...) { if (class(scdata) == "Seurat") { stop("Sorry not supported. Please use a SingleCellExperiment object.") } - if (length(separator) > 0) { - sep <- separator - } else { - sep <- ">@<" - } - lr_interactions <- plot_cpdb( - cell_type1 = cell_type1, cell_type2 = cell_type2, scdata = scdata, - idents = idents, split.by = split.by, means = means, pvals = pvals, keep_significant_only = keep_significant_only, - standard_scale = standard_scale, return_table = TRUE, degs_analysis = degs_analysis, ... - ) + lr_interactions <- plot_cpdb(scdata = scdata, cell_type1 = cell_type1, cell_type2 = cell_type2, + celltype_key = celltype_key, splitby_key = splitby_key, means = means, pvals = pvals, + keep_significant_only = keep_significant_only, standard_scale = standard_scale, + return_table = TRUE, degs_analysis = degs_analysis, ...) requireNamespace("SummarizedExperiment") requireNamespace("SingleCellExperiment") - if (is.null(split.by)) { + if (is.null(splitby_key)) { if (any(lr_interactions[, 3] > 0)) { if (any(is.na(lr_interactions[, 3]))) { - lr_interactions <- lr_interactions[lr_interactions[, 3] > 0 & !is.na(lr_interactions[ - , - 3 - ]), ] + lr_interactions <- lr_interactions[lr_interactions[, 3] > 0 & !is.na(lr_interactions[, + 3]), ] } else { lr_interactions <- lr_interactions[lr_interactions[, 3] > 0, ] } } } - subset_clusters <- unique(unlist(lapply( - as.character(lr_interactions$group), - strsplit, sep - ))) - sce_subset <- scdata[, SummarizedExperiment::colData(scdata)[, idents] %in% subset_clusters] - interactions <- means[, c( - "interacting_pair", "gene_a", "gene_b", "partner_a", - "partner_b", "receptor_a", "receptor_b" - )] + subset_clusters <- unique(unlist(lapply(as.character(lr_interactions$group), + strsplit, DEFAULT_SEP))) + sce_subset <- scdata[, SummarizedExperiment::colData(scdata)[, celltype_key] %in% + subset_clusters] + interactions <- means[, c("interacting_pair", "gene_a", "gene_b", "partner_a", + "partner_b", "receptor_a", "receptor_b")] interactions$converted <- gsub("-", " ", interactions$interacting_pair) interactions$converted <- gsub("_", "-", interactions$converted) - interactions_subset <- interactions[interactions$converted %in% lr_interactions$Var1, ] - tm0 <- do.call(c, lapply( - as.list(interactions_subset$interacting_pair), strsplit, - "_" - )) + interactions_subset <- interactions[interactions$converted %in% lr_interactions$Var1, + ] + tm0 <- do.call(c, lapply(as.list(interactions_subset$interacting_pair), strsplit, + "_")) if (any(lapply(tm0, length) > 2)) { complex_id <- which(lapply(tm0, length) > 2) interactions_subset_ <- interactions_subset[complex_id, ] simple_1 <- interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_b)] - partner_1 <- gsub("complex:", "", interactions_subset_$partner_b[grep( - "complex:", - interactions_subset_$partner_b - )]) - partner_2 <- gsub("complex:", "", interactions_subset_$partner_a[grep( - "complex:", - interactions_subset_$partner_a - )]) + partner_1 <- gsub("complex:", "", interactions_subset_$partner_b[grep("complex:", + interactions_subset_$partner_b)]) + partner_2 <- gsub("complex:", "", interactions_subset_$partner_a[grep("complex:", + interactions_subset_$partner_a)]) simple_2 <- interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_a)] for (i in seq_along(simple_1)) { simple_1[i] <- gsub(paste0(partner_1[i], "_|_", partner_1[i]), "", simple_1[i]) @@ -109,34 +88,28 @@ plot_cpdb2 <- function( for (i in seq_along(complex_id)) { tm0[[complex_id[i]]] <- tmplist[[i]] } - tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0)) / 2))) + tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2))) colnames(tm0) <- c("id_a", "id_b") interactions_subset <- cbind(interactions_subset, tm0) - dictionary <- interactions_subset[, c( - "gene_a", "gene_b", "partner_a", "partner_b", - "id_a", "id_b", "receptor_a", "receptor_b" - )] + dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b", + "id_a", "id_b", "receptor_a", "receptor_b")] } else { - tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0)) / 2))) + tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2))) colnames(tm0) <- c("id_a", "id_b") interactions_subset <- cbind(interactions_subset, tm0) - dictionary <- interactions_subset[, c( - "gene_a", "gene_b", "partner_a", "partner_b", - "id_a", "id_b", "receptor_a", "receptor_b" - )] + dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b", + "id_a", "id_b", "receptor_a", "receptor_b")] } if (!is.null(interaction_grouping)) { if ((class(interaction_grouping) == "data.frame")) { - interactions_subset$group <- interaction_grouping[, 2][match( - interactions_subset$interacting_pair, - interaction_grouping[, 1] - )] + interactions_subset$group <- interaction_grouping[, 2][match(interactions_subset$interacting_pair, + interaction_grouping[, 1])] } } # extract all the possible genes. geneid <- unique(c(interactions_subset$id_a, interactions_subset$id_b)) - # rmg = which(geneid == '') if (length(rmg) > 0){ geneid = geneid[-which(geneid - # == '')] } + # rmg = which(geneid == '') if (length(rmg) > 0){ geneid = + # geneid[-which(geneid == '')] } if (all(!geneid %in% row.names(sce_subset))) { geneid <- unique(c(interactions_subset$gene_a, interactions_subset$gene_b)) } @@ -147,23 +120,19 @@ plot_cpdb2 <- function( } # split to list and calculate celltype mean for each treatment group meta <- as.data.frame(SummarizedExperiment::colData(sce_subset_tmp)) - if (!is.null(split.by)) { + if (!is.null(splitby_key)) { sce_list <- list() sce_list_alt <- list() - for (x in unique(meta[, split.by])) { + for (x in unique(meta[, splitby_key])) { sce_list[[x]] <- list() sce_list_alt[[x]] <- list() } for (n in names(sce_list)) { - for (x in unique(meta[, idents])) { - sce_list[[n]][[x]] <- sce_subset_tmp[, meta[, idents] == x & meta[ - , - split.by - ] == n] - sce_list_alt[[n]][[x]] <- sce_subset[, meta[, idents] == x & meta[ - , - split.by - ] == n] + for (x in unique(meta[, celltype_key])) { + sce_list[[n]][[x]] <- sce_subset_tmp[, meta[, celltype_key] == x & + meta[, splitby_key] == n] + sce_list_alt[[n]][[x]] <- sce_subset[, meta[, celltype_key] == x & + meta[, splitby_key] == n] } } sce_list2 <- lapply(sce_list, function(y) { @@ -185,9 +154,9 @@ plot_cpdb2 <- function( } else { sce_list <- list() sce_list_alt <- list() - for (x in unique(meta[, idents])) { - sce_list[[x]] <- sce_subset_tmp[, meta[, idents] == x] - sce_list_alt[[x]] <- sce_subset[, meta[, idents] == x] + for (x in unique(meta[, celltype_key])) { + sce_list[[x]] <- sce_subset_tmp[, meta[, celltype_key] == x] + sce_list_alt[[x]] <- sce_subset[, meta[, celltype_key] == x] } sce_list2 <- lapply(sce_list, .cellTypeMeans) sce_list3 <- lapply(sce_list, .cellTypeFraction) @@ -208,28 +177,29 @@ plot_cpdb2 <- function( } rownames(sce_list2) <- humanreadablename rownames(sce_list3) <- humanreadablename - decon_subset <- deconvoluted[deconvoluted$complex_name %in% .findComplex(interactions_subset), ] + decon_subset <- deconvoluted[deconvoluted$complex_name %in% .findComplex(interactions_subset), + ] if (nrow(decon_subset) > 0) { - # although multiple rows are returned, really it's the same value for the same - # complex + # although multiple rows are returned, really it's the same value for + # the same complex decon_subset <- split(decon_subset, decon_subset$complex_name) decon_subset_expr <- lapply(decon_subset, function(x) { x <- x[, colnames(x) %in% colnames(sce_list2)] x <- colMeans(x) return(x) }) - if (!is.null(split.by)) { + if (!is.null(splitby_key)) { decon_subset_fraction <- lapply(decon_subset, function(x) { x <- unique(x$gene_name) test <- lapply(sce_list_alt, function(y) { - return(lapply(y, .cellTypeFraction_complex, genes = z, gene_symbol_mapping = gene_symbol_mapping)) + return(lapply(y, .cellTypeFraction_complex, genes = z, gene_symbol_mapping = gene_symbol_mapping)) }) return(test) }) decon_subset_fraction <- lapply(decon_subset_fraction, function(x) { y <- lapply(x, function(z) do.call(cbind, z)) for (i in 1:length(y)) { - colnames(y[[i]]) <- paste0(names(y[i]), "_", colnames(y[[i]])) + colnames(y[[i]]) <- paste0(names(y[i]), "_", colnames(y[[i]])) } y <- do.call(cbind, y) return(y) @@ -238,7 +208,7 @@ plot_cpdb2 <- function( decon_subset_fraction <- lapply(decon_subset, function(x) { z <- unique(x$gene_name) test <- lapply(sce_list_alt, function(y) { - return(.cellTypeFraction_complex(y, genes = z, gene_symbol_mapping = gene_symbol_mapping)) + return(.cellTypeFraction_complex(y, genes = z, gene_symbol_mapping = gene_symbol_mapping)) }) return(do.call(cbind, test)) }) @@ -258,24 +228,20 @@ plot_cpdb2 <- function( # make a big fat edgelist if (!is.null(desiredInteractions)) { if (class(desiredInteractions) == "list") { - desiredInteractions_ <- c(desiredInteractions, lapply( - desiredInteractions, - rev - )) + desiredInteractions_ <- c(desiredInteractions, lapply(desiredInteractions, + rev)) cell_type_grid <- as.data.frame(do.call(rbind, desiredInteractions_)) } else if ((class(desiredInteractions) == "data.frame")) { cell_type_grid <- desiredInteractions } cells_test <- unique(unlist(desiredInteractions)) } else { - cells_test <- tryCatch(unique(droplevels(meta[, idents])), error = function(e) { - unique(meta[ - , - idents - ]) + cells_test <- tryCatch(unique(droplevels(meta[, celltype_key])), error = function(e) { + unique(meta[, celltype_key]) }) cell_type_grid <- expand.grid(cells_test, cells_test) } + if (remove_self) { rm_idx <- which(cell_type_grid[, 1] == cell_type_grid[, 2]) if (length(rm_idx) > 0) { @@ -290,251 +256,32 @@ plot_cpdb2 <- function( receptor_b <- interactions_subset$receptor_b producers <- as.character(cell_type_grid[, 1]) receivers <- as.character(cell_type_grid[, 2]) - barcodes <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) + barcodes <- paste0(lr_interactions$Var2, DEFAULT_SEP, lr_interactions$Var1) dfx <- list() - if (!is.null(split.by)) { - for (i in unique(meta[, split.by])) { - dfx[[i]] <- .generateDf( - ligand = ligand, sep = sep, receptor = receptor, + if (!is.null(splitby_key)) { + for (i in unique(meta[, splitby_key])) { + dfx[[i]] <- .generateDf(ligand = ligand, sep = DEFAULT_SEP, receptor = receptor, receptor_a = receptor_a, receptor_b = receptor_b, pair = pair, converted_pair = converted_pair, producers = producers, receivers = receivers, cell_type_means = expr_df, cell_type_fractions = fraction_df, sce = sce_subset, sce_alt = sce_list_alt, - gsm = gene_symbol_mapping, splitted = i - ) + gsm = gene_symbol_mapping, splitted = i) dfx[[i]] <- dfx[[i]][dfx[[i]]$barcode %in% barcodes, ] } } else { - dfx[[1]] <- .generateDf( - ligand = ligand, sep = sep, receptor = receptor, receptor_a = receptor_a, - receptor_b = receptor_b, pair = pair, converted_pair = converted_pair, + dfx[[1]] <- .generateDf(ligand = ligand, sep = DEFAULT_SEP, receptor = receptor, + receptor_a = receptor_a, receptor_b = receptor_b, pair = pair, converted_pair = converted_pair, producers = producers, receivers = receivers, cell_type_means = expr_df, cell_type_fractions = fraction_df, sce = sce_subset, sce_alt = sce_list_alt, - gsm = gene_symbol_mapping - ) + gsm = gene_symbol_mapping) dfx[[1]] <- dfx[[1]][dfx[[1]]$barcode %in% barcodes, ] } if (return_df) { return(dfx) } else { # set the bundled connections - df0 <- lapply(dfx, function(x) x[x$producer_fraction >= frac | x$receiver_fraction >= frac, ]) # save this for later + df0 <- lapply(dfx, function(x) x[x$producer_fraction >= frac | x$receiver_fraction >= + frac, ]) # save this for later # now construct the hierachy - constructGraph <- function(input_group, sep, el, el0, unique_id, interactions_df, - plot_cpdb_out, edge_group = FALSE, edge_group_colors = NULL, node_group_colors = NULL) { - require(igraph) - celltypes <- unique(c(as.character(el$producer), as.character(el$receiver))) - el1 <- data.frame( - from = "root", to = celltypes, barcode_1 = NA, barcode_2 = NA, - barcode_3 = NA - ) - el2 <- data.frame( - from = celltypes, to = paste0(celltypes, sep, "ligand"), - barcode_1 = NA, barcode_2 = NA, barcode_3 = NA - ) - el3 <- data.frame( - from = celltypes, to = paste0(celltypes, sep, "receptor"), - barcode_1 = NA, barcode_2 = NA, barcode_3 = NA - ) - el4 <- do.call(rbind, lapply(celltypes, function(x) { - cell_ligands <- grep(x, el$from, value = TRUE) - cell_ligands_idx <- grep(x, el$from) - if (length(cell_ligands) > 0) { - df <- data.frame( - from = paste0(x, sep, "ligand"), to = cell_ligands, - barcode_1 = el$barcode[cell_ligands_idx], barcode_2 = el$pair[cell_ligands_idx], - barcode_3 = paste0(el$from[cell_ligands_idx], sep, el$to[cell_ligands_idx]) - ) - } else { - df <- NULL - } - })) - el5 <- do.call(rbind, lapply(celltypes, function(x) { - cell_ligands <- grep(x, el$to, value = TRUE) - cell_ligands_idx <- grep(x, el$to) - if (length(cell_ligands) > 0) { - df <- data.frame( - from = paste0(x, sep, "receptor"), to = cell_ligands, - barcode_1 = el$barcode[cell_ligands_idx], barcode_2 = el$pair[cell_ligands_idx], - barcode_3 = paste0(el$from[cell_ligands_idx], sep, el$to[cell_ligands_idx]) - ) - } else { - df <- NULL - } - })) - gr_el <- do.call(rbind, list(el1, el2, el3, el4, el5)) - plot_cpdb_out$barcode <- paste0(plot_cpdb_out$Var2, sep, plot_cpdb_out$Var1) - mean_col <- grep("means$", colnames(plot_cpdb_out), value = TRUE) - means <- plot_cpdb_out[ - match(gr_el$barcode_1, plot_cpdb_out$barcode), - mean_col - ] - pval_col <- grep("pvals", colnames(plot_cpdb_out), value = TRUE) - pvals <- plot_cpdb_out[ - match(gr_el$barcode_1, plot_cpdb_out$barcode), - pval_col - ] - gr_el <- cbind(gr_el, means, pvals) - if (edge_group) { - groups <- interactions_df$group[match(gr_el$barcode_2, interactions_df$interacting_pair)] - } - gr <- graph_from_edgelist(as.matrix(gr_el[, 1:2])) - E(gr)$interaction_score <- as.numeric(means) - E(gr)$pvals <- as.numeric(pvals) - if (edge_group) { - E(gr)$group <- groups - } - E(gr)$name <- gr_el$barcode_3 - # order the graph vertices - V(gr)$type <- NA - V(gr)$type[V(gr)$name %in% el4$to] <- "ligand" - V(gr)$type[V(gr)$name %in% el5$to] <- "receptor" - from <- match(el0$from, V(gr)$name) - to <- match(el0$to, V(gr)$name) - dat <- data.frame(from = el0$from, to = el0$to) - if (nrow(dat) > 0) { - dat$barcode <- paste0(dat$from, sep, dat$to) - interaction_score <- E(gr)$interaction_score[match(dat$barcode, gr_el$barcode_3)] - pval <- E(gr)$pvals[match(dat$barcode, gr_el$barcode_3)] - if (any(is.na(pval))) { - pval[is.na(pval)] <- 1 - } - if (!all(is.na(range01(-log10(pval))))) { - pval <- range01(-log10(pval)) - } - if (edge_group) { - group <- E(gr)$group[match(dat$barcode, gr_el$barcode_3)] - } - ligand_expr <- data.frame( - cell_mol = el$from, expression = el$producer_expression, - fraction = el$producer_fraction - ) - recep_expr <- data.frame( - cell_mol = el$to, expression = el$receiver_expression, - fraction = el$receiver_fraction - ) - expression <- rbind(ligand_expr, recep_expr) - df <- igraph::as_data_frame(gr, "both") - df$vertices$expression <- 0 - df$vertices$fraction <- 0 - df$vertices$expression <- as.numeric(expression$expression)[match( - df$vertices$name, - expression$cell_mol - )] - df$vertices$fraction <- as.numeric(expression$fraction)[match( - df$vertices$name, - expression$cell_mol - )] - df$vertices$celltype <- "" - for (x in cells_test) { - idx <- grepl(paste0(x, sep), df$vertices$name) - df$vertices$celltype[idx] <- x - } - df$vertices$label <- df$vertices$name - df$vertices$label[!df$vertices$name %in% c(el0$from, el0$to)] <- "" - gr <- graph_from_data_frame(df$edges, directed = TRUE, vertices = df$vertices) - for (x in unique_id) { - V(gr)$label <- gsub(paste0(x, sep), "", V(gr)$label) - } - require(ggraph) - require(ggrepel) - if (!is.null(edge_group_colors)) { - edge_group_colors <- edge_group_colors - } else { - nn <- length(unique(E(gr)$group)) - edge_group_colors <- .gg_color_hue(nn) - } - if (!is.null(node_group_colors)) { - node_group_colors <- node_group_colors - } else { - nn <- length(unique(meta[, idents])) - node_group_colors <- .gg_color_hue(nn) - } - # plot the graph - if (edge_group) { - if (plot_score_as_thickness) { - pl <- ggraph(gr, layout = "dendrogram", circular = TRUE) + - geom_conn_bundle( - data = get_con( - from = from, to = to, - group = group, `-log10(sig)` = pval, interaction_score = interaction_score - ), - aes(colour = group, alpha = `-log10(sig)`, width = interaction_score), - tension = 0.5 - ) # + scale_edge_width(range = c(1, 3)) + scale_edge_alpha(limits = c(0, 1)) + - } else { - pl <- ggraph(gr, layout = "dendrogram", circular = TRUE) + - geom_conn_bundle( - data = get_con( - from = from, to = to, - group = group, `-log10(sig)` = pval, interaction_score = interaction_score - ), - aes(colour = group, alpha = interaction_score, width = `-log10(sig)`), - tension = 0.5 - ) # + scale_edge_width(range = c(1, 3)) + scale_edge_alpha(limits = c(0, 1)) + - } - pl <- pl + scale_edge_color_manual(values = edge_group_colors) + - geom_node_point(pch = 19, aes( - size = fraction, filter = leaf, - color = celltype, alpha = type - )) + theme_void() + coord_fixed() + - scale_size_continuous(limits = c(0, 1)) + scale_shape_manual(values = c( - ligand = 19, - receptor = 15 - )) + scale_color_manual(values = node_group_colors) + - geom_text_repel(aes(x = x, y = y, label = label), - segment.square = TRUE, - segment.inflect = TRUE, segment.size = 0.2, force = 0.5, - size = 2, force_pull = 0 - ) + scale_alpha_manual(values = c( - ligand = 0.5, - receptor = 1 - )) + small_legend(keysize = 0.5) + ggtitle(input_group) - } else { - if (plot_score_as_thickness) { - pl <- ggraph(gr, layout = "dendrogram", circular = TRUE) + - geom_conn_bundle( - data = get_con( - from = from, to = to, - `-log10(sig)` = pval, interaction_score = interaction_score - ), - aes(alpha = `-log10(sig)`, width = interaction_score), - tension = 0.5 - ) - } else { - pl <- ggraph(gr, layout = "dendrogram", circular = TRUE) + - geom_conn_bundle( - data = get_con( - from = from, to = to, - `-log10(sig)` = pval, interaction_score = interaction_score - ), - aes(alpha = interaction_score, width = `-log10(sig)`), - tension = 0.5 - ) - } - # scale_edge_width(range = c(1, 3)) + - # scale_edge_alpha(limits = c(0, 1)) + - pl <- pl + scale_edge_color_manual(values = edge_group_colors) + - geom_node_point(pch = 19, aes( - size = fraction, filter = leaf, - color = celltype, alpha = type - )) + theme_void() + coord_fixed() + - scale_size_continuous(limits = c(0, 1)) + scale_shape_manual(values = c( - ligand = 19, - receptor = 15 - )) + scale_color_manual(values = node_group_colors) + - geom_text_repel(aes(x = x, y = y, label = label), - segment.square = TRUE, - segment.inflect = TRUE, segment.size = 0.2, force = 0.5, - size = 2, force_pull = 0 - ) + # geom_node_text(aes(x = x*1.15, y=y*1.15, filter = leaf, label=label, size # =0.01)) + size - scale_alpha_manual(values = c(ligand = 0.5, receptor = 1)) + - small_legend(keysize = 0.5) + ggtitle(input_group) - } - return(pl) - } else { - return(NA) - } - } gl <- list() if (!is.null(interaction_grouping)) { edge_group <- TRUE @@ -544,34 +291,33 @@ plot_cpdb2 <- function( cantplot <- c() noplot <- FALSE for (i in 1:length(dfx)) { - if (!is.null(split.by)) { + if (!is.null(splitby_key)) { if (nrow(dfx[[i]]) > 0 & nrow(df0[[i]]) > 0) { - gl[[i]] <- constructGraph( - names(dfx)[i], sep, dfx[[i]], df0[[i]], - cells_test, interactions_subset, lr_interactions, edge_group, - edge_group_colors, node_group_colors - ) + gl[[i]] <- .constructGraph(input_group = names(dfx)[i], sep = DEFAULT_SEP, + el = dfx[[i]], el0 = df0[[i]], unique_id = cells_test, interactions_df = interactions_subset, + plot_cpdb_out = lr_interactions, celltype_key = celltype_key, + edge_group = edge_group, edge_group_colors = edge_group_colors, + node_group_color = node_group_colors, plot_score_as_thickness = plot_score_as_thickness) } else { - gl[[i]] <- NA - cantplot <- c(cantplot, names(dfx)[i]) + gl[[i]] <- NA + cantplot <- c(cantplot, names(dfx)[i]) } } else { if (nrow(dfx[[i]]) > 0 & nrow(df0[[i]]) > 0) { - gl[[i]] <- constructGraph( - NULL, sep, dfx[[i]], df0[[i]], cells_test, - interactions_subset, lr_interactions, edge_group, edge_group_colors, - node_group_colors - ) + gl[[i]] <- .constructGraph(input_group = NULL, sep = DEFAULT_SEP, + el = dfx[[i]], el0 = df0[[i]], unique_id = cells_test, interactions_df = interactions_subset, + plot_cpdb_out = lr_interactions, celltype_key = celltype_key, + edge_group = edge_group, edge_group_colors = edge_group_colors, + node_group_color = node_group_colors, plot_score_as_thickness = plot_score_as_thickness) } else { - gl[[i]] <- NA - noplot <- TRUE + gl[[i]] <- NA + noplot <- TRUE } } } if (length(cantplot) > 0) { - cat("The following groups in split.by cannot be plotted due to missing/no significant interactions/celltypes", - sep = "\n" - ) + cat("The following groups in splitby_key cannot be plotted due to missing/no significant interactions/celltypes", + sep = "\n") cat(cantplot, sep = "\n") } if (noplot) { @@ -583,4 +329,4 @@ plot_cpdb2 <- function( return(gl[[1]]) } } -} +} \ No newline at end of file diff --git a/R/plot_cpdb3.R b/R/plot_cpdb3.R index fd6a7d6..a0d4807 100644 --- a/R/plot_cpdb3.R +++ b/R/plot_cpdb3.R @@ -1,23 +1,20 @@ -#' Plotting cellphonedb results as a chord diagram +#' Plotting CellPhoneDB results as a chord diagram #' +#' @param scdata single-cell data. can be seurat/summarizedexperiment object #' @param cell_type1 cell type 1 #' @param cell_type2 cell type 2 -#' @param scdata single-cell data. can be seurat/summarizedexperiment object -#' @param idents vector holding the idents for each cell or column name of scdata's metadata. MUST match cpdb's columns +#' @param celltype_key vector holding the celltype_key for each cell or column name of scdata's metadata. MUST match cpdb's columns #' @param means object holding means.txt from cpdb output #' @param pvals object holding pvals.txt from cpdb output #' @param deconvoluted object holding deconvoluted.txt from cpdb output -#' @param p.adjust.method correction method. p.adjust.methods of one of these options: c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none') #' @param keep_significant_only logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb. -#' @param split.by column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis. -#' @param scale logical. scale the expression to mean +/- SD. NULL defaults to TRUE. +#' @param splitby_key column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis. #' @param standard_scale logical. scale the expression to range from 0 to 1. Default is TRUE -#' @param separator default = NULL. separator to use to split between celltypes. Unless otherwise specified, the separator will be `>@<`. Make sure the idents and split.by doesn't overlap with this. #' @param gene_symbol_mapping default = NULL.column name for rowData in sce holding the actual gene symbols if row names aren't gene symbols #' @param frac default = 0.1. Minimum fraction of celtypes expressing a gene in order to keep the interaction. Gene must be expressesd >= `frac` in either of the pair of celltypes in order to keep. #' @param remove_self default = TRUE. Remove self-self arcs. #' @param desiredInteractions default = NULL. Specific list of celltype comparisons e.g. list(c('CD4_Tcm', 'cDC1'), c('CD4_Tcm', 'cDC2')). Also accepts a dataframe where first column is celltype 1 and 2nd column is celltype 2. -#' @param degs_analysis if is cellphonedb degs_analysis mode. +#' @param degs_analysis if is CellPhoneDB degs_analysis mode. #' @param directional Whether links have directions. 1 means the direction is from the first column in df to the second column, -1 is the reverse, 0 is no direction, and 2 for two directional. #' @param alpha transparency for links #' @param edge_colors vector of colors for links @@ -26,7 +23,7 @@ #' @param legend.pos.x x position of legend #' @param legend.pos.y y position of legend #' @param ... passes arguments plot_cpdb -#' @return Plotting cellphonedb results as a CellChat inspired chord diagram +#' @return Plotting CellPhoneDB results as a CellChat inspired chord diagram #' @examples #' \donttest{ #' @@ -36,69 +33,50 @@ #' @importFrom grDevices recordPlot #' @export -plot_cpdb3 <- function( - cell_type1, cell_type2, scdata, idents, means, pvals, deconvoluted, - p.adjust.method = NULL, keep_significant_only = TRUE, split.by = NULL, standard_scale = TRUE, - separator = NULL, gene_symbol_mapping = NULL, frac = 0.1, remove_self = TRUE, - desiredInteractions = NULL, degs_analysis = FALSE, directional = 1, alpha = 0.5, - edge_colors = NULL, grid_colors = NULL, show_legend = TRUE, legend.pos.x = 20, - legend.pos.y = 20, ...) { +plot_cpdb3 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals, + deconvoluted, keep_significant_only = TRUE, splitby_key = NULL, standard_scale = TRUE, + gene_symbol_mapping = NULL, frac = 0.1, remove_self = TRUE, desiredInteractions = NULL, + degs_analysis = FALSE, directional = 1, alpha = 0.5, edge_colors = NULL, grid_colors = NULL, + show_legend = TRUE, legend.pos.x = 20, legend.pos.y = 20, ...) { if (class(scdata) == "Seurat") { stop("Sorry not supported. Please use a SingleCellExperiment object.") } - if (length(separator) > 0) { - sep <- separator - } else { - sep <- ">@<" - } - lr_interactions <- plot_cpdb( - cell_type1 = cell_type1, cell_type2 = cell_type2, - scdata = scdata, idents = idents, split.by = split.by, means = means, pvals = pvals, + lr_interactions <- plot_cpdb(scdata = scdata, cell_type1 = cell_type1, cell_type2 = cell_type2, + celltype_key = celltype_key, splitby_key = splitby_key, means = means, pvals = pvals, keep_significant_only = keep_significant_only, standard_scale = standard_scale, - return_table = TRUE, degs_analysis = degs_analysis, ... - ) + return_table = TRUE, degs_analysis = degs_analysis, ...) requireNamespace("SummarizedExperiment") requireNamespace("SingleCellExperiment") - if (is.null(split.by)) { + if (is.null(splitby_key)) { if (any(lr_interactions[, 3] > 0)) { if (any(is.na(lr_interactions[, 3]))) { - lr_interactions <- lr_interactions[lr_interactions[, 3] > 0 & !is.na(lr_interactions[ - , - 3 - ]), ] + lr_interactions <- lr_interactions[lr_interactions[, 3] > 0 & !is.na(lr_interactions[, + 3]), ] } else { lr_interactions <- lr_interactions[lr_interactions[, 3] > 0, ] } } } - subset_clusters <- unique(unlist(lapply( - as.character(lr_interactions$group), - strsplit, sep - ))) - sce_subset <- scdata[, SummarizedExperiment::colData(scdata)[, idents] %in% subset_clusters] - interactions <- means[, c( - "interacting_pair", "gene_a", "gene_b", "partner_a", - "partner_b", "receptor_a", "receptor_b" - )] + subset_clusters <- unique(unlist(lapply(as.character(lr_interactions$group), + strsplit, DEFAULT_SEP))) + sce_subset <- scdata[, SummarizedExperiment::colData(scdata)[, celltype_key] %in% + subset_clusters] + interactions <- means[, c("interacting_pair", "gene_a", "gene_b", "partner_a", + "partner_b", "receptor_a", "receptor_b")] interactions$converted <- gsub("-", " ", interactions$interacting_pair) interactions$converted <- gsub("_", "-", interactions$converted) - interactions_subset <- interactions[interactions$converted %in% lr_interactions$Var1, ] - tm0 <- do.call(c, lapply( - as.list(interactions_subset$interacting_pair), strsplit, - "_" - )) + interactions_subset <- interactions[interactions$converted %in% lr_interactions$Var1, + ] + tm0 <- do.call(c, lapply(as.list(interactions_subset$interacting_pair), strsplit, + "_")) if (any(lapply(tm0, length) > 2)) { complex_id <- which(lapply(tm0, length) > 2) interactions_subset_ <- interactions_subset[complex_id, ] simple_1 <- interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_b)] - partner_1 <- gsub("complex:", "", interactions_subset_$partner_b[grep( - "complex:", - interactions_subset_$partner_b - )]) - partner_2 <- gsub("complex:", "", interactions_subset_$partner_a[grep( - "complex:", - interactions_subset_$partner_a - )]) + partner_1 <- gsub("complex:", "", interactions_subset_$partner_b[grep("complex:", + interactions_subset_$partner_b)]) + partner_2 <- gsub("complex:", "", interactions_subset_$partner_a[grep("complex:", + interactions_subset_$partner_a)]) simple_2 <- interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_a)] for (i in seq_along(simple_1)) { simple_1[i] <- gsub(paste0(partner_1[i], "_|_", partner_1[i]), "", simple_1[i]) @@ -111,21 +89,17 @@ plot_cpdb3 <- function( for (i in seq_along(complex_id)) { tm0[[complex_id[i]]] <- tmplist[[i]] } - tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0)) / 2))) + tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2))) colnames(tm0) <- c("id_a", "id_b") interactions_subset <- cbind(interactions_subset, tm0) - dictionary <- interactions_subset[, c( - "gene_a", "gene_b", "partner_a", "partner_b", - "id_a", "id_b", "receptor_a", "receptor_b" - )] + dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b", + "id_a", "id_b", "receptor_a", "receptor_b")] } else { - tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0)) / 2))) + tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2))) colnames(tm0) <- c("id_a", "id_b") interactions_subset <- cbind(interactions_subset, tm0) - dictionary <- interactions_subset[, c( - "gene_a", "gene_b", "partner_a", "partner_b", - "id_a", "id_b", "receptor_a", "receptor_b" - )] + dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b", + "id_a", "id_b", "receptor_a", "receptor_b")] } # extract all the possible genes. geneid <- unique(c(interactions_subset$id_a, interactions_subset$id_b)) @@ -138,23 +112,19 @@ plot_cpdb3 <- function( } # split to list and calculate celltype mean for each treatment group meta <- as.data.frame(SummarizedExperiment::colData(sce_subset_tmp)) - if (!is.null(split.by)) { + if (!is.null(splitby_key)) { sce_list <- list() sce_list_alt <- list() - for (x in unique(meta[, split.by])) { + for (x in unique(meta[, splitby_key])) { sce_list[[x]] <- list() sce_list_alt[[x]] <- list() } for (n in names(sce_list)) { - for (x in unique(meta[, idents])) { - sce_list[[n]][[x]] <- sce_subset_tmp[, meta[, idents] == x & meta[ - , - split.by - ] == n] - sce_list_alt[[n]][[x]] <- sce_subset[, meta[, idents] == x & meta[ - , - split.by - ] == n] + for (x in unique(meta[, celltype_key])) { + sce_list[[n]][[x]] <- sce_subset_tmp[, meta[, celltype_key] == x & + meta[, splitby_key] == n] + sce_list_alt[[n]][[x]] <- sce_subset[, meta[, celltype_key] == x & + meta[, splitby_key] == n] } } sce_list2 <- lapply(sce_list, function(y) { @@ -176,9 +146,9 @@ plot_cpdb3 <- function( } else { sce_list <- list() sce_list_alt <- list() - for (x in unique(meta[, idents])) { - sce_list[[x]] <- sce_subset_tmp[, meta[, idents] == x] - sce_list_alt[[x]] <- sce_subset[, meta[, idents] == x] + for (x in unique(meta[, celltype_key])) { + sce_list[[x]] <- sce_subset_tmp[, meta[, celltype_key] == x] + sce_list_alt[[x]] <- sce_subset[, meta[, celltype_key] == x] } sce_list2 <- lapply(sce_list, .cellTypeMeans) sce_list3 <- lapply(sce_list, .cellTypeFraction) @@ -199,7 +169,8 @@ plot_cpdb3 <- function( } rownames(sce_list2) <- humanreadablename rownames(sce_list3) <- humanreadablename - decon_subset <- deconvoluted[deconvoluted$complex_name %in% .findComplex(interactions_subset), ] + decon_subset <- deconvoluted[deconvoluted$complex_name %in% .findComplex(interactions_subset), + ] if (nrow(decon_subset) > 0) { # although multiple rows are returned, really it's the same value for # the same complex @@ -209,18 +180,18 @@ plot_cpdb3 <- function( x <- colMeans(x) return(x) }) - if (!is.null(split.by)) { + if (!is.null(splitby_key)) { decon_subset_fraction <- lapply(decon_subset, function(x) { z <- unique(x$gene_name) test <- lapply(sce_list_alt, function(y) { - return(lapply(y, .cellTypeFraction_complex, genes = z, gene_symbol_mapping = gene_symbol_mapping)) + return(lapply(y, .cellTypeFraction_complex, genes = z, gene_symbol_mapping = gene_symbol_mapping)) }) return(test) }) decon_subset_fraction <- lapply(decon_subset_fraction, function(x) { y <- lapply(x, function(z) do.call(cbind, z)) for (i in 1:length(y)) { - colnames(y[[i]]) <- paste0(names(y[i]), "_", colnames(y[[i]])) + colnames(y[[i]]) <- paste0(names(y[i]), "_", colnames(y[[i]])) } y <- do.call(cbind, y) return(y) @@ -229,7 +200,7 @@ plot_cpdb3 <- function( decon_subset_fraction <- lapply(decon_subset, function(x) { z <- unique(x$gene_name) test <- lapply(sce_list_alt, function(y) { - return(.cellTypeFraction_complex(y, genes = z, gene_symbol_mapping = gene_symbol_mapping)) + return(.cellTypeFraction_complex(y, genes = z, gene_symbol_mapping = gene_symbol_mapping)) }) return(do.call(cbind, test)) }) @@ -249,21 +220,16 @@ plot_cpdb3 <- function( # make a big fat edgelist if (!is.null(desiredInteractions)) { if (class(desiredInteractions) == "list") { - desiredInteractions_ <- c(desiredInteractions, lapply( - desiredInteractions, - rev - )) + desiredInteractions_ <- c(desiredInteractions, lapply(desiredInteractions, + rev)) cell_type_grid <- as.data.frame(do.call(rbind, desiredInteractions_)) } else if ((class(desiredInteractions) == "data.frame")) { cell_type_grid <- desiredInteractions } cells_test <- unique(unlist(desiredInteractions)) } else { - cells_test <- tryCatch(unique(droplevels(meta[, idents])), error = function(e) { - unique(meta[ - , - idents - ]) + cells_test <- tryCatch(unique(droplevels(meta[, celltype_key])), error = function(e) { + unique(meta[, celltype_key]) }) cell_type_grid <- expand.grid(cells_test, cells_test) } @@ -281,127 +247,46 @@ plot_cpdb3 <- function( receptor_b <- interactions_subset$receptor_b producers <- as.character(cell_type_grid[, 1]) receivers <- as.character(cell_type_grid[, 2]) - barcodes <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) + barcodes <- paste0(lr_interactions$Var2, DEFAULT_SEP, lr_interactions$Var1) dfx <- list() - if (!is.null(split.by)) { - for (i in unique(meta[, split.by])) { - dfx[[i]] <- .generateDf( - ligand = ligand, sep = sep, receptor = receptor, + if (!is.null(splitby_key)) { + for (i in unique(meta[, splitby_key])) { + dfx[[i]] <- .generateDf(ligand = ligand, sep = DEFAULT_SEP, receptor = receptor, receptor_a = receptor_a, receptor_b = receptor_b, pair = pair, converted_pair = converted_pair, producers = producers, receivers = receivers, cell_type_means = expr_df, cell_type_fractions = fraction_df, sce = sce_subset, sce_alt = sce_list_alt, - gsm = gene_symbol_mapping, splitted = i - ) + gsm = gene_symbol_mapping, splitted = i) dfx[[i]] <- dfx[[i]][dfx[[i]]$barcode %in% barcodes, ] } } else { - dfx[[1]] <- .generateDf( - ligand = ligand, sep = sep, receptor = receptor, receptor_a = receptor_a, - receptor_b = receptor_b, pair = pair, converted_pair = converted_pair, + dfx[[1]] <- .generateDf(ligand = ligand, sep = DEFAULT_SEP, receptor = receptor, + receptor_a = receptor_a, receptor_b = receptor_b, pair = pair, converted_pair = converted_pair, producers = producers, receivers = receivers, cell_type_means = expr_df, cell_type_fractions = fraction_df, sce = sce_subset, sce_alt = sce_list_alt, - gsm = gene_symbol_mapping - ) + gsm = gene_symbol_mapping) dfx[[1]] <- dfx[[1]][dfx[[1]]$barcode %in% barcodes, ] } - chord_diagram <- function(tmp_dfx, lr_interactions, p.adjust_method, scaled, - alpha, directional, show_legend, edge_cols, grid_cols, legend.pos.x, legend.pos.y, - title) { - tmp_dfx <- .swap_ligand_receptor(tmp_dfx) - if (scaled) { - interactions_items <- lr_interactions$scaled_means - } else { - interactions_items <- lr_interactions$means - } - names(interactions_items) <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) - if (!is.null(p.adjust_method)) { - pvals_items <- lr_interactions$pvals_adj - } else { - pvals_items <- lr_interactions$pvals - } - names(pvals_items) <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) - interactions_items[is.na(pvals_items)] <- 1 - tmp_dfx$pair_swap <- gsub("_", " - ", tmp_dfx$pair_swap) - tmp_dfx$value <- interactions_items[tmp_dfx$barcode] - tmp_dfx$pval <- pvals_items[tmp_dfx$barcode] - edge_color <- .scPalette(length(unique(tmp_dfx$pair_swap))) - names(edge_color) <- unique(tmp_dfx$pair_swap) - if (!is.null(edge_cols)) { - edge_color[names(edge_cols)] <- edge_cols - } - if (!is.null(grid_cols)) { - if (length(grid_cols) != length(unique(tmp_dfx$receiver_swap))) { - stop(paste0( - "Please provide ", length(unique(tmp_dfx$receiver_swap)), - " to grid_colors." - )) - } else { - grid_color <- grid_cols - } - } else { - grid_color <- .scPalette(length(unique(tmp_dfx$receiver_swap))) - } - if (is.null(grid_cols)) { - names(grid_color) <- unique(tmp_dfx$receiver_swap) - } - tmp_dfx$edge_color <- edge_color[tmp_dfx$pair_swap] - tmp_dfx$edge_color <- colorspace::adjust_transparency(tmp_dfx$edge_color, - alpha = alpha - ) - tmp_dfx$edge_color[is.na(tmp_dfx$pval)] <- NA - tmp_dfx$grid_color <- grid_color[tmp_dfx$receiver_swap] - tmp_dfx$grid_color[is.na(tmp_dfx$pval)] <- NA - tmp_dfx <- tmp_dfx[!duplicated(tmp_dfx$barcode), ] - if (directional == 2) { - link.arr.type <- "triangle" - } else { - link.arr.type <- "big.arrow" - } - cells <- unique(c(tmp_dfx$producer_swap, tmp_dfx$receiver_swap)) - names(cells) <- cells - circos.clear() - chordDiagram(tmp_dfx[c("producer_swap", "receiver_swap", "value")], - directional = directional, - direction.type = c("diffHeight", "arrows"), link.arr.type = link.arr.type, - annotationTrack = c("name", "grid"), col = tmp_dfx$edge_color, grid.col = grid_color, - group = cells - ) - requireNamespace("ComplexHeatmap") - if (show_legend) { - lgd <- ComplexHeatmap::Legend( - at = names(edge_color), type = "grid", - legend_gp = grid::gpar(fill = edge_color), title = "interactions" - ) - ComplexHeatmap::draw(lgd, - x = unit(1, "npc") - unit(legend.pos.x, "mm"), - y = unit(legend.pos.y, "mm"), just = c("right", "bottom") - ) - } - title(main = title) - circos.clear() - gg <- recordPlot() - return(gg) - } + gl <- list() if (length(show_legend) > 1) { for (i in 1:length(dfx)) { - gl[[i]] <- tryCatch(chord_diagram( - dfx[[i]], lr_interactions, p.adjust.method, - standard_scale, alpha, directional, show_legend[i], edge_colors, - grid_colors, legend.pos.x, legend.pos.y, names(dfx)[i] - ), error = function(e) { - return(NA) - }) + gl[[i]] <- tryCatch(.chord_diagram3(tmp_df = dfx[[i]], lr_interaction = lr_interactions, + scaled = standard_scale, sep = DEFAULT_SEP, alpha = alpha, directional = directional, + show_legend = show_legend[i], edge_cols = edge_colors, grid_cols = grid_colors, + legend.pos.x = legend.pos.x, legend.pos.y = legend.pos.y, title = names(dfx)[i]), + error = function(e) { + return(NA) + }) } } else { for (i in 1:length(dfx)) { - gl[[i]] <- tryCatch(chord_diagram( - dfx[[i]], lr_interactions, p.adjust.method, - standard_scale, alpha, directional, show_legend, edge_colors, grid_colors, - legend.pos.x, legend.pos.y, names(dfx)[i] - ), error = function(e) { - return(NA) - }) + gl[[i]] <- tryCatch(.chord_diagram3(tmp_dfx = dfx[[i]], lr_interaction = lr_interactions, + scaled = standard_scale, sep = DEFAULT_SEP, alpha = alpha, directional = directional, + show_legend = show_legend, edge_cols = edge_colors, grid_cols = grid_colors, + legend.pos.x = legend.pos.x, legend.pos.y = legend.pos.y, title = names(dfx)[i]), + error = function(e) { + return(NA) + }) } } if (length(gl) > 1) { @@ -409,4 +294,4 @@ plot_cpdb3 <- function( } else { return(gl[[1]]) } -} +} \ No newline at end of file diff --git a/R/plot_cpdb4.R b/R/plot_cpdb4.R index 8d96352..217a662 100644 --- a/R/plot_cpdb4.R +++ b/R/plot_cpdb4.R @@ -1,24 +1,21 @@ -#' Plotting select interactions from cellphonedb results as a chord diagram +#' Plotting select interactions from CellPhoneDB results as a chord diagram #' -#' @param interaction interaction to plot. Please use '-' to separate the two molecules e.g. CD40-CD40LG +#' @param scdata single-cell data. can be seurat/summarizedexperiment object #' @param cell_type1 cell type 1 #' @param cell_type2 cell type 2 -#' @param scdata single-cell data. can be seurat/summarizedexperiment object -#' @param idents vector holding the idents for each cell or column name of scdata's metadata. MUST match cpdb's columns +#' @param celltype_key vector holding the celltype_key for each cell or column name of scdata's metadata. MUST match cpdb's columns #' @param means object holding means.txt from cpdb output #' @param pvals object holding pvals.txt from cpdb output #' @param deconvoluted object holding deconvoluted.txt from cpdb output -#' @param p.adjust.method correction method. p.adjust.methods of one of these options: c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none') +#' @param interaction interaction to plot. Please use '-' to separate the two molecules e.g. CD40-CD40LG #' @param keep_significant_only logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb. -#' @param split.by column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis. -#' @param scale logical. scale the expression to mean +/- SD. NULL defaults to TRUE. +#' @param splitby_key column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis. #' @param standard_scale logical. scale the expression to range from 0 to 1. Default is TRUE -#' @param separator default = NULL. separator to use to split between celltypes. Unless otherwise specified, the separator will be `>@<`. Make sure the idents and split.by doesn't overlap with this. #' @param gene_symbol_mapping default = NULL.column name for rowData in sce holding the actual gene symbols if row names aren't gene symbols #' @param frac default = 0.1. Minimum fraction of celtypes expressing a gene in order to keep the interaction. Gene must be expressesd >= `frac` in either of the pair of celltypes in order to keep. #' @param remove_self default = TRUE. Remove self-self arcs. #' @param desiredInteractions default = NULL. Specific list of celltype comparisons e.g. list(c('CD4_Tcm', 'cDC1'), c('CD4_Tcm', 'cDC2')). Also accepts a dataframe where first column is celltype 1 and 2nd column is celltype 2. -#' @param degs_analysis if is cellphonedb degs_analysis mode. +#' @param degs_analysis if is CellPhoneDB degs_analysis mode. #' @param directional Whether links have directions. 1 means the direction is from the first column in df to the second column, -1 is the reverse, 0 is no direction, and 2 for two directional. #' @param alpha transparency for links #' @param edge_colors vector of colors for links @@ -28,7 +25,7 @@ #' @param legend.pos.x x position of legend #' @param legend.pos.y y position of legend #' @param ... passes arguments plot_cpdb -#' @return Plotting cellphonedb results as a CellChat inspired chord diagram for specific interactions +#' @return Plotting CellPhoneDB results as a CellChat inspired chord diagram for specific interactions #' @examples #' \donttest{ #' @@ -38,44 +35,30 @@ #' @importFrom grDevices recordPlot #' @export -plot_cpdb4 <- function( - interaction, cell_type1, cell_type2, scdata, idents, means, - pvals, deconvoluted, p.adjust.method = NULL, keep_significant_only = TRUE, split.by = NULL, - standard_scale = TRUE, separator = NULL, gene_symbol_mapping = NULL, frac = 0.1, - remove_self = TRUE, desiredInteractions = NULL, degs_analysis = FALSE, directional = 1, - alpha = 0.5, edge_colors = NULL, grid_colors = NULL, grid_scale = 0.1, show_legend = TRUE, +plot_cpdb4 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals, + deconvoluted, interaction, keep_significant_only = TRUE, splitby_key = NULL, + standard_scale = TRUE, gene_symbol_mapping = NULL, frac = 0.1, remove_self = TRUE, + desiredInteractions = NULL, degs_analysis = FALSE, directional = 1, alpha = 0.5, + edge_colors = NULL, grid_colors = NULL, grid_scale = 0.1, show_legend = TRUE, legend.pos.x = 20, legend.pos.y = 20, ...) { genes <- strsplit(interaction, "-") genesx <- unlist(lapply(genes, function(g) { - c(paste(g, collapse = "-"), paste(rev(g), - collapse = "-" - )) + c(paste(g, collapse = "-"), paste(rev(g), collapse = "-")) })) if (class(scdata) == "Seurat") { stop("Sorry not supported. Please use a SingleCellExperiment object.") } - if (length(separator) > 0) { - sep <- separator - } else { - sep <- ">@<" - } - lr_interactions <- plot_cpdb( - cell_type1 = ".", cell_type2 = ".", scdata = scdata, - idents = idents, split.by = split.by, means = means, pvals = pvals, keep_significant_only = keep_significant_only, - standard_scale = standard_scale, return_table = TRUE, degs_analysis = degs_analysis, - ... - ) + lr_interactions <- plot_cpdb(scdata = scdata, cell_type1 = ".", cell_type2 = ".", + celltype_key = celltype_key, splitby_key = splitby_key, means = means, pvals = pvals, + keep_significant_only = keep_significant_only, standard_scale = standard_scale, + return_table = TRUE, degs_analysis = degs_analysis, ...) lr_interactions <- lr_interactions[lr_interactions$Var1 %in% genesx, ] - lr_interactions <- cbind(lr_interactions, do.call(rbind, strsplit( - as.character(lr_interactions$group), - ">@<" - ))) + lr_interactions <- cbind(lr_interactions, do.call(rbind, strsplit(as.character(lr_interactions$group), + ">@<"))) vals1 <- grep(paste0(c(cell_type1, cell_type2), collapse = "|"), lr_interactions$`1`, - value = TRUE - ) + value = TRUE) vals2 <- grep(paste0(c(cell_type1, cell_type2), collapse = "|"), lr_interactions$`2`, - value = TRUE - ) + value = TRUE) vals <- unique(c(vals1, vals2)) lr_interactions[!((lr_interactions$`1` %in% vals) & (lr_interactions$`2` %in% vals)), 3] <- 0 @@ -83,34 +66,26 @@ plot_cpdb4 <- function( vals)), 4] <- NA requireNamespace("SummarizedExperiment") requireNamespace("SingleCellExperiment") - subset_clusters <- unique(unlist(lapply( - as.character(lr_interactions$group), - strsplit, sep - ))) - sce_subset <- scdata[, SummarizedExperiment::colData(scdata)[, idents] %in% subset_clusters] - interactions <- means[, c( - "interacting_pair", "gene_a", "gene_b", "partner_a", - "partner_b", "receptor_a", "receptor_b" - )] + subset_clusters <- unique(unlist(lapply(as.character(lr_interactions$group), + strsplit, DEFAULT_SEP))) + sce_subset <- scdata[, SummarizedExperiment::colData(scdata)[, celltype_key] %in% + subset_clusters] + interactions <- means[, c("interacting_pair", "gene_a", "gene_b", "partner_a", + "partner_b", "receptor_a", "receptor_b")] interactions$converted <- gsub("-", " ", interactions$interacting_pair) interactions$converted <- gsub("_", "-", interactions$converted) - interactions_subset <- interactions[interactions$converted %in% lr_interactions$Var1, ] - tm0 <- do.call(c, lapply( - as.list(interactions_subset$interacting_pair), strsplit, - "_" - )) + interactions_subset <- interactions[interactions$converted %in% lr_interactions$Var1, + ] + tm0 <- do.call(c, lapply(as.list(interactions_subset$interacting_pair), strsplit, + "_")) if (any(lapply(tm0, length) > 2)) { complex_id <- which(lapply(tm0, length) > 2) interactions_subset_ <- interactions_subset[complex_id, ] simple_1 <- interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_b)] - partner_1 <- gsub("complex:", "", interactions_subset_$partner_b[grep( - "complex:", - interactions_subset_$partner_b - )]) - partner_2 <- gsub("complex:", "", interactions_subset_$partner_a[grep( - "complex:", - interactions_subset_$partner_a - )]) + partner_1 <- gsub("complex:", "", interactions_subset_$partner_b[grep("complex:", + interactions_subset_$partner_b)]) + partner_2 <- gsub("complex:", "", interactions_subset_$partner_a[grep("complex:", + interactions_subset_$partner_a)]) simple_2 <- interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_a)] for (i in seq_along(simple_1)) { simple_1[i] <- gsub(paste0(partner_1[i], "_|_", partner_1[i]), "", simple_1[i]) @@ -123,21 +98,17 @@ plot_cpdb4 <- function( for (i in seq_along(complex_id)) { tm0[[complex_id[i]]] <- tmplist[[i]] } - tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0)) / 2))) + tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2))) colnames(tm0) <- c("id_a", "id_b") interactions_subset <- cbind(interactions_subset, tm0) - dictionary <- interactions_subset[, c( - "gene_a", "gene_b", "partner_a", "partner_b", - "id_a", "id_b", "receptor_a", "receptor_b" - )] + dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b", + "id_a", "id_b", "receptor_a", "receptor_b")] } else { - tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0)) / 2))) + tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2))) colnames(tm0) <- c("id_a", "id_b") interactions_subset <- cbind(interactions_subset, tm0) - dictionary <- interactions_subset[, c( - "gene_a", "gene_b", "partner_a", "partner_b", - "id_a", "id_b", "receptor_a", "receptor_b" - )] + dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b", + "id_a", "id_b", "receptor_a", "receptor_b")] } # extract all the possible genes. geneid <- unique(c(interactions_subset$id_a, interactions_subset$id_b)) @@ -150,23 +121,19 @@ plot_cpdb4 <- function( } # split to list and calculate celltype mean for each treatment group meta <- as.data.frame(SummarizedExperiment::colData(sce_subset_tmp)) - if (!is.null(split.by)) { + if (!is.null(splitby_key)) { sce_list <- list() sce_list_alt <- list() - for (x in unique(meta[, split.by])) { + for (x in unique(meta[, splitby_key])) { sce_list[[x]] <- list() sce_list_alt[[x]] <- list() } for (n in names(sce_list)) { - for (x in unique(meta[, idents])) { - sce_list[[n]][[x]] <- sce_subset_tmp[, meta[, idents] == x & meta[ - , - split.by - ] == n] - sce_list_alt[[n]][[x]] <- sce_subset[, meta[, idents] == x & meta[ - , - split.by - ] == n] + for (x in unique(meta[, celltype_key])) { + sce_list[[n]][[x]] <- sce_subset_tmp[, meta[, celltype_key] == x & + meta[, splitby_key] == n] + sce_list_alt[[n]][[x]] <- sce_subset[, meta[, celltype_key] == x & + meta[, splitby_key] == n] } } sce_list2 <- lapply(sce_list, function(y) { @@ -188,9 +155,9 @@ plot_cpdb4 <- function( } else { sce_list <- list() sce_list_alt <- list() - for (x in unique(meta[, idents])) { - sce_list[[x]] <- sce_subset_tmp[, meta[, idents] == x] - sce_list_alt[[x]] <- sce_subset[, meta[, idents] == x] + for (x in unique(meta[, celltype_key])) { + sce_list[[x]] <- sce_subset_tmp[, meta[, celltype_key] == x] + sce_list_alt[[x]] <- sce_subset[, meta[, celltype_key] == x] } sce_list2 <- lapply(sce_list, .cellTypeMeans) sce_list3 <- lapply(sce_list, .cellTypeFraction) @@ -211,7 +178,8 @@ plot_cpdb4 <- function( } rownames(sce_list2) <- humanreadablename rownames(sce_list3) <- humanreadablename - decon_subset <- deconvoluted[deconvoluted$complex_name %in% .findComplex(interactions_subset), ] + decon_subset <- deconvoluted[deconvoluted$complex_name %in% .findComplex(interactions_subset), + ] if (nrow(decon_subset) > 0) { # although multiple rows are returned, really it's the same value for # the same complex @@ -221,18 +189,18 @@ plot_cpdb4 <- function( x <- colMeans(x) return(x) }) - if (!is.null(split.by)) { + if (!is.null(splitby_key)) { decon_subset_fraction <- lapply(decon_subset, function(x) { z <- unique(x$gene_name) test <- lapply(sce_list_alt, function(y) { - return(lapply(y, .cellTypeFraction_complex, genes = z, gene_symbol_mapping = gene_symbol_mapping)) + return(lapply(y, .cellTypeFraction_complex, genes = z, gene_symbol_mapping = gene_symbol_mapping)) }) return(test) }) decon_subset_fraction <- lapply(decon_subset_fraction, function(x) { y <- lapply(x, function(z) do.call(cbind, z)) for (i in 1:length(y)) { - colnames(y[[i]]) <- paste0(names(y[i]), "_", colnames(y[[i]])) + colnames(y[[i]]) <- paste0(names(y[i]), "_", colnames(y[[i]])) } y <- do.call(cbind, y) return(y) @@ -241,7 +209,7 @@ plot_cpdb4 <- function( decon_subset_fraction <- lapply(decon_subset, function(x) { z <- unique(x$gene_name) test <- lapply(sce_list_alt, function(y) { - return(.cellTypeFraction_complex(y, genes = z, gene_symbol_mapping = gene_symbol_mapping)) + return(.cellTypeFraction_complex(y, genes = z, gene_symbol_mapping = gene_symbol_mapping)) }) return(do.call(cbind, test)) }) @@ -261,21 +229,16 @@ plot_cpdb4 <- function( # make a big fat edgelist if (!is.null(desiredInteractions)) { if (class(desiredInteractions) == "list") { - desiredInteractions_ <- c(desiredInteractions, lapply( - desiredInteractions, - rev - )) + desiredInteractions_ <- c(desiredInteractions, lapply(desiredInteractions, + rev)) cell_type_grid <- as.data.frame(do.call(rbind, desiredInteractions_)) } else if ((class(desiredInteractions) == "data.frame")) { cell_type_grid <- desiredInteractions } cells_test <- unique(unlist(desiredInteractions)) } else { - cells_test <- tryCatch(unique(droplevels(meta[, idents])), error = function(e) { - unique(meta[ - , - idents - ]) + cells_test <- tryCatch(unique(droplevels(meta[, celltype_key])), error = function(e) { + unique(meta[, celltype_key]) }) cell_type_grid <- expand.grid(cells_test, cells_test) } @@ -293,148 +256,52 @@ plot_cpdb4 <- function( receptor_b <- interactions_subset$receptor_b producers <- as.character(cell_type_grid[, 1]) receivers <- as.character(cell_type_grid[, 2]) - barcodes <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) + barcodes <- paste0(lr_interactions$Var2, DEFAULT_SEP, lr_interactions$Var1) dfx <- list() - if (!is.null(split.by)) { - for (i in unique(meta[, split.by])) { - dfx[[i]] <- .generateDf( - ligand = ligand, sep = sep, receptor = receptor, + if (!is.null(splitby_key)) { + for (i in unique(meta[, splitby_key])) { + dfx[[i]] <- .generateDf(ligand = ligand, sep = DEFAULT_SEP, receptor = receptor, receptor_a = receptor_a, receptor_b = receptor_b, pair = pair, converted_pair = converted_pair, producers = producers, receivers = receivers, cell_type_means = expr_df, cell_type_fractions = fraction_df, sce = sce_subset, sce_alt = sce_list_alt, - gsm = gene_symbol_mapping, splitted = i - ) + gsm = gene_symbol_mapping, splitted = i) dfx[[i]] <- dfx[[i]][dfx[[i]]$barcode %in% barcodes, ] } } else { - dfx[[1]] <- .generateDf( - ligand = ligand, sep = sep, receptor = receptor, receptor_a = receptor_a, - receptor_b = receptor_b, pair = pair, converted_pair = converted_pair, + dfx[[1]] <- .generateDf(ligand = ligand, sep = DEFAULT_SEP, receptor = receptor, + receptor_a = receptor_a, receptor_b = receptor_b, pair = pair, converted_pair = converted_pair, producers = producers, receivers = receivers, cell_type_means = expr_df, cell_type_fractions = fraction_df, sce = sce_subset, sce_alt = sce_list_alt, - gsm = gene_symbol_mapping - ) + gsm = gene_symbol_mapping) dfx[[1]] <- dfx[[1]][dfx[[1]]$barcode %in% barcodes, ] } - - chord_diagram <- function(tmp_dfx, lr_interactions, p.adjust_method, scaled, - alpha, directional, show_legend, edge_cols, grid_cols, legend.pos.x, legend.pos.y, - title, grid_scale) { - tmp_dfx <- .swap_ligand_receptor(tmp_dfx) - unique_celltype <- unique(c(lr_interactions$`1`, lr_interactions$`2`)) - na_df <- data.frame(t(combn(unique_celltype, 2))) - colnames(na_df) <- c("producer_swap", "receiver_swap") - if (scaled) { - interactions_items <- lr_interactions$scaled_means - } else { - interactions_items <- lr_interactions$means - } - names(interactions_items) <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) - if (!is.null(p.adjust_method)) { - pvals_items <- lr_interactions$pvals_adj - } else { - pvals_items <- lr_interactions$pvals - } - names(pvals_items) <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) - interactions_items[is.na(pvals_items)] <- 1 - tmp_dfx$pair_swap <- gsub("_", " - ", tmp_dfx$pair_swap) - tmp_dfx$value <- interactions_items[tmp_dfx$barcode] - tmp_dfx$pval <- pvals_items[tmp_dfx$barcode] - edge_color <- .scPalette(length(unique(tmp_dfx$pair_swap))) - names(edge_color) <- unique(tmp_dfx$pair_swap) - if (!is.null(edge_cols)) { - edge_color[names(edge_cols)] <- edge_cols - } - if (!is.null(grid_cols)) { - if (length(grid_cols) != length(unique(tmp_dfx$receiver_swap))) { - stop(paste0( - "Please provide ", length(unique(tmp_dfx$receiver_swap)), - " to grid_colors." - )) - } else { - grid_color <- grid_cols - } - } else { - grid_color <- .scPalette(length(unique(tmp_dfx$receiver_swap))) - } - if (is.null(grid_cols)) { - names(grid_color) <- unique(tmp_dfx$receiver_swap) - } - tmp_dfx$edge_color <- edge_color[tmp_dfx$pair_swap] - tmp_dfx$edge_color <- colorspace::adjust_transparency(tmp_dfx$edge_color, - alpha = alpha - ) - tmp_dfx$edge_color[is.na(tmp_dfx$pval)] <- NA - tmp_dfx$grid_color <- grid_color[tmp_dfx$receiver_swap] - tmp_dfx$grid_color[is.na(tmp_dfx$pval)] <- NA - tmp_dfx <- tmp_dfx[!duplicated(tmp_dfx$barcode), ] - # filter to non na - tmp_dfx_not_na <- tmp_dfx[!is.na(tmp_dfx$pval), ] - emptydf <- data.frame(matrix(ncol = ncol(tmp_dfx_not_na), nrow = nrow(na_df))) - colnames(emptydf) <- colnames(tmp_dfx_not_na) - emptydf$producer_swap <- na_df$producer_swap - emptydf$receiver_swap <- na_df$receiver_swap - tmp_dfx <- rbind(tmp_dfx_not_na, emptydf) - tmp_dfx$value[is.na(tmp_dfx$value)] <- grid_scale - if (directional == 2) { - link.arr.type <- "triangle" - } else { - link.arr.type <- "big.arrow" - } - cells <- unique(c(tmp_dfx$producer_swap, tmp_dfx$receiver_swap)) - names(cells) <- cells - circos.clear() - chordDiagram(tmp_dfx[c("producer_swap", "receiver_swap", "value")], - directional = directional, - direction.type = c("diffHeight", "arrows"), link.arr.type = link.arr.type, - annotationTrack = c("name", "grid"), col = tmp_dfx$edge_color, grid.col = grid_color, - group = cells - ) - requireNamespace("ComplexHeatmap") - if (show_legend) { - lgd <- ComplexHeatmap::Legend( - at = names(edge_color), type = "grid", - legend_gp = grid::gpar(fill = edge_color), title = "interactions" - ) - ComplexHeatmap::draw(lgd, - x = unit(1, "npc") - unit(legend.pos.x, "mm"), - y = unit(legend.pos.y, "mm"), just = c("right", "bottom") - ) - } - title(main = title) - circos.clear() - gg <- recordPlot() - return(gg) - } gl <- list() if (length(show_legend) > 1) { for (i in 1:length(dfx)) { - gl[[i]] <- tryCatch( - chord_diagram( - dfx[[i]], lr_interactions, p.adjust.method, - standard_scale, alpha, directional, show_legend[i], edge_colors, - grid_colors, legend.pos.x, legend.pos.y, names(dfx)[i], grid_scale - ), - error = function(e) { - return(NA) - } - ) + gl[[i]] <- tryCatch(.chord_diagram4(tmp_dfx = dfx[[i]], lr_interactions = lr_interactions, + scaled = standard_scale, sep = DEFAULT_SEP, alpha = alpha, directional = directional, + show_legend = show_legend[i], edge_cols = edge_colors, grid_cols = grid_colors, + legend.pos.x = legend.pos.x, legend.pos.y = legend.pos.y, title = names(dfx)[i], + grid_scale = grid_scale), error = function(e) { + return(NA) + }) } } else { for (i in 1:length(dfx)) { - gl[[i]] <- tryCatch(chord_diagram( - dfx[[i]], lr_interactions, p.adjust.method, - standard_scale, alpha, directional, show_legend, edge_colors, grid_colors, - legend.pos.x, legend.pos.y, names(dfx)[i], grid_scale - ), error = function(e) { + gl[[i]] <- tryCatch(.chord_diagram4(tmp_dfx = dfx[[i]], lr_interactions = lr_interactions, + scaled = standard_scale, sep = DEFAULT_SEP, alpha = alpha, directional = directional, + show_legend = show_legend, edge_cols = edge_colors, grid_cols = grid_colors, + legend.pos.x = legend.pos.x, legend.pos.y = legend.pos.y, title = names(dfx)[i], + grid_scale = grid_scale), error = function(e) { return(NA) }) } } + if (length(gl) > 1) { return(gl) } else { return(gl[[1]]) } -} +} \ No newline at end of file diff --git a/R/plot_cpdb_heatmap.R b/R/plot_cpdb_heatmap.R index a865e3c..84ea918 100644 --- a/R/plot_cpdb_heatmap.R +++ b/R/plot_cpdb_heatmap.R @@ -1,8 +1,8 @@ -#' Plotting cellphonedb results as a heatmap -#' -#' @param pvals object holding pvals.txt from cpdb output. Use relevant_interactions.txt if degs_analysis mode. -#' @param degs_analysis if is cellphonedb degs_analysis mode. -#' @param log1p_transform whether to log1p transform the matrix before plotting. +#' Plotting CellPhoneDB results as a heatmap + +#' @param pvals Dataframe corresponding to `pvalues.txt` or `relevant_interactions.txt` from CellPhoneDB. +#' @param degs_analysis Whether `CellPhoneDB` was run in `deg_analysis` mode +#' @param log1p_transform Whether to log1p transform the output. #' @param show_rownames whether to show row names. #' @param show_colnames whether to show column names. #' @param scale scaling mode for pheatmap. @@ -20,8 +20,6 @@ #' @param high_col high colour for heatmap. #' @param alpha pvalue threshold to trim. #' @param return_tables whether or not to return the results as a table rather than the heatmap -#' @param degs_analysis if is cellphonedb degs_analysis mode. -#' @param verbose prints cat/print statements if TRUE. #' @param symmetrical whether or not to return as symmetrical matrix #' @param ... passed to pheatmap::pheatmap. #' @return pheatmap object of cellphone db output @@ -29,24 +27,25 @@ #' \donttest{ #' data(kidneyimmune) #' data(cpdb_output2) -#' plot_cpdb_heatmap(kidneyimmune, "celltype", pvals2) +#' plot_cpdb_heatmap(pvals2) #' } #' @import pheatmap #' @export plot_cpdb_heatmap <- function( - pvals, log1p_transform = FALSE, show_rownames = TRUE, - show_colnames = TRUE, scale = "none", cluster_cols = TRUE, cluster_rows = TRUE, - border_color = "white", fontsize_row = 11, fontsize_col = 11, family = "Arial", - main = "", treeheight_col = 0, treeheight_row = 0, low_col = "dodgerblue4", mid_col = "peachpuff", - high_col = "deeppink4", alpha = 0.05, return_tables = FALSE, degs_analysis = FALSE, - verbose = FALSE, symmetrical = TRUE, ...) { + pvals, degs_analysis = FALSE, log1p_transform = FALSE, + show_rownames = TRUE, show_colnames = TRUE, scale = "none", cluster_cols = TRUE, + cluster_rows = TRUE, border_color = "white", fontsize_row = 11, fontsize_col = 11, + family = "Arial", main = "", treeheight_col = 0, treeheight_row = 0, low_col = "dodgerblue4", + mid_col = "peachpuff", high_col = "deeppink4", alpha = 0.05, return_tables = FALSE, + symmetrical = TRUE, ...) { requireNamespace("reshape2") requireNamespace("grDevices") all_intr <- pvals + col_start <- ifelse(colnames(all_intr)[DEFAULT_CLASS_COL] == "classification", DEFAULT_V5_COL_START, DEFAULT_COL_START) intr_pairs <- all_intr$interacting_pair - all_intr <- t(all_intr[, -c(1:11)]) + all_intr <- t(all_intr[, -c(1:col_start - 1)]) colnames(all_intr) <- intr_pairs all_count <- reshape2::melt(all_intr) if (!degs_analysis) { @@ -54,7 +53,6 @@ plot_cpdb_heatmap <- function( } else { all_count$significant <- all_count$value == 1 } - count1x <- all_count %>% group_by(Var1) %>% summarise(COUNT = sum(significant)) %>% @@ -76,7 +74,7 @@ plot_cpdb_heatmap <- function( diag(count_mat) <- dcm } - if (log1p_transform) { + if (log1p_transform == TRUE) { count_mat <- log1p(count_mat) } diff --git a/R/utils.R b/R/utils.R index 7a8f452..08d99ce 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,84 @@ +#' @import ggplot2 +#' @import ggraph +#' @importFrom circlize circos.clear chordDiagram +#' @importFrom grDevices recordPlot + +DEFAULT_SEP <- ">@<" +DEFAULT_SPEC_PAT <- "/|:|\\?|\\*|\\+|[\\]|\\(|\\)|\\/" +DEFAULT_V5_COL_START <- 14 +DEFAULT_CLASS_COL <- 13 +DEFAULT_COL_START <- 12 + + +.prep_table <- function(data) { + dat <- data + rownames(dat) <- make.names(dat$interacting_pair, unique = TRUE) + colnames(dat) <- gsub("\\|", DEFAULT_SEP, colnames(dat)) + rownames(dat) <- gsub("_", "-", rownames(dat)) + rownames(dat) <- gsub("[.]", " ", rownames(dat)) + return(dat) +} + +.set_x_stroke <- function(df, isnull, stroke) { + for (i in seq_len(nrow(df))) { + if (isnull) { + nullstatus <- is.na(df[i, "x_stroke"]) + } else { + nullstatus <- !is.na(df[i, "x_stroke"]) + } + if (nullstatus) { + df[i, "x_stroke"] <- stroke + } + } + return(df) +} + +.prep_query_group <- function(data, genes = NULL, gene_family = NULL, custom_gene_family = NULL) { + if (is.null(gene_family) & is.null(genes)) { + query_group <- NULL + query_id <- grep("", data$interacting_pair) + query <- row.names(data)[query_id] + } else if (!is.null(gene_family) & !is.null(genes)) { + stop("Please specify either genes or gene_family, not both") + } else if (!is.null(gene_family) & is.null(genes)) { + chemokines <- grep("^CXC|CCL|CCR|CX3|XCL|XCR", data$interacting_pair) + th1 <- grep("IL2|IL12|IL18|IL27|IFNG|IL10|TNF$|TNF |LTA|LTB|STAT1|CCR5|CXCR3|IL12RB1|IFNGR1|TBX21|STAT4", data$interacting_pair) + th2 <- grep("IL4|IL5|IL25|IL10|IL13|AREG|STAT6|GATA3|IL4R", data$interacting_pair) + th17 <- grep("IL21|IL22|IL24|IL26|IL17A|IL17A|IL17F|IL17RA|IL10|RORC|RORA|STAT3|CCR4|CCR6|IL23RA|TGFB", data$interacting_pair) + treg <- grep("IL35|IL10|FOXP3|IL2RA|TGFB", data$interacting_pair) + costimulatory <- grep("CD86|CD80|CD48|LILRB2|LILRB4|TNF|CD2|ICAM|SLAM|LT[AB]|NECTIN2|CD40|CD70|CD27|CD28|CD58|TSLP|PVR|CD44|CD55|CD[1-9]", data$interacting_pair) + coinhibitory <- grep("SIRP|CD47|ICOS|TIGIT|CTLA4|PDCD1|CD274|LAG3|HAVCR|VSIR", data$interacting_pair) + query_group <- list( + chemokines = chemokines, + chemokine = chemokines, + th1 = th1, + th2 = th2, + th17 = th17, + treg = treg, + costimulatory = costimulatory, + coinhibitory = coinhibitory, + costimulation = costimulatory, + coinhibition = coinhibitory + ) + if (!is.null(custom_gene_family)) { + cgf <- as.list(custom_gene_family) + cgf <- lapply(cgf, function(x) { + q_id <- grep(paste(x, collapse = "|"), data$interacting_pair) + q <- row.names(data)[q_id] + return(q) + }) + query_group <- c(query_group, cgf) + } + query <- NULL + } else if (is.null(gene_family) & !is.null(genes)) { + query_group <- NULL + query_id <- grep(paste(genes, collapse = "|"), data$interacting_pair) + query <- row.names(data)[query_id] + } + out <- list("query_group" = query_group, "query" = query) + return(out) +} + .sub_pattern <- function(cell_type, pattern) { cell_type_tmp <- unlist(strsplit(cell_type, "*")) if (any(grepl(pattern, cell_type_tmp))) { @@ -11,21 +92,83 @@ } .gg_color_hue <- function(n) { - hues = seq(15, 375, length = n + 1) - hcl(h = hues, l = 65, c = 100)[1:n] + requireNamespace("grDevices") + hues <- seq(15, 375, length = n + 1) + grDevices::hcl(h = hues, l = 65, c = 100)[1:n] +} + +.prep_data_querygroup_celltype1 <- function(.data, .query_group, .gene_family, .cell_type, .celltype, ...) { + dat <- suppressWarnings(tryCatch( + .data[.query_group[[tolower(.gene_family)]], + grep(.cell_type, colnames(.data), useBytes = TRUE, ...), + drop = FALSE, + ], + error = function(e) { + colidx <- lapply(.celltype, function(z) { + grep(z, colnames(.data), + useBytes = TRUE, ... + ) + }) + colidx <- unique(do.call(c, colidx)) + tmpm <- .data[.query_group[[tolower(.gene_family)]], colidx, drop = FALSE] + return(tmpm) + } + )) + dat <- dat[rowSums(is.na(dat)) == 0, ] + return(dat) } +.prep_data_querygroup_celltype2 <- function(.data, .query_group, .gene_family, .cell_type, .celltype, ...) { + dat <- suppressWarnings(tryCatch( + .data[unlist(.query_group[c(tolower(.gene_family))], use.names = FALSE), + grep(.cell_type, colnames(.data), useBytes = TRUE, ...), + drop = FALSE + ], + error = function(e) { + colidx <- lapply(.celltype, function(z) { + grep(z, colnames(.data), + useBytes = TRUE, ... + ) + }) + colidx <- unique(do.call(c, colidx)) + tmpm <- .data[unlist(.query_group[c(tolower(.gene_family))], use.names = FALSE), colidx, drop = FALSE] + return(tmpm) + } + )) + dat <- dat[rowSums(is.na(dat)) == 0, ] + return(dat) +} + +.prep_data_query_celltype <- function(.data, .query, .cell_type, .celltype, ...) { + dat <- suppressWarnings(tryCatch(.data[.query, grep(.cell_type, colnames(.data), + useBytes = TRUE, ... + ), drop = FALSE], error = function(e) { + colidx <- lapply(.celltype, function(z) { + grep(z, colnames(.data), + useBytes = TRUE, + ... + ) + }) + colidx <- unique(do.call(c, colidx)) + tmpm <- .data[.query, colidx, drop = FALSE] + return(tmpm) + })) + dat <- dat[rowSums(is.na(dat)) == 0, ] + return(dat) +} + + .create_celltype_query <- function(ctype1, ctype2, sep) { - ct1 = list() - ct2 = list() + ct1 <- list() + ct2 <- list() for (i in 1:length(ctype2)) { - ct1[i] = paste0("^", ctype1, sep, ctype2[i], "$") - ct2[i] = paste0("^", ctype2[i], sep, ctype1, "$") + ct1[i] <- paste0("^", ctype1, sep, ctype2[i], "$") + ct2[i] <- paste0("^", ctype2[i], sep, ctype1, "$") } - ct_1 = do.call(paste0, list(ct1, collapse = "|")) - ct_2 = do.call(paste0, list(ct2, collapse = "|")) - ct = list(ct_1, ct_2) - ct = do.call(paste0, list(ct, collapse = "|")) + ct_1 <- do.call(paste0, list(ct1, collapse = "|")) + ct_2 <- do.call(paste0, list(ct2, collapse = "|")) + ct <- list(ct_1, ct_2) + ct <- do.call(paste0, list(ct, collapse = "|")) return(ct) } @@ -39,10 +182,12 @@ .scPalette <- function(n) { requireNamespace("grDevices") - colorSpace <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#F29403", "#F781BF", + colorSpace <- c( + "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#F29403", "#F781BF", "#BC9DCC", "#A65628", "#54B0E4", "#222F75", "#1B9E77", "#B2DF8A", "#E3BE00", "#FB9A99", "#E7298A", "#910241", "#00CDD1", "#A6CEE3", "#CE1261", "#5E4FA2", - "#8CA77B", "#00441B", "#DEDC00", "#B3DE69", "#8DD3C7", "#999999") + "#8CA77B", "#00441B", "#DEDC00", "#B3DE69", "#8DD3C7", "#999999" + ) if (n <= length(colorSpace)) { colors <- colorSpace[1:n] } else { @@ -93,8 +238,7 @@ sce_[which(SingleCellExperiment::rowData(sce_)[, gene_symbol_mapping] %in% genes), ] } else { - sce_[which(SingleCellExperiment::rowData(sce_)[, "index"] %in% genes), - ] + sce_[which(SingleCellExperiment::rowData(sce_)[, "index"] %in% genes), ] } }) cm <- mean(Matrix::rowMeans(SingleCellExperiment::counts(scex))) @@ -110,16 +254,15 @@ sce_[which(SingleCellExperiment::rowData(sce_)[, gene_symbol_mapping] %in% genes), ] } else { - - sce_[which(SingleCellExperiment::rowData(sce_)[, "index"] %in% genes), - ] + sce_[which(SingleCellExperiment::rowData(sce_)[, "index"] %in% genes), ] } }) cm <- mean(Matrix::rowMeans(SingleCellExperiment::counts(scex) > 0)) return(cm) } -.generateDf <- function(ligand, sep, receptor, receptor_a, receptor_b, pair, converted_pair, +.generateDf <- function( + ligand, sep, receptor, receptor_a, receptor_b, pair, converted_pair, producers, receivers, cell_type_means, cell_type_fractions, sce, sce_alt, gsm, splitted = NULL) { if (!is.null(splitted)) { @@ -131,7 +274,7 @@ } producer_expression <- data.frame() producer_fraction <- data.frame() - if (!is.null(splitted)){ + if (!is.null(splitted)) { sce_altx <- sce_alt[[splitted]] } else { sce_altx <- sce_alt @@ -143,13 +286,17 @@ y <- cell_type_fractions[ligand[j], pp[i]] } else { if (any(grepl(paste0("^", ligand[j], "$"), row.names(sce)))) { - x <- .cellTypeExpr_complex(sce_altx[[producers[i]]], - ligand[j], gsm) - y <- .cellTypeFraction_complex(sce_altx[[producers[i]]], - ligand[j], gsm) + x <- .cellTypeExpr_complex( + sce_altx[[producers[i]]], + ligand[j], gsm + ) + y <- .cellTypeFraction_complex( + sce_altx[[producers[i]]], + ligand[j], gsm + ) } else { - x <- 0 - y <- 0 + x <- 0 + y <- 0 } } producer_expression[ligand[j], pp[i]] <- x @@ -165,20 +312,24 @@ y <- cell_type_fractions[receptor[j], rc[i]] } else { if (any(grepl(paste0("^", receptor[j], "$"), row.names(sce)))) { - x <- .cellTypeExpr_complex(sce_altx[[receivers[i]]], - receptor[j], gsm) - y <- .cellTypeFraction_complex(sce_altx[[receivers[i]]], - receptor[j], gsm) + x <- .cellTypeExpr_complex( + sce_altx[[receivers[i]]], + receptor[j], gsm + ) + y <- .cellTypeFraction_complex( + sce_altx[[receivers[i]]], + receptor[j], gsm + ) } else { - x <- 0 - y <- 0 + x <- 0 + y <- 0 } } receiver_expression[receptor[j], rc[i]] <- x receiver_fraction[receptor[j], rc[i]] <- y } } - test_df = list() + test_df <- list() for (i in seq_along(pp)) { px <- pp[i] rx <- rc[i] @@ -188,118 +339,507 @@ ra <- receptor_a[j] rb <- receptor_b[j] pr <- pair[j] - out = data.frame(c(lg, rcp, ra, rb, pr, px, rx, producer_expression[lg, - px], producer_fraction[lg, px], receiver_expression[rcp, rx], receiver_fraction[rcp, - rx])) - test_df = c(test_df, out) + out <- data.frame(c(lg, rcp, ra, rb, pr, px, rx, producer_expression[ + lg, + px + ], producer_fraction[lg, px], receiver_expression[rcp, rx], receiver_fraction[ + rcp, + rx + ])) + test_df <- c(test_df, out) } } df_ <- do.call(rbind, test_df) row.names(df_) <- 1:nrow(df_) - colnames(df_) <- c("ligand", "receptor", "receptor_a", "receptor_b", "pair", + colnames(df_) <- c( + "ligand", "receptor", "receptor_a", "receptor_b", "pair", "producer", "receiver", "producer_expression", "producer_fraction", "receiver_expression", - "receiver_fraction") + "receiver_fraction" + ) df_ <- as.data.frame(df_) - df_$from = paste0(df_$producer, sep, df_$ligand) - df_$to = paste0(df_$receiver, sep, df_$receptor) + df_$from <- paste0(df_$producer, sep, df_$ligand) + df_$to <- paste0(df_$receiver, sep, df_$receptor) if (!is.null(splitted)) { - df_$producer_ = df_$producer - df_$receiver_ = df_$receiver - df_$from = gsub(paste0(splitted, "_"), "", df_$from) - df_$to = gsub(paste0(splitted, "_"), "", df_$to) - df_$producer = gsub(paste0(splitted, "_"), "", df_$producer) - df_$receiver = gsub(paste0(splitted, "_"), "", df_$receiver) - df_$barcode = paste0(df_$producer_, "-", df_$receiver_, sep, converted_pair) + df_$producer_ <- df_$producer + df_$receiver_ <- df_$receiver + df_$from <- gsub(paste0(splitted, "_"), "", df_$from) + df_$to <- gsub(paste0(splitted, "_"), "", df_$to) + df_$producer <- gsub(paste0(splitted, "_"), "", df_$producer) + df_$receiver <- gsub(paste0(splitted, "_"), "", df_$receiver) + df_$barcode <- paste0(df_$producer_, "-", df_$receiver_, sep, converted_pair) } else { - df_$barcode = paste0(df_$producer, "-", df_$receiver, sep, converted_pair) + df_$barcode <- paste0(df_$producer, "-", df_$receiver, sep, converted_pair) } return(df_) } .swap_ligand_receptor <- function(df) { - is_r_a = as.logical(df$receptor_a) - is_r_b = as.logical(df$receptor_b) - lg = df$ligand - rp = df$receptor - from = df$from - to = df$to - prd = df$producer - rec = df$receiver - prd_exp = df$producer_expression - prd_fra = df$producer_fraction - rec_exp = df$receiver_expression - rec_fra = df$receiver_fraction + is_r_a <- as.logical(df$receptor_a) + is_r_b <- as.logical(df$receptor_b) + lg <- df$ligand + rp <- df$receptor + from <- df$from + to <- df$to + prd <- df$producer + rec <- df$receiver + prd_exp <- df$producer_expression + prd_fra <- df$producer_fraction + rec_exp <- df$receiver_expression + rec_fra <- df$receiver_fraction # create swaps - lg_swap = c() - rp_swap = c() - from_swap = c() - to_swap = c() - prd_swap = c() - rec_swap = c() - prd_exp_swap = c() - prd_fra_swap = c() - rec_exp_swap = c() - rec_fra_swap = c() + lg_swap <- c() + rp_swap <- c() + from_swap <- c() + to_swap <- c() + prd_swap <- c() + rec_swap <- c() + prd_exp_swap <- c() + prd_fra_swap <- c() + rec_exp_swap <- c() + rec_fra_swap <- c() for (i in seq_along(is_r_a)) { if (!is_r_a[i]) { if (is_r_b[i]) { - lg_swap = c(lg_swap, lg[i]) - rp_swap = c(rp_swap, rp[i]) - from_swap = c(from_swap, from[i]) - to_swap = c(to_swap, to[i]) - prd_swap = c(prd_swap, prd[i]) - rec_swap = c(rec_swap, rec[i]) - prd_exp_swap = c(prd_exp_swap, prd_exp[i]) - prd_fra_swap = c(prd_fra_swap, prd_fra[i]) - rec_exp_swap = c(rec_exp_swap, rec_exp[i]) - rec_fra_swap = c(rec_fra_swap, rec_fra[i]) + lg_swap <- c(lg_swap, lg[i]) + rp_swap <- c(rp_swap, rp[i]) + from_swap <- c(from_swap, from[i]) + to_swap <- c(to_swap, to[i]) + prd_swap <- c(prd_swap, prd[i]) + rec_swap <- c(rec_swap, rec[i]) + prd_exp_swap <- c(prd_exp_swap, prd_exp[i]) + prd_fra_swap <- c(prd_fra_swap, prd_fra[i]) + rec_exp_swap <- c(rec_exp_swap, rec_exp[i]) + rec_fra_swap <- c(rec_fra_swap, rec_fra[i]) } else { - lg_swap = c(lg_swap, lg[i]) - rp_swap = c(rp_swap, rp[i]) - from_swap = c(from_swap, from[i]) - to_swap = c(to_swap, to[i]) - prd_swap = c(prd_swap, prd[i]) - rec_swap = c(rec_swap, rec[i]) - prd_exp_swap = c(prd_exp_swap, prd_exp[i]) - prd_fra_swap = c(prd_fra_swap, prd_fra[i]) - rec_exp_swap = c(rec_exp_swap, rec_exp[i]) - rec_fra_swap = c(rec_fra_swap, rec_fra[i]) + lg_swap <- c(lg_swap, lg[i]) + rp_swap <- c(rp_swap, rp[i]) + from_swap <- c(from_swap, from[i]) + to_swap <- c(to_swap, to[i]) + prd_swap <- c(prd_swap, prd[i]) + rec_swap <- c(rec_swap, rec[i]) + prd_exp_swap <- c(prd_exp_swap, prd_exp[i]) + prd_fra_swap <- c(prd_fra_swap, prd_fra[i]) + rec_exp_swap <- c(rec_exp_swap, rec_exp[i]) + rec_fra_swap <- c(rec_fra_swap, rec_fra[i]) } } else if (is_r_a[i]) { if (is_r_b[i]) { - lg_swap = c(lg_swap, lg[i]) - rp_swap = c(rp_swap, rp[i]) - from_swap = c(from_swap, from[i]) - to_swap = c(to_swap, to[i]) - prd_swap = c(prd_swap, prd[i]) - rec_swap = c(rec_swap, rec[i]) - prd_exp_swap = c(prd_exp_swap, prd_exp[i]) - prd_fra_swap = c(prd_fra_swap, prd_fra[i]) - rec_exp_swap = c(rec_exp_swap, rec_exp[i]) - rec_fra_swap = c(rec_fra_swap, rec_fra[i]) + lg_swap <- c(lg_swap, lg[i]) + rp_swap <- c(rp_swap, rp[i]) + from_swap <- c(from_swap, from[i]) + to_swap <- c(to_swap, to[i]) + prd_swap <- c(prd_swap, prd[i]) + rec_swap <- c(rec_swap, rec[i]) + prd_exp_swap <- c(prd_exp_swap, prd_exp[i]) + prd_fra_swap <- c(prd_fra_swap, prd_fra[i]) + rec_exp_swap <- c(rec_exp_swap, rec_exp[i]) + rec_fra_swap <- c(rec_fra_swap, rec_fra[i]) } else { - lg_swap = c(lg_swap, rp[i]) - rp_swap = c(rp_swap, lg[i]) - from_swap = c(from_swap, to[i]) - to_swap = c(to_swap, from[i]) - prd_swap = c(prd_swap, rec[i]) - rec_swap = c(rec_swap, prd[i]) - prd_exp_swap = c(prd_exp_swap, rec_exp[i]) - prd_fra_swap = c(prd_fra_swap, rec_fra[i]) - rec_exp_swap = c(rec_exp_swap, prd_exp[i]) - rec_fra_swap = c(rec_fra_swap, prd_fra[i]) + lg_swap <- c(lg_swap, rp[i]) + rp_swap <- c(rp_swap, lg[i]) + from_swap <- c(from_swap, to[i]) + to_swap <- c(to_swap, from[i]) + prd_swap <- c(prd_swap, rec[i]) + rec_swap <- c(rec_swap, prd[i]) + prd_exp_swap <- c(prd_exp_swap, rec_exp[i]) + prd_fra_swap <- c(prd_fra_swap, rec_fra[i]) + rec_exp_swap <- c(rec_exp_swap, prd_exp[i]) + rec_fra_swap <- c(rec_fra_swap, prd_fra[i]) } } } - df$ligand_swap = lg_swap - df$receptor_swap = rp_swap - df$pair_swap = paste0(lg_swap, " - ", rp_swap) - df$producer_swap = prd_swap - df$receiver_swap = rec_swap - df$producer_expression_swap = prd_exp_swap - df$producer_fraction_swap = prd_fra_swap - df$receiever_expression_swap = rec_exp_swap - df$receiever_fraction_swap = rec_fra_swap - df$from_swap = from_swap - df$to_swap = to_swap + df$ligand_swap <- lg_swap + df$receptor_swap <- rp_swap + df$pair_swap <- paste0(lg_swap, " - ", rp_swap) + df$producer_swap <- prd_swap + df$receiver_swap <- rec_swap + df$producer_expression_swap <- prd_exp_swap + df$producer_fraction_swap <- prd_fra_swap + df$receiever_expression_swap <- rec_exp_swap + df$receiever_fraction_swap <- rec_fra_swap + df$from_swap <- from_swap + df$to_swap <- to_swap return(df) -} \ No newline at end of file +} + +.constructGraph <- function(input_group, sep, el, el0, unique_id, interactions_df, + plot_cpdb_out, celltype_key, edge_group = FALSE, edge_group_colors = NULL, node_group_colors = NULL, plot_score_as_thickness = TRUE) { + requireNamespace("igraph") + celltypes <- unique(c(as.character(el$producer), as.character(el$receiver))) + el1 <- data.frame( + from = "root", to = celltypes, barcode_1 = NA, barcode_2 = NA, + barcode_3 = NA + ) + el2 <- data.frame( + from = celltypes, to = paste0(celltypes, sep, "ligand"), + barcode_1 = NA, barcode_2 = NA, barcode_3 = NA + ) + el3 <- data.frame( + from = celltypes, to = paste0(celltypes, sep, "receptor"), + barcode_1 = NA, barcode_2 = NA, barcode_3 = NA + ) + el4 <- do.call(rbind, lapply(celltypes, function(x) { + cell_ligands <- grep(x, el$from, value = TRUE) + cell_ligands_idx <- grep(x, el$from) + if (length(cell_ligands) > 0) { + df <- data.frame( + from = paste0(x, sep, "ligand"), to = cell_ligands, + barcode_1 = el$barcode[cell_ligands_idx], barcode_2 = el$pair[cell_ligands_idx], + barcode_3 = paste0(el$from[cell_ligands_idx], sep, el$to[cell_ligands_idx]) + ) + } else { + df <- NULL + } + })) + el5 <- do.call(rbind, lapply(celltypes, function(x) { + cell_ligands <- grep(x, el$to, value = TRUE) + cell_ligands_idx <- grep(x, el$to) + if (length(cell_ligands) > 0) { + df <- data.frame( + from = paste0(x, sep, "receptor"), to = cell_ligands, + barcode_1 = el$barcode[cell_ligands_idx], barcode_2 = el$pair[cell_ligands_idx], + barcode_3 = paste0(el$from[cell_ligands_idx], sep, el$to[cell_ligands_idx]) + ) + } else { + df <- NULL + } + })) + gr_el <- do.call(rbind, list(el1, el2, el3, el4, el5)) + plot_cpdb_out$barcode <- paste0(plot_cpdb_out$Var2, sep, plot_cpdb_out$Var1) + mean_col <- grep("means$", colnames(plot_cpdb_out), value = TRUE) + means <- plot_cpdb_out[ + match(gr_el$barcode_1, plot_cpdb_out$barcode), + mean_col + ] + pval_col <- grep("pvals", colnames(plot_cpdb_out), value = TRUE) + pvals <- plot_cpdb_out[ + match(gr_el$barcode_1, plot_cpdb_out$barcode), + pval_col + ] + gr_el <- cbind(gr_el, means, pvals) + if (edge_group) { + groups <- interactions_df$group[match(gr_el$barcode_2, interactions_df$interacting_pair)] + } + gr <- igraph::graph_from_edgelist(as.matrix(gr_el[, 1:2])) + igraph::E(gr)$interaction_score <- as.numeric(means) + igraph::E(gr)$pvals <- as.numeric(pvals) + if (edge_group) { + igraph::E(gr)$group <- groups + } + igraph::E(gr)$name <- gr_el$barcode_3 + # order the graph vertices + igraph::V(gr)$type <- NA + igraph::V(gr)$type[igraph::V(gr)$name %in% el4$to] <- "ligand" + igraph::V(gr)$type[igraph::V(gr)$name %in% el5$to] <- "receptor" + from <- match(el0$from, igraph::V(gr)$name) + to <- match(el0$to, igraph::V(gr)$name) + dat <- data.frame(from = el0$from, to = el0$to) + if (nrow(dat) > 0) { + dat$barcode <- paste0(dat$from, sep, dat$to) + interaction_score <- igraph::E(gr)$interaction_score[match(dat$barcode, gr_el$barcode_3)] + pval <- igraph::E(gr)$pvals[match(dat$barcode, gr_el$barcode_3)] + if (any(is.na(pval))) { + pval[is.na(pval)] <- 1 + } + if (!all(is.na(range01(-log10(pval))))) { + pval <- range01(-log10(pval)) + } + if (edge_group) { + group <- igraph::E(gr)$group[match(dat$barcode, gr_el$barcode_3)] + } + ligand_expr <- data.frame( + cell_mol = el$from, expression = el$producer_expression, + fraction = el$producer_fraction + ) + recep_expr <- data.frame( + cell_mol = el$to, expression = el$receiver_expression, + fraction = el$receiver_fraction + ) + expression <- rbind(ligand_expr, recep_expr) + df <- igraph::as_data_frame(gr, "both") + df$vertices$expression <- 0 + df$vertices$fraction <- 0 + df$vertices$expression <- as.numeric(expression$expression)[match( + df$vertices$name, + expression$cell_mol + )] + df$vertices$fraction <- as.numeric(expression$fraction)[match( + df$vertices$name, + expression$cell_mol + )] + df$vertices$celltype <- "" + for (x in unique_id) { + idx <- grepl(paste0(x, sep), df$vertices$name) + df$vertices$celltype[idx] <- x + } + df$vertices$label <- df$vertices$name + df$vertices$label[!df$vertices$name %in% c(el0$from, el0$to)] <- "" + requireNamespace("igraph") + gr <- igraph::graph_from_data_frame(df$edges, directed = TRUE, vertices = df$vertices) + for (x in unique_id) { + igraph::V(gr)$label <- gsub(paste0(x, sep), "", igraph::V(gr)$label) + } + if (!is.null(edge_group_colors)) { + edge_group_colors <- edge_group_colors + } else { + nn <- length(unique(igraph::E(gr)$group)) + edge_group_colors <- .gg_color_hue(nn) + } + if (!is.null(node_group_colors)) { + node_group_colors <- node_group_colors + } else { + nn <- length(unique(meta[, celltype_key])) + node_group_colors <- .gg_color_hue(nn) + } + # plot the graph + if (edge_group) { + if (plot_score_as_thickness) { + pl <- ggraph(gr, layout = "dendrogram", circular = TRUE) + + geom_conn_bundle( + data = get_con( + from = from, to = to, + group = group, `-log10(sig)` = pval, interaction_score = interaction_score + ), + aes(colour = group, alpha = `-log10(sig)`, width = interaction_score), + tension = 0.5 + ) # + scale_edge_width(range = c(1, 3)) + scale_edge_alpha(limits = c(0, 1)) + + } else { + pl <- ggraph(gr, layout = "dendrogram", circular = TRUE) + + geom_conn_bundle( + data = get_con( + from = from, to = to, + group = group, `-log10(sig)` = pval, interaction_score = interaction_score + ), + aes(colour = group, alpha = interaction_score, width = `-log10(sig)`), + tension = 0.5 + ) # + scale_edge_width(range = c(1, 3)) + scale_edge_alpha(limits = c(0, 1)) + + } + pl <- pl + scale_edge_color_manual(values = edge_group_colors) + + geom_node_point(pch = 19, aes( + size = fraction, filter = leaf, + color = celltype, alpha = type + )) + theme_void() + coord_fixed() + + scale_size_continuous(limits = c(0, 1)) + scale_shape_manual(values = c( + ligand = 19, + receptor = 15 + )) + scale_color_manual(values = node_group_colors) + + geom_text_repel(aes(x = x, y = y, label = label), + segment.square = TRUE, + segment.inflect = TRUE, segment.size = 0.2, force = 0.5, + size = 2, force_pull = 0 + ) + scale_alpha_manual(values = c( + ligand = 0.5, + receptor = 1 + )) + small_legend(keysize = 0.5) + ggtitle(input_group) + } else { + if (plot_score_as_thickness) { + pl <- ggraph(gr, layout = "dendrogram", circular = TRUE) + + geom_conn_bundle( + data = get_con( + from = from, to = to, + `-log10(sig)` = pval, interaction_score = interaction_score + ), + aes(alpha = `-log10(sig)`, width = interaction_score), + tension = 0.5 + ) + } else { + pl <- ggraph(gr, layout = "dendrogram", circular = TRUE) + + geom_conn_bundle( + data = get_con( + from = from, to = to, + `-log10(sig)` = pval, interaction_score = interaction_score + ), + aes(alpha = interaction_score, width = `-log10(sig)`), + tension = 0.5 + ) + } + # scale_edge_width(range = c(1, 3)) + + # scale_edge_alpha(limits = c(0, 1)) + + pl <- pl + scale_edge_color_manual(values = edge_group_colors) + + geom_node_point(pch = 19, aes( + size = fraction, filter = leaf, + color = celltype, alpha = type + )) + theme_void() + coord_fixed() + + scale_size_continuous(limits = c(0, 1)) + scale_shape_manual(values = c( + ligand = 19, + receptor = 15 + )) + scale_color_manual(values = node_group_colors) + + geom_text_repel(aes(x = x, y = y, label = label), + segment.square = TRUE, + segment.inflect = TRUE, segment.size = 0.2, force = 0.5, + size = 2, force_pull = 0 + ) + # geom_node_text(aes(x = x*1.15, y=y*1.15, filter = leaf, label=label, size # =0.01)) + size + scale_alpha_manual(values = c(ligand = 0.5, receptor = 1)) + + small_legend(keysize = 0.5) + ggtitle(input_group) + } + return(pl) + } else { + return(NA) + } +} + +.chord_diagram4 <- function(tmp_dfx, lr_interactions, scaled, sep, + alpha, directional, show_legend, edge_cols, grid_cols, legend.pos.x, legend.pos.y, + title, grid_scale) { + tmp_dfx <- .swap_ligand_receptor(tmp_dfx) + unique_celltype <- unique(c(lr_interactions$`1`, lr_interactions$`2`)) + na_df <- data.frame(t(combn(unique_celltype, 2))) + colnames(na_df) <- c("producer_swap", "receiver_swap") + if (scaled) { + interactions_items <- lr_interactions$scaled_means + } else { + interactions_items <- lr_interactions$means + } + names(interactions_items) <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) + pvals_items <- lr_interactions$pvals + names(pvals_items) <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) + interactions_items[is.na(pvals_items)] <- 1 + tmp_dfx$pair_swap <- gsub("_", " - ", tmp_dfx$pair_swap) + tmp_dfx$value <- interactions_items[tmp_dfx$barcode] + tmp_dfx$pval <- pvals_items[tmp_dfx$barcode] + edge_color <- .scPalette(length(unique(tmp_dfx$pair_swap))) + names(edge_color) <- unique(tmp_dfx$pair_swap) + if (!is.null(edge_cols)) { + edge_color[names(edge_cols)] <- edge_cols + } + if (!is.null(grid_cols)) { + if (length(grid_cols) != length(unique(tmp_dfx$receiver_swap))) { + stop(paste0( + "Please provide ", length(unique(tmp_dfx$receiver_swap)), + " to grid_colors." + )) + } else { + grid_color <- grid_cols + } + } else { + grid_color <- .scPalette(length(unique(tmp_dfx$receiver_swap))) + } + if (is.null(grid_cols)) { + names(grid_color) <- unique(tmp_dfx$receiver_swap) + } + tmp_dfx$edge_color <- edge_color[tmp_dfx$pair_swap] + requireNamespace("colorspace") + tmp_dfx$edge_color <- colorspace::adjust_transparency(tmp_dfx$edge_color, + alpha = alpha + ) + tmp_dfx$edge_color[is.na(tmp_dfx$pval)] <- NA + tmp_dfx$grid_color <- grid_color[tmp_dfx$receiver_swap] + tmp_dfx$grid_color[is.na(tmp_dfx$pval)] <- NA + tmp_dfx <- tmp_dfx[!duplicated(tmp_dfx$barcode), ] + # filter to non na + tmp_dfx_not_na <- tmp_dfx[!is.na(tmp_dfx$pval), ] + emptydf <- data.frame(matrix(ncol = ncol(tmp_dfx_not_na), nrow = nrow(na_df))) + colnames(emptydf) <- colnames(tmp_dfx_not_na) + emptydf$producer_swap <- na_df$producer_swap + emptydf$receiver_swap <- na_df$receiver_swap + tmp_dfx <- rbind(tmp_dfx_not_na, emptydf) + tmp_dfx$value[is.na(tmp_dfx$value)] <- grid_scale + if (directional == 2) { + link.arr.type <- "triangle" + } else { + link.arr.type <- "big.arrow" + } + cells <- unique(c(tmp_dfx$producer_swap, tmp_dfx$receiver_swap)) + names(cells) <- cells + circos.clear() + chordDiagram(tmp_dfx[c("producer_swap", "receiver_swap", "value")], + directional = directional, + direction.type = c("diffHeight", "arrows"), link.arr.type = link.arr.type, + annotationTrack = c("name", "grid"), col = tmp_dfx$edge_color, grid.col = grid_color, + group = cells + ) + requireNamespace("grid") + requireNamespace("ComplexHeatmap") + if (show_legend) { + lgd <- ComplexHeatmap::Legend( + at = names(edge_color), type = "grid", + legend_gp = grid::gpar(fill = edge_color), title = "interactions" + ) + ComplexHeatmap::draw(lgd, + x = grid::unit(1, "npc") - grid::unit(legend.pos.x, "mm"), + y = grid::unit(legend.pos.y, "mm"), just = c("right", "bottom") + ) + } + requireNamespace("graphics") + graphics::title(main = title) + circos.clear() + gg <- recordPlot() + return(gg) +} + +.chord_diagram3 <- function(tmp_dfx, lr_interactions, scaled, sep, + alpha, directional, show_legend, edge_cols, grid_cols, legend.pos.x, legend.pos.y, + title) { + tmp_dfx <- .swap_ligand_receptor(tmp_dfx) + if (scaled) { + interactions_items <- lr_interactions$scaled_means + } else { + interactions_items <- lr_interactions$means + } + names(interactions_items) <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) + pvals_items <- lr_interactions$pvals + names(pvals_items) <- paste0(lr_interactions$Var2, sep, lr_interactions$Var1) + interactions_items[is.na(pvals_items)] <- 1 + tmp_dfx$pair_swap <- gsub("_", " - ", tmp_dfx$pair_swap) + tmp_dfx$value <- interactions_items[tmp_dfx$barcode] + tmp_dfx$pval <- pvals_items[tmp_dfx$barcode] + edge_color <- .scPalette(length(unique(tmp_dfx$pair_swap))) + names(edge_color) <- unique(tmp_dfx$pair_swap) + if (!is.null(edge_cols)) { + edge_color[names(edge_cols)] <- edge_cols + } + if (!is.null(grid_cols)) { + if (length(grid_cols) != length(unique(tmp_dfx$receiver_swap))) { + stop(paste0( + "Please provide ", length(unique(tmp_dfx$receiver_swap)), + " to grid_colors." + )) + } else { + grid_color <- grid_cols + } + } else { + grid_color <- .scPalette(length(unique(tmp_dfx$receiver_swap))) + } + if (is.null(grid_cols)) { + names(grid_color) <- unique(tmp_dfx$receiver_swap) + } + tmp_dfx$edge_color <- edge_color[tmp_dfx$pair_swap] + requireNamespace("colorspace") + tmp_dfx$edge_color <- colorspace::adjust_transparency(tmp_dfx$edge_color, + alpha = alpha + ) + tmp_dfx$edge_color[is.na(tmp_dfx$pval)] <- NA + tmp_dfx$grid_color <- grid_color[tmp_dfx$receiver_swap] + tmp_dfx$grid_color[is.na(tmp_dfx$pval)] <- NA + tmp_dfx <- tmp_dfx[!duplicated(tmp_dfx$barcode), ] + if (directional == 2) { + link.arr.type <- "triangle" + } else { + link.arr.type <- "big.arrow" + } + cells <- unique(c(tmp_dfx$producer_swap, tmp_dfx$receiver_swap)) + names(cells) <- cells + circos.clear() + chordDiagram(tmp_dfx[c("producer_swap", "receiver_swap", "value")], + directional = directional, + direction.type = c("diffHeight", "arrows"), link.arr.type = link.arr.type, + annotationTrack = c("name", "grid"), col = tmp_dfx$edge_color, grid.col = grid_color, + group = cells + ) + requireNamespace("grid") + requireNamespace("ComplexHeatmap") + if (show_legend) { + lgd <- ComplexHeatmap::Legend( + at = names(edge_color), type = "grid", + legend_gp = grid::gpar(fill = edge_color), title = "interactions" + ) + ComplexHeatmap::draw(lgd, + x = grid::unit(1, "npc") - grid::unit(legend.pos.x, "mm"), + y = grid::unit(legend.pos.y, "mm"), just = c("right", "bottom") + ) + } + requireNamespace("graphics") + graphics::title(main = title) + circos.clear() + gg <- recordPlot() + return(gg) +} diff --git a/README.md b/README.md index aa49990..fc6d060 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ R plotting functions to plot gene expression data of single-cell data. For a python port of `ktplots`, please check out my other [repository](https://www.github.com/zktuong/ktplotspy). ## Installation instructions -You can install the package via ```devtools::install_github()``` function in R +You can install the package via `devtools::install_github()` function in R ```R if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools") @@ -31,385 +31,18 @@ The data is downsampled from the [kidney cell atlas](https://kidneycellatlas.org For more info, please see [Stewart et al. kidney single cell data set published in Science 2019](https://science.sciencemag.org/content/365/6460/1461). -## plot_cpdb -This function seems like it's the most popular so I moved it up! Please see below for alternative visualisation options. - -Generates a dot plot after CellPhoneDB analysis via specifying the query celltypes and genes. The difference compared to the original cellphonedb `plot` is that this is totally customizable! - -The plotting is largely determined by the format of the meta file provided to CellPhoneDB analysis. - -To run, you will need to load in the means.txt and pvals.txt from the analysis. If you are using results from cellphonedb `deg_analysis` mode from version >= 3, the `pvalues.txt` is `relevant_interactions.txt` and also add `degs_analysis = TRUE` into all the functions below. -```R -# pvals <- read.delim("pvalues.txt", check.names = FALSE) -# means <- read.delim("means.txt", check.names = FALSE) - -# I've provided an example dataset -data(cpdb_output) -plot_cpdb(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', scdata = kidneyimmune, - idents = 'celltype', # column name where the cell ids are located in the metadata - split.by = 'Experiment', # column name where the grouping column is. Optional. - means = means, pvals = pvals, - genes = c("XCR1", "CXCL10", "CCL5")) + -small_axis(fontsize = 3) + small_grid() + small_guide() + small_legend(fontsize = 2) # some helper functions included in ktplots to help with the plotting -``` -![plot_cpdb](exampleImages/plot_cpdb_example.png) - -You can also try specifying ```gene.family``` option which will grep some pre-determined genes. -```R -plot_cpdb(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', scdata = kidneyimmune, - idents = 'celltype', means = means, pvals = pvals, split.by = 'Experiment', - gene.family = 'chemokines') + small_guide() + small_axis() + small_legend(keysize=.5) -``` -![plot_cpdb](exampleImages/plot_cpdb_example1.png) -```R -plot_cpdb(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', scdata = kidneyimmune, - idents = 'celltype', means = means, pvals = pvals, split.by = 'Experiment', - gene.family = 'chemokines', col_option = "maroon", highlight = "blue") + small_guide() + small_axis() + small_legend(keysize=.5) -``` -![plot_cpdb](exampleImages/plot_cpdb_example2.png) -```R -plot_cpdb(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', scdata = kidneyimmune, - idents = 'celltype', means = means, pvals = pvals, split.by = 'Experiment', - gene.family = 'chemokines', col_option = viridis::cividis(50)) + small_guide() + small_axis() + small_legend(keysize=.5) -``` -![plot_cpdb](exampleImages/plot_cpdb_example3.png) -```R -plot_cpdb(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', scdata = kidneyimmune, - idents = 'celltype', means = means, pvals = pvals, split.by = 'Experiment', - gene.family = 'chemokines', noir = TRUE) + small_guide() + small_axis() + small_legend(keysize=.5) -``` -![plot_cpdb](exampleImages/plot_cpdb_example4.png) - -A new style to plot inspired from `squidpy.pl.ligrec` where significant interactions are shown as outline instead. -```R -plot_cpdb(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', scdata = kidneyimmune, - idents = 'celltype', means = means, pvals = pvals, split.by = 'Experiment', - gene.family = 'chemokines', default_style = FALSE) + small_guide() + small_axis() + small_legend(keysize=.5) -``` -![plot_cpdb](exampleImages/plot_cpdb_alternate2.png) - -if ```genes``` and ```gene.family``` are both not specified, the function will try to plot everything. - -Specifying ```keep_significant_only``` will only keep those that are p<0.05 (which you can try to adjust with ```p.adjust.method```). - -You can now also specify more than 1 gene families: -```R -p <- plot_cpdb("B cell", "CD4T cell", kidneyimmune, "celltype", means, pvals, - split.by = "Experiment", gene.family = c("Coinhibitory", "Costimulatory"), - cluster_rows = FALSE, # ensures that the families are separate - keep_significant_only = TRUE) -``` -![plot_cpdb](exampleImages/plotcpdb_two.png) - -And also provide custom families as a ```data.frame```. -```R -df = data.frame(set1 = c("CCR6", "CCL20"), set2 = c("CCL5", "CCR4")) -p <- plot_cpdb("B cell", "CD4T cell", kidneyimmune, "celltype", means, pvals, - split.by = "Experiment", gene.family = c("set1", "set2"), custom_gene_family =df, - keep_significant_only = TRUE) -``` -![plot_cpdb](exampleImages/plotcpdb_custom.png) - -## combine_cpdb - -For the ```split.by``` option to work, the annotation in the meta file must be defined in the following format: -```R -{split.by}_{idents} -``` - -so to set up an example vector, it would be something like: -```R -annotation <- paste0(kidneyimmune$Experiment, '_', kidneyimmune$celltype) -``` - -The recommended way to use `split.by` is to prepare the data with `combine_cpdb` like in this example: - -```R -# Assume you have 2 cellphonedb runs, one where it's just naive and the other is treated, you will end up with 2 cellphonedb out folders -# remember, the celltype labels you provide to cellphonedb's meta.txt should already be like {split.by}_{idents} -# so the two meta.txt should look like: - -# naive file -# ATTAGTCGATCGTAGT-1 naive_CD4Tcell -# ATTAGTGGATCGTAGT-1 naive_CD4Tcell -# ATTAGTCGACCGTAGT-1 naive_CD8Tcell -# ATTAGTCGATCGTAGT-1 naive_CD8Tcell -# ATGAGTCGATCGTAGT-1 naive_Bcell -# ATTAGTCGATCGTGGT-1 naive_Bcell - -# treated file -# ATTAGTCAATCGTAGT-1 treated_CD4Tcell -# ATTAGTGGATCGTAGT-1 treated_CD4Tcell -# ATTAGTCGACCATAGT-1 treated_CD8Tcell -# ATTAGTAGATCGTAGT-1 treated_CD8Tcell -# ATGAGTCGATCGTAAT-1 treated_Bcell -# ATTAGTCGATCGTGAT-1 treated_Bcell - -# one you have set that up correctly, you can then read in the files. -naive_means <- read.delim("naive_out/means.txt", check.names = FALSE) -naive_pvals <- read.delim("naive_out/pvalues.txt", check.names = FALSE) -naive_decon <- read.delim("naive_out/deconvoluted.txt", check.names = FALSE) - -treated_means <- read.delim("treated_out/means.txt", check.names = FALSE) -treated_pvals <- read.delim("treated_out/pvalues.txt", check.names = FALSE) -treated_decon <- read.delim("treated_out/deconvoluted.txt", check.names = FALSE) - -means <- combine_cpdb(naive_means, treated_means) -pvals <- combine_cpdb(naive_pvals, treated_pvals) -decon <- combine_cpdb(naive_decon, treated_decon) - -plot_cpdb(...) -``` - -## plot_cpdb2 -Generates a circos-style wire/arc/chord plot for cellphonedb results. - -This function piggy-backs on the original `plot_cpdb` function and generates the results like this: - -Please help contribute to the interaction grouping list [here](https://docs.google.com/spreadsheets/d/1O9OKU7J0NdeQNJAIMpsHtWAFvY014GDQ7aigdGUSTmc/edit?usp=sharing)! - -Credits to Ben Stewart for coming up with the base code! - -#### Simple usage with example data -```R -library(ktplots) -data(kidneyimmune) -data(cpdb_output2) - -p <- plot_cpdb2(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', - scdata = kidneyimmune, - idents = 'celltype', # column name where the cell ids are located in the metadata - means = means2, - pvals = pvals2, - deconvoluted = decon2, # new options from here on specific to plot_cpdb2 - desiredInteractions = list( - c('CD4T cell', 'B cell'), - c('B cell', 'CD4T cell')), - interaction_grouping = interaction_annotation, - edge_group_colors = c( - "Activating" = "#e15759", - "Chemotaxis" = "#59a14f", - "Inhibitory" = "#4e79a7", - "Intracellular trafficking" = "#9c755f", - "DC_development" = "#B07aa1", - "Unknown" = "#e7e7e7" - ), - node_group_colors = c( - "CD4T cell" = "red", - "B cell" = "blue"), - keep_significant_only = TRUE, - standard_scale = TRUE, - remove_self = TRUE - ) -p -``` -![plot_cpd2](exampleImages/plot_cpdb2_example.png) - -#### Formatting data from anndata formatted file -```R -# code example but not using the example datasets -library(SingleCellExperiment) -library(reticulate) -library(ktplots) -ad=import('anndata') - -adata = ad$read_h5ad('rna.h5ad') -counts <- Matrix::t(adata$X) -row.names(counts) <- row.names(adata$var) -colnames(counts) <- row.names(adata$obs) -sce <- SingleCellExperiment(list(counts = counts), colData = adata$obs, rowData = adata$var) - -means <- read.delim('out/means.txt', check.names = FALSE) -pvalues <- read.delim('out/pvalues.txt', check.names = FALSE) -deconvoluted <- read.delim('out/deconvoluted.txt', check.names = FALSE) -interaction_grouping <- read.delim('interactions_groups.txt') -# > head(interaction_grouping) -# interaction role -# 1 ALOX5_ALOX5AP Activating -# 2 ANXA1_FPR1 Inhibitory -# 3 BTLA_TNFRSF14 Inhibitory -# 4 CCL5_CCR5 Chemotaxis -# 5 CD2_CD58 Activating -# 6 CD28_CD86 Activating - -test <- plot_cpdb2(cell_type1 = "CD4_Tem|CD4_Tcm|CD4_Treg", # same usage style as plot_cpdb - cell_type2 = "cDC", - idents = 'fine_clustering', - split.by = 'treatment_group_1', - scdata = sce, - means = means, - pvals = pvalues, - deconvoluted = deconvoluted, # new options from here on specific to plot_cpdb2 - gene_symbol_mapping = 'index', # column name in rowData holding the actual gene symbols if the row names is ENSG Ids. Might be a bit buggy - desiredInteractions = list(c('CD4_Tcm', 'cDC1'), c('CD4_Tcm', 'cDC2'), c('CD4_Tem', 'cDC1'), c('CD4_Tem', 'cDC2 '), c('CD4_Treg', 'cDC1'), c('CD4_Treg', 'cDC2')), - interaction_grouping = interaction_grouping, - edge_group_colors = c("Activating" = "#e15759", "Chemotaxis" = "#59a14f", "Inhibitory" = "#4e79a7", " Intracellular trafficking" = "#9c755f", "DC_development" = "#B07aa1"), - node_group_colors = c("CD4_Tcm" = "#86bc86", "CD4_Tem" = "#79706e", "CD4_Treg" = "#ff7f0e", "cDC1" = "#bcbd22" ,"cDC2" = "#17becf"), - keep_significant_only = TRUE, - standard_scale = TRUE, - remove_self = TRUE) -``` -![plot_cpd2](exampleImages/plot_cpdb2.png) - -## plot_cpdb3 -Generates a chord diagram inspired from [CellChat](https://github.com/sqjin/CellChat)'s way of showing the data! - -Usage is similar to `plot_cpdb2` but with reduced options. Additional kwargs are passed to `plot_cpdb`. -```R -library(ktplots) -data(kidneyimmune) -data(cpdb_output2) - -p <- plot_cpdb3(cell_type1 = 'B cell', cell_type2 = 'CD4T cell|MNPd', - scdata = kidneyimmune, - idents = 'celltype', # column name where the cell ids are located in the metadata - means = means2, - pvals = pvals2, - deconvoluted = decon2, # new options from here on specific to plot_cpdb3 - keep_significant_only = TRUE, - standard_scale = TRUE, - remove_self = TRUE - ) -p -``` - -![plot_cpdb3](exampleImages/plot_cpdb3.png) - - -## plot_cpdb4 -New! Alternate way of showing the chord diagram for specific interactions! - -Usage is similar to `plot_cpdb3` but with additional required `interaction` option. Additional kwargs are passed to `plot_cpdb`. -```R -library(ktplots) -data(kidneyimmune) -data(cpdb_output2) - -p <- plot_cpdb4( - interaction = 'CLEC2D-KLRB1', - cell_type1 = 'NK', cell_type2 = 'Mast', - scdata = kidneyimmune, - idents = 'celltype', - means = means2, - pvals = pvals2, - deconvoluted = decon2, - keep_significant_only = TRUE, - standard_scale = TRUE, - ) -p -``` - -![plot_cpdb4](exampleImages/plot_cpdb4.png) - -or specify more than 1 interactions + only show specific cell-type type interactions! -```R -plot_cpdb4( - interaction = c('CLEC2D-KLRB1', 'CD40-CD40LG'), - cell_type1 = 'NK|B', cell_type2 = 'Mast|CD4T', - scdata = kidneyimmune, - idents = 'celltype', - means = means2, - pvals = pvals2, - deconvoluted = decon2, - desiredInteractions = list( - c('NK cell', 'Mast cell'), - c('NK cell', 'NKT cell'), - c('NKT cell', 'Mast cell'), - c('B cell', 'CD4T cell')), - keep_significant_only = TRUE, - ) -``` - -![plot_cpdb42](exampleImages/plot_cpdb4_2.png) - - -## plot_cpdb_heatmap - -Ported the original heatmap plot to this pacakge as per the main cellphonedb repo. Uses `pheatmap` internally. Colours indicate the number of significant interactions. - -```R -plot_cpdb_heatmap(kidneyimmune, 'celltype', pvals2, cellheight = 10, cellwidth = 10) -``` - -![plot_cpdb_heatmap](exampleImages/plot_cpdb_heatmap2.png) - -```R -plot_cpdb_heatmap(kidneyimmune, 'celltype', pvals2, cellheight = 10, cellwidth = 10, symmetrical = FALSE) -``` - -![plot_cpdb_heatmap](exampleImages/plot_cpdb_heatmap.png) - -The values for the `symmetrical=FALSE` mode follow the direction of the L-R direction where it's always moleculeA:celltypeA -> moleculeB:celltypeB. - -Therefore, if you trace on the `x-axis` for `celltype A` [MNPa(mono)] to `celltype B` [CD8T cell] on the `y-axis`: - -A -> B is 18 interactions - -Whereas if you trace on the `y-axis` for `celltype A` [MNPa(mono)] to `celltype B` [CD8T cell] on the `x-axis`: - -A -> B is 9 interactions - -`symmetrical=TRUE` mode will return 18+9 = 27 - -## Other useful functions - -## geneDotPlot -Plotting gene expression dot plots heatmaps. -```R -# Note, this conflicts with tidyr devel version -geneDotPlot(scdata = kidneyimmune, # object - genes = c("CD68", "CD80", "CD86", "CD74", "CD2", "CD5"), # genes to plot - idents = "celltype", # column name in meta data that holds the cell-cluster ID/assignment - split.by = 'Project', # column name in the meta data that you want to split the plotting by. If not provided, it will just plot according to idents - standard_scale = TRUE) + # whether to scale expression values from 0 to 1. See ?geneDotPlot for other options -theme(strip.text.x = element_text(angle=0, hjust = 0, size =7)) + small_guide() + small_legend() -``` -Hopefully you end up with something like this: -![geneDotPlot](exampleImages/geneDotPlot_example.png) - - -## correlationSpot -Ever wanted to ask if your gene(s) and/or prediction(s) of interests correlate spatially in vissium data? Now you can! -**disclaimer** It might be buggy. -```R -library(ggplot2) -scRNAseq <- Seurat::SCTransform(scRNAseq, verbose = FALSE) %>% Seurat::RunPCA(., verbose = FALSE) %>% Seurat::RunUMAP(., dims = 1:30, verbose = FALSE) -anchors <- Seurat::FindTransferAnchors(reference = scRNAseq, query = spatial, normalization.method = "SCT") -predictions.assay <- Seurat::TransferData(anchorset = anchors, refdata = scRNAseq$label, dims = 1:30, prediction.assay = TRUE, weight.reduction = spatial[["pca"]]) -spatial[["predictions"]] <- predictions.assay -Seurat::DefaultAssay(spatial) <- "predictions" -Seurat::DefaultAssay(spatial) <- 'SCT' -pa <- Seurat::SpatialFeaturePlot(spatial, features = c('Tnfsf13b', 'Cd79a'), pt.size.factor = 1.6, ncol = 2, crop = TRUE) + viridis::scale_fill_viridis() -Seurat::DefaultAssay(spatial) <- 'predictions' -pb <- Seurat::SpatialFeaturePlot(spatial, features = 'Group1-3', pt.size.factor = 1.6, ncol = 2, crop = TRUE) + viridis::scale_fill_viridis() - -p1 <- correlationSpot(spatial, genes = c('Tnfsf13b', 'Cd79a'), celltypes = 'Group1-3', pt.size.factor = 1.6, ncol = 2, crop = TRUE) + scale_fill_gradientn( colors = rev(RColorBrewer::brewer.pal(12, 'Spectral')),limits = c(-1, 1)) -p2 <- correlationSpot(spatial, genes = c('Tnfsf13b', 'Cd79a'), celltypes = 'Group1-3', pt.size.factor = 1.6, ncol = 2, crop = TRUE, average_by_cluster = TRUE) + scale_fill_gradientn(colors = rev(RColorBrewer::brewer.pal(12, 'Spectral')),limits = c(-1, 1)) + ggtitle('correlation averaged across clusters') - -cowplot::plot_grid(pa, pb, p1, p2, ncol = 2) -``` -![plot_cpdb](exampleImages/correlationSpot_example.png) - - -## small_legend/small_guide/small_axis/small_grid/topright_legend/topleft_legend/bottomleft_legend/bottomright_legend -As shown in the examples above, these are some functions to quickly adjust the size and position of ggplots. -```R -# for example -g <- Seurat::DimPlot(kidneyimmune, group.by = "celltype") -g1 <- g + small_legend() + small_guide() + small_axis() + bottomleft_legend() -library(patchwork) -g + g1 -``` -![gghelperfunctions](exampleImages/gghelperfunctions_example.png) +## Vignette +Please go to the official [vignette](https://zktuong.github.io/ktplots/articles/vignette.html) for more information. +For the legacy version of the `README.md` file, please go [here](https://github.com/zktuong/ktplots/blob/master/_legacy_README.md). ## Citation If you find these functions useful, please consider leaving a star, citing this repository, and/or citing the following [DOI](https://doi.org/10.5281/zenodo.5717922): -To cite a specific version of `ktplots`, please follow the links on the zenodo repository. e.g. v1.2.3: +To cite a specific version of `ktplots`, please follow the links on the zenodo repository. e.g. v2.0.0: ``` -Zewen Kelvin Tuong. (2021). zktuong/ktplots: 1.2.3 (v1.2.3). Zenodo. https://doi.org/10.5281/zenodo.5717922 +ZK Tuong. (2021). zktuong/ktplots: 2.0.0 (v2.0.0). Zenodo. https://doi.org/10.5281/zenodo.5717922 ``` Thank you! diff --git a/_legacy_README.md b/_legacy_README.md new file mode 100644 index 0000000..9080657 --- /dev/null +++ b/_legacy_README.md @@ -0,0 +1,416 @@ +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![codecov](https://codecov.io/gh/zktuong/ktplots/branch/master/graph/badge.svg)](https://codecov.io/gh/zktuong/ktplots) +[![R](https://github.com/zktuong/ktplots/actions/workflows/r.yml/badge.svg)](https://github.com/zktuong/ktplots/actions/workflows/r.yml) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.6728350.svg)](https://doi.org/10.5281/zenodo.5717922) + +# ktplots +R plotting functions to plot gene expression data of single-cell data. + +For a python port of `ktplots`, please check out my other [repository](https://www.github.com/zktuong/ktplotspy). + +## Installation instructions +You can install the package via `devtools::install_github()` function in R +```R +if (!requireNamespace("devtools", quietly = TRUE)) + install.packages("devtools") +if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") +devtools::install_github('zktuong/ktplots', dependencies = TRUE) +``` +## Usage instructions +```R +library(ktplots) +``` +There is a test dataset in `SingleCellExperiment` format to test the functions. +```R +library(SingleCellExperiment) +data(kidneyimmune) +#Some functions accept Seurat objects too. +``` +The data is downsampled from the [kidney cell atlas](https://kidneycellatlas.org). + +For more info, please see [Stewart et al. kidney single cell data set published in Science 2019](https://science.sciencemag.org/content/365/6460/1461). + +## plot_cpdb +This function seems like it's the most popular so I moved it up! Please see below for alternative visualisation options. + +Generates a dot plot after CellPhoneDB analysis via specifying the query celltypes and genes. The difference compared to the original cellphonedb `plot` is that this is totally customizable! + +The plotting is largely determined by the format of the meta file provided to CellPhoneDB analysis. + +To run, you will need to load in the means.txt and pvals.txt from the analysis. If you are using results from cellphonedb `deg_analysis` mode from version >= 3, the `pvalues.txt` is `relevant_interactions.txt` and also add `degs_analysis = TRUE` into all the functions below. +```R +# pvals <- read.delim("pvalues.txt", check.names = FALSE) +# means <- read.delim("means.txt", check.names = FALSE) + +# I've provided an example dataset +data(cpdb_output) +plot_cpdb(scdata = kidneyimmune, cell_type1 = 'B cell', cell_type2 = 'CD4T cell', + celltype_key = 'celltype', # column name where the cell ids are located in the metadata + splitby_key = 'Experiment', # column name where the grouping column is. Optional. + means = means, pvals = pvals, + genes = c("XCR1", "CXCL10", "CCL5")) + +small_axis(fontsize = 3) + small_grid() + small_guide() + small_legend(fontsize = 2) # some helper functions included in ktplots to help with the plotting +``` +![plot_cpdb](exampleImages/plot_cpdb_example.png) + +You can also try specifying `gene_family` option which will grep some pre-determined genes. +```R +plot_cpdb(scdata = kidneyimmune, cell_type1 = 'B cell', cell_type2 = 'CD4T cell', + celltype_key = 'celltype', means = means, pvals = pvals, splitby_key = 'Experiment', + gene_family = 'chemokines') + small_guide() + small_axis() + small_legend(keysize=.5) +``` +![plot_cpdb](exampleImages/plot_cpdb_example1.png) +```R +plot_cpdb(scdata = kidneyimmune, cell_type1 = 'B cell', cell_type2 = 'CD4T cell', + celltype_key = 'celltype', means = means, pvals = pvals, splitby_key = 'Experiment', + gene_family = 'chemokines', col_option = "maroon", highlight = "blue") + small_guide() + small_axis() + small_legend(keysize=.5) +``` +![plot_cpdb](exampleImages/plot_cpdb_example2.png) +```R +plot_cpdb(scdata = kidneyimmune, cell_type1 = 'B cell', cell_type2 = 'CD4T cell', + celltype_key = 'celltype', means = means, pvals = pvals, splitby_key = 'Experiment', + gene_family = 'chemokines', col_option = viridis::cividis(50)) + small_guide() + small_axis() + small_legend(keysize=.5) +``` +![plot_cpdb](exampleImages/plot_cpdb_example3.png) +```R +plot_cpdb(scdata = kidneyimmune, cell_type1 = 'B cell', cell_type2 = 'CD4T cell', + celltype_key = 'celltype', means = means, pvals = pvals, splitby_key = 'Experiment', + gene_family = 'chemokines', noir = TRUE) + small_guide() + small_axis() + small_legend(keysize=.5) +``` +![plot_cpdb](exampleImages/plot_cpdb_example4.png) + +A new style to plot inspired from `squidpy.pl.ligrec` where significant interactions are shown as outline instead. +```R +plot_cpdb(scdata = kidneyimmune, cell_type1 = 'B cell', cell_type2 = 'CD4T cell', + celltype_key = 'celltype', means = means, pvals = pvals, splitby_key = 'Experiment', + gene_family = 'chemokines', default_style = FALSE) + small_guide() + small_axis() + small_legend(keysize=.5) +``` +![plot_cpdb](exampleImages/plot_cpdb_alternate2.png) + +if `genes` and `gene_family` are both not specified, the function will try to plot everything. + +Specifying `keep_significant_only` will only keep those that are p<0.05. + +You can now also specify more than 1 gene families: +```R +p <- plot_cpdb(scdata = kidneyimmune, cell_type1 = "B cell", cell_type2 = "CD4T cell", + celltype_key = "celltype", means = means, pvals = pvals, + splitby_key = "Experiment", gene_family = c("Coinhibitory", "Costimulatory"), + cluster_rows = FALSE, # ensures that the families are separate + keep_significant_only = TRUE) +``` +![plot_cpdb](exampleImages/plotcpdb_two.png) + +And also provide custom families as a `data.frame`. +```R +df = data.frame(set1 = c("CCR6", "CCL20"), set2 = c("CCL5", "CCR4")) +p <- plot_cpdb(scdata = kidneyimmune, cell_type1 = "B cell", cell_type2 = "CD4T cell", celltype_key = "celltype", means = means, pvals = pvals,, + splitby_key = "Experiment", gene_family = c("set1", "set2"), custom_gene_family =df, + keep_significant_only = TRUE) +``` +![plot_cpdb](exampleImages/plotcpdb_custom.png) + +## combine_cpdb + +For the `splitby_key` option to work, the annotation in the meta file must be defined in the following format: +```R +{splitby_key}_{celltype_key} +``` + +so to set up an example vector, it would be something like: +```R +annotation <- paste0(kidneyimmune$Experiment, '_', kidneyimmune$celltype) +``` + +The recommended way to use `splitby_key` is to prepare the data with `combine_cpdb` like in this example: + +```R +# Assume you have 2 cellphonedb runs, one where it's just naive and the other is treated, you will end up with 2 cellphonedb out folders +# remember, the celltype labels you provide to cellphonedb's meta.txt should already be like {splitby_key}_{celltype_key} +# so the two meta.txt should look like: + +# naive file +# ATTAGTCGATCGTAGT-1 naive_CD4Tcell +# ATTAGTGGATCGTAGT-1 naive_CD4Tcell +# ATTAGTCGACCGTAGT-1 naive_CD8Tcell +# ATTAGTCGATCGTAGT-1 naive_CD8Tcell +# ATGAGTCGATCGTAGT-1 naive_Bcell +# ATTAGTCGATCGTGGT-1 naive_Bcell + +# treated file +# ATTAGTCAATCGTAGT-1 treated_CD4Tcell +# ATTAGTGGATCGTAGT-1 treated_CD4Tcell +# ATTAGTCGACCATAGT-1 treated_CD8Tcell +# ATTAGTAGATCGTAGT-1 treated_CD8Tcell +# ATGAGTCGATCGTAAT-1 treated_Bcell +# ATTAGTCGATCGTGAT-1 treated_Bcell + +# one you have set that up correctly, you can then read in the files. +naive_means <- read.delim("naive_out/means.txt", check.names = FALSE) +naive_pvals <- read.delim("naive_out/pvalues.txt", check.names = FALSE) +naive_decon <- read.delim("naive_out/deconvoluted.txt", check.names = FALSE) + +treated_means <- read.delim("treated_out/means.txt", check.names = FALSE) +treated_pvals <- read.delim("treated_out/pvalues.txt", check.names = FALSE) +treated_decon <- read.delim("treated_out/deconvoluted.txt", check.names = FALSE) + +means <- combine_cpdb(naive_means, treated_means) +pvals <- combine_cpdb(naive_pvals, treated_pvals) +decon <- combine_cpdb(naive_decon, treated_decon) + +plot_cpdb(...) +``` + +## plot_cpdb2 +Generates a circos-style wire/arc/chord plot for cellphonedb results. + +This function piggy-backs on the original `plot_cpdb` function and generates the results like this: + +Please help contribute to the interaction grouping list [here](https://docs.google.com/spreadsheets/d/1O9OKU7J0NdeQNJAIMpsHtWAFvY014GDQ7aigdGUSTmc/edit?usp=sharing)! + +Credits to Ben Stewart for coming up with the base code! + +#### Simple usage with example data +```R +library(ktplots) +data(kidneyimmune) +data(cpdb_output2) + +p <- plot_cpdb2(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', + scdata = kidneyimmune, + celltype_key = 'celltype', # column name where the cell ids are located in the metadata + means = means2, + pvals = pvals2, + deconvoluted = decon2, # new options from here on specific to plot_cpdb2 + desiredInteractions = list( + c('CD4T cell', 'B cell'), + c('B cell', 'CD4T cell')), + interaction_grouping = interaction_annotation, + edge_group_colors = c( + "Activating" = "#e15759", + "Chemotaxis" = "#59a14f", + "Inhibitory" = "#4e79a7", + "Intracellular trafficking" = "#9c755f", + "DC_development" = "#B07aa1", + "Unknown" = "#e7e7e7" + ), + node_group_colors = c( + "CD4T cell" = "red", + "B cell" = "blue"), + keep_significant_only = TRUE, + standard_scale = TRUE, + remove_self = TRUE + ) +p +``` +![plot_cpd2](exampleImages/plot_cpdb2_example.png) + +#### Formatting data from anndata formatted file +```R +# code example but not using the example datasets +library(SingleCellExperiment) +library(reticulate) +library(ktplots) +ad=import('anndata') + +adata = ad$read_h5ad('rna.h5ad') +counts <- Matrix::t(adata$X) +row.names(counts) <- row.names(adata$var) +colnames(counts) <- row.names(adata$obs) +sce <- SingleCellExperiment(list(counts = counts), colData = adata$obs, rowData = adata$var) + +means <- read.delim('out/means.txt', check.names = FALSE) +pvalues <- read.delim('out/pvalues.txt', check.names = FALSE) +deconvoluted <- read.delim('out/deconvoluted.txt', check.names = FALSE) +interaction_grouping <- read.delim('interactions_groups.txt') +# > head(interaction_grouping) +# interaction role +# 1 ALOX5_ALOX5AP Activating +# 2 ANXA1_FPR1 Inhibitory +# 3 BTLA_TNFRSF14 Inhibitory +# 4 CCL5_CCR5 Chemotaxis +# 5 CD2_CD58 Activating +# 6 CD28_CD86 Activating + +test <- plot_cpdb2(cell_type1 = "CD4_Tem|CD4_Tcm|CD4_Treg", # same usage style as plot_cpdb + cell_type2 = "cDC", + celltype_key = 'fine_clustering', + splitby_key = 'treatment_group_1', + scdata = sce, + means = means, + pvals = pvalues, + deconvoluted = deconvoluted, # new options from here on specific to plot_cpdb2 + gene_symbol_mapping = 'index', # column name in rowData holding the actual gene symbols if the row names is ENSG Ids. Might be a bit buggy + desiredInteractions = list(c('CD4_Tcm', 'cDC1'), c('CD4_Tcm', 'cDC2'), c('CD4_Tem', 'cDC1'), c('CD4_Tem', 'cDC2 '), c('CD4_Treg', 'cDC1'), c('CD4_Treg', 'cDC2')), + interaction_grouping = interaction_grouping, + edge_group_colors = c("Activating" = "#e15759", "Chemotaxis" = "#59a14f", "Inhibitory" = "#4e79a7", " Intracellular trafficking" = "#9c755f", "DC_development" = "#B07aa1"), + node_group_colors = c("CD4_Tcm" = "#86bc86", "CD4_Tem" = "#79706e", "CD4_Treg" = "#ff7f0e", "cDC1" = "#bcbd22" ,"cDC2" = "#17becf"), + keep_significant_only = TRUE, + standard_scale = TRUE, + remove_self = TRUE) +``` +![plot_cpd2](exampleImages/plot_cpdb2.png) + +## plot_cpdb3 +Generates a chord diagram inspired from [CellChat](https://github.com/sqjin/CellChat)'s way of showing the data! + +Usage is similar to `plot_cpdb2` but with reduced options. Additional kwargs are passed to `plot_cpdb`. +```R +library(ktplots) +data(kidneyimmune) +data(cpdb_output2) + +p <- plot_cpdb3(cell_type1 = 'B cell', cell_type2 = 'CD4T cell|MNPd', + scdata = kidneyimmune, + celltype_key = 'celltype', # column name where the cell ids are located in the metadata + means = means2, + pvals = pvals2, + deconvoluted = decon2, # new options from here on specific to plot_cpdb3 + keep_significant_only = TRUE, + standard_scale = TRUE, + remove_self = TRUE + ) +p +``` + +![plot_cpdb3](exampleImages/plot_cpdb3.png) + + +## plot_cpdb4 +New! Alternate way of showing the chord diagram for specific interactions! + +Usage is similar to `plot_cpdb3` but with additional required `interaction` option. Additional kwargs are passed to `plot_cpdb`. +```R +library(ktplots) +data(kidneyimmune) +data(cpdb_output2) + +p <- plot_cpdb4( + interaction = 'CLEC2D-KLRB1', + cell_type1 = 'NK', cell_type2 = 'Mast', + scdata = kidneyimmune, + celltype_key = 'celltype', + means = means2, + pvals = pvals2, + deconvoluted = decon2, + keep_significant_only = TRUE, + standard_scale = TRUE, + ) +p +``` + +![plot_cpdb4](exampleImages/plot_cpdb4.png) + +or specify more than 1 interactions + only show specific cell-type type interactions! +```R +plot_cpdb4( + interaction = c('CLEC2D-KLRB1', 'CD40-CD40LG'), + cell_type1 = 'NK|B', cell_type2 = 'Mast|CD4T', + scdata = kidneyimmune, + celltype_key = 'celltype', + means = means2, + pvals = pvals2, + deconvoluted = decon2, + desiredInteractions = list( + c('NK cell', 'Mast cell'), + c('NK cell', 'NKT cell'), + c('NKT cell', 'Mast cell'), + c('B cell', 'CD4T cell')), + keep_significant_only = TRUE, + ) +``` + +![plot_cpdb42](exampleImages/plot_cpdb4_2.png) + + +## plot_cpdb_heatmap + +Ported the original heatmap plot to this pacakge as per the main cellphonedb repo. Uses `pheatmap` internally. Colours indicate the number of significant interactions. + +```R +plot_cpdb_heatmap(kidneyimmune, 'celltype', pvals2, cellheight = 10, cellwidth = 10) +``` + +![plot_cpdb_heatmap](exampleImages/plot_cpdb_heatmap2.png) + +```R +plot_cpdb_heatmap(kidneyimmune, 'celltype', pvals2, cellheight = 10, cellwidth = 10, symmetrical = FALSE) +``` + +![plot_cpdb_heatmap](exampleImages/plot_cpdb_heatmap.png) + +The values for the `symmetrical=FALSE` mode follow the direction of the L-R direction where it's always moleculeA:celltypeA -> moleculeB:celltypeB. + +Therefore, if you trace on the `x-axis` for `celltype A` [MNPa(mono)] to `celltype B` [CD8T cell] on the `y-axis`: + +A -> B is 18 interactions + +Whereas if you trace on the `y-axis` for `celltype A` [MNPa(mono)] to `celltype B` [CD8T cell] on the `x-axis`: + +A -> B is 9 interactions + +`symmetrical=TRUE` mode will return 18+9 = 27 + +## Other useful functions + +## geneDotPlot +Plotting gene expression dot plots heatmaps. +```R +# Note, this conflicts with tidyr devel version +geneDotPlot(scdata = kidneyimmune, # object + genes = c("CD68", "CD80", "CD86", "CD74", "CD2", "CD5"), # genes to plot + celltype_key = "celltype", # column name in meta data that holds the cell-cluster ID/assignment + splitby_key = 'Project', # column name in the meta data that you want to split the plotting by. If not provided, it will just plot according to celltype_key + standard_scale = TRUE) + # whether to scale expression values from 0 to 1. See ?geneDotPlot for other options +theme(strip.text.x = element_text(angle=0, hjust = 0, size =7)) + small_guide() + small_legend() +``` +Hopefully you end up with something like this: +![geneDotPlot](exampleImages/geneDotPlot_example.png) + + +## correlationSpot +Ever wanted to ask if your gene(s) and/or prediction(s) of interests correlate spatially in vissium data? Now you can! +**disclaimer** It might be buggy. +```R +library(ggplot2) +scRNAseq <- Seurat::SCTransform(scRNAseq, verbose = FALSE) %>% Seurat::RunPCA(., verbose = FALSE) %>% Seurat::RunUMAP(., dims = 1:30, verbose = FALSE) +anchors <- Seurat::FindTransferAnchors(reference = scRNAseq, query = spatial, normalization.method = "SCT") +predictions.assay <- Seurat::TransferData(anchorset = anchors, refdata = scRNAseq$label, dims = 1:30, prediction.assay = TRUE, weight.reduction = spatial[["pca"]]) +spatial[["predictions"]] <- predictions.assay +Seurat::DefaultAssay(spatial) <- "predictions" +Seurat::DefaultAssay(spatial) <- 'SCT' +pa <- Seurat::SpatialFeaturePlot(spatial, features = c('Tnfsf13b', 'Cd79a'), pt.size.factor = 1.6, ncol = 2, crop = TRUE) + viridis::scale_fill_viridis() +Seurat::DefaultAssay(spatial) <- 'predictions' +pb <- Seurat::SpatialFeaturePlot(spatial, features = 'Group1-3', pt.size.factor = 1.6, ncol = 2, crop = TRUE) + viridis::scale_fill_viridis() + +p1 <- correlationSpot(spatial, genes = c('Tnfsf13b', 'Cd79a'), celltypes = 'Group1-3', pt.size.factor = 1.6, ncol = 2, crop = TRUE) + scale_fill_gradientn( colors = rev(RColorBrewer::brewer.pal(12, 'Spectral')),limits = c(-1, 1)) +p2 <- correlationSpot(spatial, genes = c('Tnfsf13b', 'Cd79a'), celltypes = 'Group1-3', pt.size.factor = 1.6, ncol = 2, crop = TRUE, average_by_cluster = TRUE) + scale_fill_gradientn(colors = rev(RColorBrewer::brewer.pal(12, 'Spectral')),limits = c(-1, 1)) + ggtitle('correlation averaged across clusters') + +cowplot::plot_grid(pa, pb, p1, p2, ncol = 2) +``` +![plot_cpdb](exampleImages/correlationSpot_example.png) + + +## small_legend/small_guide/small_axis/small_grid/topright_legend/topleft_legend/bottomleft_legend/bottomright_legend +As shown in the examples above, these are some functions to quickly adjust the size and position of ggplots. +```R +# for example +g <- Seurat::DimPlot(kidneyimmune, group.by = "celltype") +g1 <- g + small_legend() + small_guide() + small_axis() + bottomleft_legend() +library(patchwork) +g + g1 +``` +![gghelperfunctions](exampleImages/gghelperfunctions_example.png) + + + +## Citation +If you find these functions useful, please consider leaving a star, citing this repository, and/or citing the following [DOI](https://doi.org/10.5281/zenodo.5717922): + +To cite a specific version of `ktplots`, please follow the links on the zenodo repository. e.g. v1.2.3: +``` +Zewen Kelvin Tuong. (2021). zktuong/ktplots: 1.2.3 (v1.2.3). Zenodo. https://doi.org/10.5281/zenodo.5717922 +``` + +Thank you! diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..99dc5d4 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,32 @@ +codecov: + require_ci_to_pass: no + +coverage: + status: + project: + default: + # Require 1% coverage, i.e., always succeed + target: 1 + patch: false + changes: false + +comment: + layout: "diff, flags, files" + behavior: once + require_base: no + +ignore: + - "data" + - "exampleImages" + - "inst" + - "man" + - "tests" + - "vignette" + - "*.yml" + - "*.rmd" + - "*.yaml" + - "LICENSE" + - ".git*" + - "R/init_ktplots.R" + - "R/ktplots.R" + - "R/correlationSpot.R" diff --git a/data/cpdb_output_v5.RData b/data/cpdb_output_v5.RData new file mode 100644 index 0000000..daa5d4d Binary files /dev/null and b/data/cpdb_output_v5.RData differ diff --git a/man/geneDotPlot.Rd b/man/geneDotPlot.Rd index 8943df0..c5b48c7 100644 --- a/man/geneDotPlot.Rd +++ b/man/geneDotPlot.Rd @@ -6,9 +6,9 @@ \usage{ geneDotPlot( scdata, - idents, + celltype_key, genes, - split.by = NULL, + splitby_key = NULL, pct.threshold = 0.05, scale = NULL, standard_scale = NULL, @@ -28,11 +28,11 @@ geneDotPlot( \arguments{ \item{scdata}{single-cell data. can be seurat/summarizedexperiment object} -\item{idents}{column name holding the idents for each cell} +\item{celltype_key}{column name holding the celltype for each cell} \item{genes}{genes you want to plot} -\item{split.by}{column name in the metadata/coldata table to split the spots by. If not provided, it will plot via idents provided.} +\item{splitby_key}{column name in the metadata/coldata table to split the spots by. If not provided, it will plot via celltype_key provided.} \item{pct.threshold}{float. required to keep gene expressed by minimum percentage of cells} @@ -40,7 +40,7 @@ geneDotPlot( \item{standard_scale}{logical. scale the expression to range from 0 to 1. NULL defaults to FALSE.} -\item{keepLevels}{logical. keep the original factor of the levels of the idents (for plotting)} +\item{keepLevels}{logical. keep the original factor of the levels of the celltype_key (for plotting)} \item{save.plot}{logical. will try to save the pdf} @@ -69,6 +69,6 @@ Plotting genes as dotplot \examples{ \donttest{ data(kidneyimmune) -geneDotPlot(kidneyimmune, genes = c("CD68", "CD80", "CD86", "CD74", "CD2", "CD5"), idents = "celltype", split.by = 'Project', standard_scale = TRUE) + theme(strip.text.x = element_text(angle=45, hjust = 0)) +geneDotPlot(kidneyimmune, genes = c('CD68', 'CD80', 'CD86', 'CD74', 'CD2', 'CD5'), celltype_key = 'celltype', splitby_key = 'Project', standard_scale = TRUE) + theme(strip.text.x = element_text(angle = 45, hjust = 0)) } } diff --git a/man/kidneyimmune.Rd b/man/kidneyimmune.Rd index 95f2c52..6024cef 100644 --- a/man/kidneyimmune.Rd +++ b/man/kidneyimmune.Rd @@ -13,6 +13,12 @@ \alias{sig_means2} \alias{covid_cpdb_meta} \alias{covid_sample_metadata} +\alias{means_v5} +\alias{sce_v5} +\alias{relevant_interactions_v5} +\alias{decon_v5} +\alias{cellsign_v5} +\alias{interaction_scores_v5} \title{kidneyimmune} \format{ A SingleCellExperiment object with the following slots filled @@ -52,6 +58,18 @@ An object of class \code{data.frame} with 837 rows and 156 columns. An object of class \code{data.frame} with 12 rows and 3 columns. An object of class \code{data.frame} with 12 rows and 2 columns. + +data after CellPhoneDB v5 analysis + +An object of class \code{SingleCellExperiment} with 30800 rows and 3312 columns. + +An object of class \code{data.frame} with 419 rows and 85 columns. + +An object of class \code{data.frame} with 6825 rows and 49 columns. + +An object of class \code{data.frame} with 9 rows and 85 columns. + +An object of class \code{data.frame} with 2440 rows and 94 columns. } \source{ \url{https://www.kidneycellatlas.org/} @@ -78,6 +96,18 @@ sig_means2 covid_cpdb_meta covid_sample_metadata + +data(cpdb_output_v5) + +sce_v5 + +relevant_interactions_v5 + +decon_v5 + +cellsign_v5 + +interaction_scores_v5 } \description{ kidneyimmune - A small set of demo data from Stewart et al. 2019 Science. See \url{https://www.kidneycellatlas.org/} @@ -101,10 +131,23 @@ sig_means2 - Dataframe of CellPhoneDB output significant_means.txt file covid_cpdb_meta - Example dataframe to use for cpdb_meta option in compare_cpdb covid_sample_metadata - Example dataframe to use for sample_metadata option in compare_cpdb + +cpdb_output_v5 - Dataframe of CellPhoneDB output means.txt file + +sce_v5 - A small dummy singlecelldata for cellphonedb v5 + +relevant_interactions_v5 - Dataframe of CellPhoneDB output relevant_interactions.txt file + +decon_v5 - Dataframe of CellPhoneDB output deconvoluted.txt file + +cellsign_v5 - Dataframe of CellPhoneDB output CellSign.txt file + +interaction_scores_v5 - Dataframe of CellPhoneDB output interaction_scores.txt file } \examples{ data(kidneyimmune) data(cpdb_output) data(cpdb_output) +data(cpdb_output_v5) } \keyword{datasets} diff --git a/man/misc.Rd b/man/misc.Rd index cb4590a..97f67c4 100644 --- a/man/misc.Rd +++ b/man/misc.Rd @@ -56,7 +56,7 @@ miscellaneous functions miscellaneous functions } \examples{ -x <- range01(runif(100)) +x <- range01(runif(100)) \donttest{ data$width <- data$width \%nin\% params$width } diff --git a/man/plot_cpdb.Rd b/man/plot_cpdb.Rd index d251114..008dafa 100644 --- a/man/plot_cpdb.Rd +++ b/man/plot_cpdb.Rd @@ -2,67 +2,70 @@ % Please edit documentation in R/plot_cpdb.R \name{plot_cpdb} \alias{plot_cpdb} -\title{Plotting cellphonedb results} +\title{Plotting CellPhoneDB results} \usage{ plot_cpdb( + scdata, cell_type1, cell_type2, - scdata, - idents, + celltype_key, means, pvals, + interaction_scores = NULL, + cellsign = NULL, max_size = 8, - p.adjust.method = NULL, - keep_significant_only = FALSE, - split.by = NULL, - gene.family = NULL, + keep_significant_only = TRUE, + splitby_key = NULL, + gene_family = NULL, custom_gene_family = NULL, genes = NULL, - scale = NULL, - standard_scale = NULL, + standard_scale = TRUE, cluster_rows = TRUE, col_option = viridis::viridis(50), default_style = TRUE, - noir = FALSE, - highlight = "red", + highlight_col = "red", highlight_size = NULL, - separator = NULL, - special_character_search_pattern = NULL, + max_highlight_size = 2, + special_character_regex_pattern = NULL, degs_analysis = FALSE, - verbose = FALSE, return_table = FALSE, exclude_interactions = NULL, + min_interaction_score = 0, + scale_alpha_by_interaction_scores = FALSE, + scale_alpha_by_cellsign = FALSE, + filter_by_cellsign = FALSE, + title = "", ... ) } \arguments{ -\item{cell_type1}{cell type 1} +\item{scdata}{single-cell data. can be Seurat/SingleCellExperiment object} -\item{cell_type2}{cell type 2} +\item{cell_type1}{Name of cell type 1. Accepts regex pattern.} -\item{scdata}{single-cell data. can be seurat/summarizedexperiment object} +\item{cell_type2}{Name of cell type 2. Accepts regex pattern.} -\item{idents}{vector holding the idents for each cell or column name of scdata's metadata. MUST match cpdb's columns} +\item{celltype_key}{Column name in metadata/colData storing the celltype annotations. Values in this column should match the second column of the input `meta.txt` used for CellPhoneDB.} -\item{means}{object holding means.txt from cpdb output} +\item{means}{Data frame corresponding to `means.txt` from CellPhoneDB.} -\item{pvals}{object holding pvals.txt from cpdb output. Use relevant_interactions.txt if degs_analysis mode.} +\item{pvals}{Data frame corresponding to `pvalues.txt` or `relevant_interactions.txt` from CellPhoneDB.} -\item{max_size}{max size of points.} +\item{interaction_scores}{Data frame corresponding to `interaction_scores.txt` from CellPhoneDB version 5 onwards.} -\item{p.adjust.method}{correction method. p.adjust.methods of one of these options: c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none')} +\item{cellsign}{Data frame corresponding to `CellSign.txt` from CellPhoneDB version 5 onwards.} -\item{keep_significant_only}{logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb.} +\item{max_size}{max size of points.} -\item{split.by}{column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis.} +\item{keep_significant_only}{logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb.} -\item{gene.family}{default = NULL. some predefined group of genes. can take one (or several) of these default options: 'chemokines', 'Th1', 'Th2', 'Th17', 'Treg', 'costimulatory', 'coinhibitory', 'niche'. Also accepts name(s) of custom gene families.} +\item{splitby_key}{column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis.} -\item{custom_gene_family}{default = NULL. If provided, will update the gene.family function with this custom entry. Both `gene.family` (name of the custom family) and `custom_gene_family` must be specified for this to work. Provide either a data.frame with column names as name of family and genes in rows or a named likes like : list("customfamily" = c("genea", "geneb", "genec"))} +\item{gene_family}{default = NULL. some predefined group of genes. can take one (or several) of these default options: 'chemokines', 'Th1', 'Th2', 'Th17', 'Treg', 'costimulatory', 'coinhibitory', 'niche'. Also accepts name(s) of custom gene families.} -\item{genes}{default = NULL. can specify custom list of genes if gene.family is NULL} +\item{custom_gene_family}{default = NULL. If provided, will update the gene_family function with this custom entry. Both `gene_family` (name of the custom family) and `custom_gene_family` must be specified for this to work. Provide either a data.frame with column names as name of family and genes in rows or a named likes like : list('customfamily' = c('genea', 'geneb', 'genec'))} -\item{scale}{logical. scale the expression to mean +/- SD. NULL defaults to TRUE.} +\item{genes}{default = NULL. can specify custom list of genes if gene_family is NULL} \item{standard_scale}{logical. scale the expression to range from 0 to 1. NULL defaults to FALSE.} @@ -70,39 +73,43 @@ plot_cpdb( \item{col_option}{specify plotting colours} -\item{noir}{default = FALSE. makes it b/w} +\item{default_style}{default = TRUE. Show all mean values and trace significant interactions with `higlight` colour. If FALSE, significant interactions will be presented as a white ring.} -\item{highlight}{colour for highlighting p <0.05} +\item{highlight_col}{colour for highlighting p <0.05} \item{highlight_size}{stroke size for highlight if p < 0.05. if NULL, scales to -log10(pval).} -\item{separator}{separator to use to split between celltypes. Unless otherwise specified, the separator will be `>@<`. Make sure the idents and split.by doesn't overlap with this.} +\item{max_highlight_size}{max size of stroke for highlight.} -\item{special_character_search_pattern}{search pattern if the cell type names contains special character. NULL defaults to '/|:|\\?|\\*|\\+|[\\]|\\(|\\)'.} +\item{special_character_regex_pattern}{search pattern if the cell type names contains special character. NULL defaults to '/|:|\\?|\\*|\\+|[\\]|\\(|\\)'.} -\item{degs_analysis}{if is cellphonedb degs_analysis mode.} - -\item{verbose}{prints cat/print statements if TRUE.} +\item{degs_analysis}{if is CellPhoneDB degs_analysis mode.} \item{return_table}{whether or not to return as a table rather than to plot.} \item{exclude_interactions}{if provided, the interactions will be removed from the output.} -\item{...}{passes arguments to grep for cell_type1 and cell_type2.} +\item{min_interaction_score}{Filtering the interactions shown by including only those above the given interaction score.} + +\item{scale_alpha_by_interaction_scores}{Whether or not to filter values by the interaction score.} -\item{default_stlye}{default = TRUE. Show all mean values and trace significant interactions with `higlight` colour. If FALSE, significant interactions will be presented as a white ring.} +\item{scale_alpha_by_cellsign}{Whether or not to filter the transparency of interactions by the cellsign.} + +\item{filter_by_cellsign}{Filter out interactions with a 0 value cellsign.} + +\item{...}{passes arguments to grep for cell_type1 and cell_type2.} } \value{ ggplot dot plot object of cellphone db output } \description{ -Plotting cellphonedb results +Plotting CellPhoneDB results } \examples{ \donttest{ data(kidneyimmune) data(cpdb_output) -plot_cpdb('B cell', 'CD4T cell', kidneyimmune, 'celltype', means, pvals, split.by = 'Experiment', genes = c('CXCL13', 'CD274', 'CXCR5')) -plot_cpdb('B cell', 'CD4T cell', kidneyimmune, 'celltype', means, pvals, split.by = 'Experiment', gene.family = 'chemokines') +plot_cpdb(kidneyimmune, 'B cell', 'CD4T cell', 'celltype', means, pvals, splitby_key = 'Experiment', genes = c('CXCL13', 'CD274', 'CXCR5')) +plot_cpdb(kidneyimmune, 'B cell', 'CD4T cell', 'celltype', means, pvals, splitby_key = 'Experiment', gene_family = 'chemokines') } } diff --git a/man/plot_cpdb2.Rd b/man/plot_cpdb2.Rd index 18d744d..b84e18f 100644 --- a/man/plot_cpdb2.Rd +++ b/man/plot_cpdb2.Rd @@ -2,21 +2,19 @@ % Please edit documentation in R/plot_cpdb2.R \name{plot_cpdb2} \alias{plot_cpdb2} -\title{Plotting cellphonedb results} +\title{Plotting CellPhoneDB results} \usage{ plot_cpdb2( + scdata, cell_type1, cell_type2, - scdata, - idents, + celltype_key, means, pvals, deconvoluted, - p.adjust.method = NULL, keep_significant_only = TRUE, - split.by = NULL, + splitby_key = NULL, standard_scale = TRUE, - separator = NULL, gene_symbol_mapping = NULL, frac = 0.1, remove_self = TRUE, @@ -26,17 +24,18 @@ plot_cpdb2( node_group_colors = NULL, degs_analysis = FALSE, return_df = FALSE, + plot_score_as_thickness = TRUE, ... ) } \arguments{ +\item{scdata}{single-cell data. Must be SingleCellExperiment object} + \item{cell_type1}{cell type 1} \item{cell_type2}{cell type 2} -\item{scdata}{single-cell data. can be seurat/summarizedexperiment object} - -\item{idents}{vector holding the idents for each cell or column name of scdata's metadata. MUST match cpdb's columns} +\item{celltype_key}{column name of scdata's metadata. MUST match cpdb's columns} \item{means}{object holding means.txt from cpdb output} @@ -44,16 +43,12 @@ plot_cpdb2( \item{deconvoluted}{object holding deconvoluted.txt from cpdb output} -\item{p.adjust.method}{correction method. p.adjust.methods of one of these options: c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none')} - \item{keep_significant_only}{logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb.} -\item{split.by}{column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis.} +\item{splitby_key}{column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis.} \item{standard_scale}{logical. scale the expression to range from 0 to 1. Default is TRUE} -\item{separator}{default = NULL. separator to use to split between celltypes. Unless otherwise specified, the separator will be `>@<`. Make sure the idents and split.by doesn't overlap with this.} - \item{gene_symbol_mapping}{default = NULL.column name for rowData in sce holding the actual gene symbols if row names aren't gene symbols} \item{frac}{default = 0.1. Minimum fraction of celtypes expressing a gene in order to keep the interaction. Gene must be expressesd >= `frac` in either of the pair of celltypes in order to keep.} @@ -62,27 +57,28 @@ plot_cpdb2( \item{desiredInteractions}{default = NULL. Specific list of celltype comparisons e.g. list(c('CD4_Tcm', 'cDC1'), c('CD4_Tcm', 'cDC2')). Also accepts a dataframe where first column is celltype 1 and 2nd column is celltype 2.} -\item{interaction_grouping}{default = NULL. dataframe specifying groupings of cellphonedb interactions. First column must be cellphonedb's interacting_pair column. second column is whatever grouping you want.} +\item{interaction_grouping}{default = NULL. dataframe specifying groupings of CellPhoneDB interactions. First column must be CellPhoneDB's interacting_pair column. second column is whatever grouping you want.} -\item{edge_group_colors}{default = NULL. vector for colour mapping for edge groups. only used if split.by is specified.} +\item{edge_group_colors}{default = NULL. vector for colour mapping for edge groups. only used if splitby_key is specified.} \item{node_group_colors}{default = NULL. vector for colour mapping for node labels.} -\item{degs_analysis}{if is cellphonedb degs_analysis mode.} +\item{degs_analysis}{if is CellPhoneDB degs_analysis mode.} \item{return_df}{whether to just return this as a data.frame rather than plotting iot} -\item{...}{passes arguments plot_cpdb} +\item{plot_score_as_thickness}{logical. Whether to scale the thickness of the edges to the interaction score and scale alpha to -log10(significance). Default is TRUE. FALSE will be opposite behaviour} -\item{scale}{logical. scale the expression to mean +/- SD. NULL defaults to TRUE.} +\item{...}{passes arguments plot_cpdb} } \value{ -Plotting cellphonedb results as a weird chord diagram +Plotting CellPhoneDB results as a weird chord diagram } \description{ -Plotting cellphonedb results +Plotting CellPhoneDB results } \examples{ \donttest{ + } } diff --git a/man/plot_cpdb3.Rd b/man/plot_cpdb3.Rd index d7b03ef..36015f0 100644 --- a/man/plot_cpdb3.Rd +++ b/man/plot_cpdb3.Rd @@ -2,21 +2,19 @@ % Please edit documentation in R/plot_cpdb3.R \name{plot_cpdb3} \alias{plot_cpdb3} -\title{Plotting cellphonedb results as a chord diagram} +\title{Plotting CellPhoneDB results as a chord diagram} \usage{ plot_cpdb3( + scdata, cell_type1, cell_type2, - scdata, - idents, + celltype_key, means, pvals, deconvoluted, - p.adjust.method = NULL, keep_significant_only = TRUE, - split.by = NULL, + splitby_key = NULL, standard_scale = TRUE, - separator = NULL, gene_symbol_mapping = NULL, frac = 0.1, remove_self = TRUE, @@ -33,13 +31,13 @@ plot_cpdb3( ) } \arguments{ +\item{scdata}{single-cell data. can be seurat/summarizedexperiment object} + \item{cell_type1}{cell type 1} \item{cell_type2}{cell type 2} -\item{scdata}{single-cell data. can be seurat/summarizedexperiment object} - -\item{idents}{vector holding the idents for each cell or column name of scdata's metadata. MUST match cpdb's columns} +\item{celltype_key}{vector holding the celltype_key for each cell or column name of scdata's metadata. MUST match cpdb's columns} \item{means}{object holding means.txt from cpdb output} @@ -47,16 +45,12 @@ plot_cpdb3( \item{deconvoluted}{object holding deconvoluted.txt from cpdb output} -\item{p.adjust.method}{correction method. p.adjust.methods of one of these options: c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none')} - \item{keep_significant_only}{logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb.} -\item{split.by}{column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis.} +\item{splitby_key}{column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis.} \item{standard_scale}{logical. scale the expression to range from 0 to 1. Default is TRUE} -\item{separator}{default = NULL. separator to use to split between celltypes. Unless otherwise specified, the separator will be `>@<`. Make sure the idents and split.by doesn't overlap with this.} - \item{gene_symbol_mapping}{default = NULL.column name for rowData in sce holding the actual gene symbols if row names aren't gene symbols} \item{frac}{default = 0.1. Minimum fraction of celtypes expressing a gene in order to keep the interaction. Gene must be expressesd >= `frac` in either of the pair of celltypes in order to keep.} @@ -65,7 +59,7 @@ plot_cpdb3( \item{desiredInteractions}{default = NULL. Specific list of celltype comparisons e.g. list(c('CD4_Tcm', 'cDC1'), c('CD4_Tcm', 'cDC2')). Also accepts a dataframe where first column is celltype 1 and 2nd column is celltype 2.} -\item{degs_analysis}{if is cellphonedb degs_analysis mode.} +\item{degs_analysis}{if is CellPhoneDB degs_analysis mode.} \item{directional}{Whether links have directions. 1 means the direction is from the first column in df to the second column, -1 is the reverse, 0 is no direction, and 2 for two directional.} @@ -82,16 +76,15 @@ plot_cpdb3( \item{legend.pos.y}{y position of legend} \item{...}{passes arguments plot_cpdb} - -\item{scale}{logical. scale the expression to mean +/- SD. NULL defaults to TRUE.} } \value{ -Plotting cellphonedb results as a CellChat inspired chord diagram +Plotting CellPhoneDB results as a CellChat inspired chord diagram } \description{ -Plotting cellphonedb results as a chord diagram +Plotting CellPhoneDB results as a chord diagram } \examples{ \donttest{ + } } diff --git a/man/plot_cpdb4.Rd b/man/plot_cpdb4.Rd index 215f123..a1241bc 100644 --- a/man/plot_cpdb4.Rd +++ b/man/plot_cpdb4.Rd @@ -2,22 +2,20 @@ % Please edit documentation in R/plot_cpdb4.R \name{plot_cpdb4} \alias{plot_cpdb4} -\title{Plotting select interactions from cellphonedb results as a chord diagram} +\title{Plotting select interactions from CellPhoneDB results as a chord diagram} \usage{ plot_cpdb4( - interaction, + scdata, cell_type1, cell_type2, - scdata, - idents, + celltype_key, means, pvals, deconvoluted, - p.adjust.method = NULL, + interaction, keep_significant_only = TRUE, - split.by = NULL, + splitby_key = NULL, standard_scale = TRUE, - separator = NULL, gene_symbol_mapping = NULL, frac = 0.1, remove_self = TRUE, @@ -35,15 +33,13 @@ plot_cpdb4( ) } \arguments{ -\item{interaction}{interaction to plot. Please use '-' to separate the two molecules e.g. CD40-CD40LG} +\item{scdata}{single-cell data. can be seurat/summarizedexperiment object} \item{cell_type1}{cell type 1} \item{cell_type2}{cell type 2} -\item{scdata}{single-cell data. can be seurat/summarizedexperiment object} - -\item{idents}{vector holding the idents for each cell or column name of scdata's metadata. MUST match cpdb's columns} +\item{celltype_key}{vector holding the celltype_key for each cell or column name of scdata's metadata. MUST match cpdb's columns} \item{means}{object holding means.txt from cpdb output} @@ -51,16 +47,14 @@ plot_cpdb4( \item{deconvoluted}{object holding deconvoluted.txt from cpdb output} -\item{p.adjust.method}{correction method. p.adjust.methods of one of these options: c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none')} +\item{interaction}{interaction to plot. Please use '-' to separate the two molecules e.g. CD40-CD40LG} \item{keep_significant_only}{logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb.} -\item{split.by}{column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis.} +\item{splitby_key}{column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis.} \item{standard_scale}{logical. scale the expression to range from 0 to 1. Default is TRUE} -\item{separator}{default = NULL. separator to use to split between celltypes. Unless otherwise specified, the separator will be `>@<`. Make sure the idents and split.by doesn't overlap with this.} - \item{gene_symbol_mapping}{default = NULL.column name for rowData in sce holding the actual gene symbols if row names aren't gene symbols} \item{frac}{default = 0.1. Minimum fraction of celtypes expressing a gene in order to keep the interaction. Gene must be expressesd >= `frac` in either of the pair of celltypes in order to keep.} @@ -69,7 +63,7 @@ plot_cpdb4( \item{desiredInteractions}{default = NULL. Specific list of celltype comparisons e.g. list(c('CD4_Tcm', 'cDC1'), c('CD4_Tcm', 'cDC2')). Also accepts a dataframe where first column is celltype 1 and 2nd column is celltype 2.} -\item{degs_analysis}{if is cellphonedb degs_analysis mode.} +\item{degs_analysis}{if is CellPhoneDB degs_analysis mode.} \item{directional}{Whether links have directions. 1 means the direction is from the first column in df to the second column, -1 is the reverse, 0 is no direction, and 2 for two directional.} @@ -88,16 +82,15 @@ plot_cpdb4( \item{legend.pos.y}{y position of legend} \item{...}{passes arguments plot_cpdb} - -\item{scale}{logical. scale the expression to mean +/- SD. NULL defaults to TRUE.} } \value{ -Plotting cellphonedb results as a CellChat inspired chord diagram for specific interactions +Plotting CellPhoneDB results as a CellChat inspired chord diagram for specific interactions } \description{ -Plotting select interactions from cellphonedb results as a chord diagram +Plotting select interactions from CellPhoneDB results as a chord diagram } \examples{ \donttest{ + } } diff --git a/man/plot_cpdb_heatmap.Rd b/man/plot_cpdb_heatmap.Rd index 871a0fe..4bae53a 100644 --- a/man/plot_cpdb_heatmap.Rd +++ b/man/plot_cpdb_heatmap.Rd @@ -2,12 +2,11 @@ % Please edit documentation in R/plot_cpdb_heatmap.R \name{plot_cpdb_heatmap} \alias{plot_cpdb_heatmap} -\title{Plotting cellphonedb results as a heatmap} +\title{Plotting CellPhoneDB results as a heatmap} \usage{ plot_cpdb_heatmap( - scdata, - idents, pvals, + degs_analysis = FALSE, log1p_transform = FALSE, show_rownames = TRUE, show_colnames = TRUE, @@ -26,20 +25,16 @@ plot_cpdb_heatmap( high_col = "deeppink4", alpha = 0.05, return_tables = FALSE, - degs_analysis = FALSE, - verbose = FALSE, symmetrical = TRUE, ... ) } \arguments{ -\item{scdata}{single-cell data. can be seurat/summarizedexperiment object} - -\item{idents}{vector holding the idents for each cell or column name of scdata's metadata. MUST match cpdb's columns} +\item{pvals}{Dataframe corresponding to `pvalues.txt` or `relevant_interactions.txt` from CellPhoneDB.} -\item{pvals}{object holding pvals.txt from cpdb output. Use relevant_interactions.txt if degs_analysis mode.} +\item{degs_analysis}{Whether `CellPhoneDB` was run in `deg_analysis` mode} -\item{log1p_transform}{whether to log1p transform the matrix before plotting.} +\item{log1p_transform}{Whether to log1p transform the output.} \item{show_rownames}{whether to show row names.} @@ -75,10 +70,6 @@ plot_cpdb_heatmap( \item{return_tables}{whether or not to return the results as a table rather than the heatmap} -\item{degs_analysis}{if is cellphonedb degs_analysis mode.} - -\item{verbose}{prints cat/print statements if TRUE.} - \item{symmetrical}{whether or not to return as symmetrical matrix} \item{...}{passed to pheatmap::pheatmap.} @@ -87,12 +78,12 @@ plot_cpdb_heatmap( pheatmap object of cellphone db output } \description{ -Plotting cellphonedb results as a heatmap +Plotting CellPhoneDB results as a heatmap } \examples{ \donttest{ data(kidneyimmune) data(cpdb_output2) -plot_cpdb_heatmap(kidneyimmune, "celltype", pvals2) +plot_cpdb_heatmap(pvals2) } } diff --git a/tests/testthat.R b/tests/testthat.R index 7e44522..b5f723b 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,5 +1,4 @@ library(testthat) library(ktplots) -library(ggplot2) test_check("ktplots") diff --git a/tests/testthat/test_cpdbplot1.R b/tests/testthat/test_cpdbplot1.R index 8aae71b..1a216a4 100644 --- a/tests/testthat/test_cpdbplot1.R +++ b/tests/testthat/test_cpdbplot1.R @@ -1,6 +1,7 @@ data(kidneyimmune) data(cpdb_output) data(cpdb_output2) +data(cpdb_output_v5) test_that("combine_cpdb works 1", { p <- combine_cpdb(decon, decon, decon) @@ -8,70 +9,46 @@ test_that("combine_cpdb works 1", { test_that("plot_cpdb works 1", { - p <- plot_cpdb("B cell", "CD4T cell", kidneyimmune, "celltype", means, pvals, - split.by = "Experiment", genes = c("CXCL13", "CD274", "CXCR5"), verbose = TRUE - ) + p <- plot_cpdb(cell_type1 = "B cell", cell_type2 = "CD4T cell", scdata = kidneyimmune, celltype_key = "celltype", means = means, pvals = pvals, splitby_key = "Experiment", genes = c("CXCL13", "CD274", "CXCR5"), keep_significant_only = FALSE) expect_true(is.ggplot(p)) }) test_that("plot_cpdb works 2", { - p <- plot_cpdb("B cell", "CD4T cell", kidneyimmune, "celltype", means, pvals, - split.by = "Experiment", gene.family = "chemokines", verbose = TRUE - ) + p <- plot_cpdb(cell_type1 = "B cell", cell_type2 = "CD4T cell", scdata = kidneyimmune, celltype_key = "celltype", means = means, pvals = pvals, splitby_key = "Experiment", gene_family = "chemokines", keep_significant_only = FALSE) expect_true(is.ggplot(p)) }) test_that("plot_cpdb works 3", { - p <- plot_cpdb("B cell", "CD4T cell", kidneyimmune, "celltype", means, pvals, - split.by = "Experiment", gene.family = "chemokines", verbose = FALSE, default_style = FALSE - ) + p <- plot_cpdb(cell_type1 = "B cell", cell_type2 = "CD4T cell", scdata = kidneyimmune, celltype_key = "celltype", means = means, pvals = pvals, splitby_key = "Experiment", gene_family = "chemokines", default_style = FALSE, keep_significant_only = FALSE) expect_true(is.ggplot(p)) }) test_that("plot_cpdb works 4", { - p <- plot_cpdb("B cell", "B cell", kidneyimmune, "celltype", means, pvals, - split.by = "Experiment", - gene.family = "chemokines", verbose = FALSE - ) + p <- plot_cpdb(cell_type1 = "B cell", cell_type2 = "CD4T cell", scdata = kidneyimmune, celltype_key = "celltype", means = means, pvals = pvals, splitby_key = "Experiment", gene_family = "chemokines", keep_significant_only = FALSE) expect_true(is.ggplot(p)) }) test_that("plot_cpdb works 5", { - p <- plot_cpdb("B cell", "cell", kidneyimmune, "celltype", means, pvals, - split.by = "Experiment", - gene.family = "chemokines", verbose = FALSE, default_style = FALSE, p.adjust.method = "BH" - ) + p <- plot_cpdb(cell_type1 = "B cell", cell_type2 = "CD4T cell", scdata = kidneyimmune, celltype_key = "celltype", means = means, pvals = pvals, splitby_key = "Experiment", gene_family = "chemokines", default_style = FALSE, keep_significant_only = FALSE) expect_true(is.ggplot(p)) }) test_that("plot_cpdb works 6", { - p <- plot_cpdb("B cell", "CD4T cell", kidneyimmune, "celltype", means2, pvals2, - gene.family = "custom_family", custom_gene_family = list(custom_family = c( - "CXCL13", - "CD274", "CXCR5" - )), verbose = FALSE - ) + p <- plot_cpdb(cell_type1 = "B cell", cell_type2 = "CD4T cell", scdata = kidneyimmune, celltype_key = "celltype", means = means2, pvals = pvals2, gene_family = "custom_family", custom_gene_family = list(custom_family = c("CXCL13", "CD274", "CXCR5")), keep_significant_only = FALSE) expect_true(is.ggplot(p)) }) test_that("plot_cpdb works 7", { - p <- plot_cpdb("B cell", "CD4T cell", kidneyimmune, "celltype", means2, pvals2, - gene.family = "custom_family", custom_gene_family = data.frame(custom_family = c( - "CXCL13", - "CD274", "CXCR5" - )), verbose = FALSE - ) + p <- plot_cpdb(cell_type1 = "B cell", cell_type2 = "CD4T cell", scdata = kidneyimmune, celltype_key = "celltype", means = means2, pvals = pvals2, gene_family = "custom_family", custom_gene_family = data.frame(custom_family = c("CXCL13", "CD274", "CXCR5")), keep_significant_only = FALSE) expect_true(is.ggplot(p)) }) test_that("plot_cpdb works 8", { - p <- plot_cpdb("B cell", "CD4T cell", kidneyimmune, "celltype", means2, pvals2, - gene.family = c("chemokines", "th1"), verbose = FALSE - ) + p <- plot_cpdb(cell_type1 = "B cell", cell_type2 = "CD4T cell", scdata = kidneyimmune, celltype_key = "celltype", means = means2, pvals = pvals2, gene_family = c("chemokines", "th1"), keep_significant_only = FALSE) expect_true(is.ggplot(p)) }) -test_that("werid characters are ok", { +test_that("weird characters are ok", { # edit the example objects to simulate Rachel's objects # rename B cells to TRC+ kidneyimmune$celltype <- gsub("B cell", "TRC+", kidneyimmune$celltype) @@ -106,121 +83,23 @@ test_that("werid characters are ok", { newpvals <- cbind(pvals[, 1:11], pvals_df) # plot_cpdb - p <- plot_cpdb( - cell_type1 = "TRC+", cell_type2 = "LTi-Like ILC3", scdata = kidneyimmune, - idents = "celltype", # column name where the cell ids are located in the metadata - means = newmeans, pvals = newpvals, - genes = c("LTB", "LTBR", "KITL", "KIT", "CCR6"), verbose = TRUE - ) + p <- plot_cpdb(cell_type1 = "TRC+", cell_type2 = "LTi-Like ILC3", scdata = kidneyimmune, celltype_key = "celltype", means = newmeans, pvals = newpvals, genes = c("LTB", "LTBR", "KITL", "KIT", "CCR6"), keep_significant_only = FALSE) expect_true(is.ggplot(p)) }) test_that("plot_cpdb2 works 1", { - sce <- kidneyimmune - p <- plot_cpdb2( - cell_type1 = "B cell", cell_type2 = "CD4T cell", - scdata = sce, - idents = "celltype", # column name where the cell ids are located in the metadata - means = means2, - pvals = pvals2, - deconvoluted = decon2, # new options from here on specific to plot_cpdb2 - desiredInteractions = list( - c("CD4T cell", "B cell"), - c("B cell", "CD4T cell") - ), - interaction_grouping = interaction_annotation, - edge_group_colors = c( - "Activating" = "#e15759", - "Chemotaxis" = "#59a14f", - "Inhibitory" = "#4e79a7", - "Intracellular trafficking" = "#9c755f", - "DC_development" = "#B07aa1", - "Unknown" = NA - ), - node_group_colors = c( - "CD4T cell" = "#86bc86", - "B cell" = "#79706e" - ), - keep_significant_only = TRUE, - standard_scale = TRUE, - remove_self = TRUE - ) + p <- plot_cpdb2(cell_type1 = "B cell", cell_type2 = "CD4T cell", scdata = kidneyimmune, celltype_key = "celltype", means = means2, pvals = pvals2, deconvoluted = decon2, desiredInteractions = list(c("CD4T cell", "B cell"), c("B cell", "CD4T cell")), interaction_grouping = interaction_annotation, edge_group_colors = c("Activating" = "#e15759", "Chemotaxis" = "#59a14f", "Inhibitory" = "#4e79a7", "Intracellular trafficking" = "#9c755f", "DC_development" = "#B07aa1", "Unknown" = NA), node_group_colors = c("CD4T cell" = "#86bc86", "B cell" = "#79706e"), keep_significant_only = TRUE, standard_scale = TRUE, remove_self = TRUE) expect_true(is.ggplot(p)) }) - -# test_that("plot_cpdb2 works 2",{ -# p <- plot_cpdb2(cell_type1 = 'B cell', cell_type2 = 'CD4T cell', -# scdata = kidneyimmune, -# idents = 'celltype', # column name where the cell ids are located in the metadata -# split.by = 'Experiment', # column name where the grouping column is. Optional. -# means = means, -# pvals = pvals, -# deconvoluted = decon, # new options from here on specific to plot_cpdb2 -# desiredInteractions = list( -# c('CD4T cell', 'B cell'), -# c('B cell', 'CD4T cell')), -# interaction_grouping = interaction_annotation, -# edge_group_colors = c( -# "Activating" = "#e15759", -# "Chemotaxis" = "#59a14f", -# "Inhibitory" = "#4e79a7", -# "Intracellular trafficking" = "#9c755f", -# "DC_development" = "#B07aa1", -# "Unknown" = NA -# ), -# node_group_colors = c( -# "CD4T cell" = "#86bc86", -# "B cell" = "#79706e"), -# keep_significant_only = TRUE, -# standard_scale = TRUE, -# remove_self = TRUE -# ) -# expect_true(is.ggplot(p[[1]])) -# expect_true(is.ggplot(p[[2]])) -# expect_true(is.ggplot(p[[3]])) -# expect_true(is.ggplot(p[[4]])) -# expect_true(is.ggplot(p[[5]])) -# expect_true(is.ggplot(p[[6]])) -# expect_false(is.ggplot(p[[7]])) -# expect_true(is.ggplot(p[[8]])) -# expect_false(is.ggplot(p[[9]])) -# expect_false(is.ggplot(p[[10]])) -# expect_false(is.ggplot(p[[11]])) -# }) - - test_that("plot_cpdb3 works 1", { - p <- plot_cpdb3( - cell_type1 = "B cell", cell_type2 = "CD4T cell", - scdata = kidneyimmune, - idents = "celltype", # column name where the cell ids are located in the metadata - means = means2, - pvals = pvals2, - deconvoluted = decon2, # new options from here on specific to plot_cpdb2 - keep_significant_only = TRUE, - standard_scale = TRUE, - remove_self = TRUE - ) - + p <- plot_cpdb3(cell_type1 = "B cell", cell_type2 = "CD4T cell", scdata = kidneyimmune, celltype_key = "celltype", means = means2, pvals = pvals2, deconvoluted = decon2, keep_significant_only = TRUE, standard_scale = TRUE, remove_self = TRUE) expect_that(class(p), equals("recordedplot")) }) test_that("plot_cpdb3 2", { - p <- plot_cpdb3( - cell_type1 = "B cell", cell_type2 = "CD4T cell", - scdata = kidneyimmune, - idents = "celltype", # column name where the cell ids are located in the metadata - split.by = "Experiment", # column name where the grouping column is. Optional. - means = means, - pvals = pvals, - deconvoluted = decon, # new options from here on specific to plot_cpdb2 - keep_significant_only = TRUE, - standard_scale = TRUE, - remove_self = TRUE - ) - + p <- plot_cpdb3(cell_type1 = "B cell", cell_type2 = "CD4T cell", scdata = kidneyimmune, celltype_key = "celltype", splitby_key = "Experiment", means = means, pvals = pvals, deconvoluted = decon, keep_significant_only = TRUE, standard_scale = TRUE, remove_self = TRUE) expect_that(class(p[[1]]), equals("recordedplot")) expect_that(class(p[[2]]), equals("recordedplot")) expect_that(class(p[[3]]), equals("recordedplot")) @@ -235,60 +114,19 @@ test_that("plot_cpdb3 2", { }) test_that("plot_cpdb4 works 1", { - p <- plot_cpdb4( - interaction = "CLEC2D-KLRB1", - cell_type1 = "NK", cell_type2 = "Mast", - scdata = kidneyimmune, - idents = "celltype", # column name where the cell ids are located in the metadata - means = means2, - pvals = pvals2, - deconvoluted = decon2, # new options from here on specific to plot_cpdb2 - keep_significant_only = TRUE, - standard_scale = TRUE, - remove_self = TRUE - ) - + p <- plot_cpdb4(interaction = "CLEC2D-KLRB1", cell_type1 = "NK", cell_type2 = "Mast", scdata = kidneyimmune, celltype_key = "celltype", means = means2, pvals = pvals2, deconvoluted = decon2, keep_significant_only = TRUE, standard_scale = TRUE, remove_self = TRUE) expect_that(class(p), equals("recordedplot")) }) test_that("plot_cpdb4 works 2", { - p <- plot_cpdb4( - interaction = c("CLEC2D-KLRB1", "CD40-CD40LG"), - cell_type1 = "NK|B", cell_type2 = "Mast|CD4T", - scdata = kidneyimmune, - idents = "celltype", - means = means2, - pvals = pvals2, - deconvoluted = decon2, - desiredInteractions = list( - c("NK cell", "Mast cell"), - c("NK cell", "NKT cell"), - c("NKT cell", "Mast cell"), - c("B cell", "CD4T cell") - ), - keep_significant_only = TRUE, - ) - + p <- plot_cpdb4(interaction = c("CLEC2D-KLRB1", "CD40-CD40LG"), cell_type1 = "NK|B", cell_type2 = "Mast|CD4T", scdata = kidneyimmune, celltype_key = "celltype", means = means2, pvals = pvals2, deconvoluted = decon2, desiredInteractions = list(c("NK cell", "Mast cell"), c("NK cell", "NKT cell"), c("NKT cell", "Mast cell"), c("B cell", "CD4T cell")), keep_significant_only = TRUE) expect_that(class(p), equals("recordedplot")) }) test_that("plot_cpdb4 works 3", { - p <- plot_cpdb4( - interaction = "CLEC2D-KLRB1", - cell_type1 = "NK", cell_type2 = "Mast", - scdata = kidneyimmune, - idents = "celltype", # column name where the cell ids are located in the metadata - split.by = "Experiment", # column name where the grouping column is. Optional. - means = means, - pvals = pvals, - deconvoluted = decon, # new options from here on specific to plot_cpdb2 - keep_significant_only = TRUE, - standard_scale = TRUE, - remove_self = TRUE - ) - + p <- plot_cpdb4(interaction = "CLEC2D-KLRB1", cell_type1 = "NK", cell_type2 = "Mast", scdata = kidneyimmune, celltype_key = "celltype", splitby_key = "Experiment", means = means, pvals = pvals, deconvoluted = decon, keep_significant_only = TRUE, standard_scale = TRUE, remove_self = TRUE) for (i in 1:13) { expect_that(class(p[[i]]), equals("recordedplot")) } @@ -298,3 +136,159 @@ test_that("plot_cpdb_heatmap works", { p <- plot_cpdb_heatmap(pvals2) expect_that(class(p), equals("pheatmap")) }) + +test_that("plot_cpdb v5 1", { + p <- plot_cpdb( + scdata = sce_v5, + cell_type1 = "PV MYH11|PV STEAP4|PV MMPP11", + cell_type2 = "EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means = means_v5, + pvals = relevant_interactions_v5, + celltype_key = "cell_labels", + genes = c("TGFB2", "CSF1R"), + max_size = 6, + highlight_size = 0.75, + degs_analysis = TRUE, + standard_scale = TRUE, + interaction_scores = interaction_scores_v5, + scale_alpha_by_interaction_scores = TRUE, + min_interaction_score = 20 + ) + expect_true(is.ggplot(p)) +}) + +test_that("plot_cpdb v5 2", { + p <- plot_cpdb( + scdata = sce_v5, + cell_type1 = "PV MYH11|PV STEAP4|PV MMPP11", + cell_type2 = "EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means = means_v5, + pvals = relevant_interactions_v5, + celltype_key = "cell_labels", + genes = c("TGFB2", "CSF1R"), + max_size = 6, + highlight_size = 0.75, + degs_analysis = TRUE, + standard_scale = TRUE, + cellsign = cellsign_v5, + scale_alpha_by_interaction_scores = TRUE + ) + expect_true(is.ggplot(p)) +}) + +test_that("plot_cpdb v5 3", { + p <- plot_cpdb( + scdata = sce_v5, + cell_type1 = "PV MYH11|PV STEAP4|PV MMPP11", + cell_type2 = "EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means = means_v5, + pvals = relevant_interactions_v5, + celltype_key = "cell_labels", + max_size = 6, + highlight_size = 0.75, + degs_analysis = TRUE, + standard_scale = TRUE, + cellsign = cellsign_v5, + scale_alpha_by_cellsign = TRUE, + filter_by_cellsign = TRUE + ) + expect_true(is.ggplot(p)) +}) + +test_that("plot_cpdb v5 4", { + p <- plot_cpdb( + scdata = sce_v5, + cell_type1 = "PV MYH11|PV STEAP4|PV MMPP11", + cell_type2 = "EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means = means_v5, + pvals = relevant_interactions_v5, + celltype_key = "cell_labels", + max_size = 6, + highlight_size = 0.75, + degs_analysis = TRUE, + standard_scale = TRUE, + cellsign = cellsign_v5, + scale_alpha_by_cellsign = TRUE + ) + expect_true(is.ggplot(p)) +}) + +test_that("plot_cpdb v5 5", { + p <- plot_cpdb( + scdata = sce_v5, + cell_type1 = "PV MYH11|PV STEAP4|PV MMPP11", + cell_type2 = "EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means = means_v5, + pvals = relevant_interactions_v5, + celltype_key = "cell_labels", + genes = c("TGFB2", "CSF1R"), + max_size = 6, + highlight_size = 0.75, + degs_analysis = TRUE, + standard_scale = TRUE, + interaction_scores = interaction_scores_v5, + scale_alpha_by_interaction_scores = TRUE, + min_interaction_score = 20, + default_style = FALSE + ) + expect_true(is.ggplot(p)) +}) + +test_that("plot_cpdb v5 6", { + p <- plot_cpdb( + scdata = sce_v5, + cell_type1 = "PV MYH11|PV STEAP4|PV MMPP11", + cell_type2 = "EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means = means_v5, + pvals = relevant_interactions_v5, + celltype_key = "cell_labels", + genes = c("TGFB2", "CSF1R"), + max_size = 6, + highlight_size = 0.75, + degs_analysis = TRUE, + standard_scale = TRUE, + cellsign = cellsign_v5, + scale_alpha_by_interaction_scores = TRUE, + default_style = FALSE + ) + expect_true(is.ggplot(p)) +}) + +test_that("plot_cpdb v5 7", { + p <- plot_cpdb( + scdata = sce_v5, + cell_type1 = "PV MYH11|PV STEAP4|PV MMPP11", + cell_type2 = "EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means = means_v5, + pvals = relevant_interactions_v5, + celltype_key = "cell_labels", + max_size = 6, + highlight_size = 0.75, + degs_analysis = TRUE, + standard_scale = TRUE, + cellsign = cellsign_v5, + scale_alpha_by_cellsign = TRUE, + filter_by_cellsign = TRUE, + default_style = FALSE + ) + expect_true(is.ggplot(p)) +}) + +test_that("plot_cpdb v5 8", { + p <- plot_cpdb( + scdata = sce_v5, + cell_type1 = "PV MYH11|PV STEAP4|PV MMPP11", + cell_type2 = "EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means = means_v5, + pvals = relevant_interactions_v5, + celltype_key = "cell_labels", + max_size = 6, + highlight_size = 0.75, + degs_analysis = TRUE, + standard_scale = TRUE, + cellsign = cellsign_v5, + scale_alpha_by_cellsign = TRUE, + default_style = FALSE + ) + expect_true(is.ggplot(p)) +}) diff --git a/tests/testthat/test_dotplot.R b/tests/testthat/test_dotplot.R index 0d47867..8221e50 100644 --- a/tests/testthat/test_dotplot.R +++ b/tests/testthat/test_dotplot.R @@ -3,8 +3,8 @@ data(kidneyimmune) test_that("geneDotPlot works", { p <- geneDotPlot(kidneyimmune, genes = c("CD68", "CD80", "CD86", "CD74", "CD2", "CD5"), - idents = "celltype", - split.by = "Project", + celltype_key = "celltype", + splitby_key = "Project", standard_scale = TRUE ) + theme(strip.text.x = element_text(angle = 45, hjust = 0)) expect_true(is.ggplot(p)) @@ -13,8 +13,8 @@ test_that("geneDotPlot works", { test_that("test fill works", { p <- geneDotPlot(kidneyimmune, genes = c("CD68", "CD80", "CD86", "CD74", "CD2", "CD5"), - idents = "celltype", - split.by = "Project", + celltype_key = "celltype", + splitby_key = "Project", standard_scale = TRUE, fill = TRUE ) + theme(strip.text.x = element_text(angle = 45, hjust = 0)) @@ -24,7 +24,7 @@ test_that("test fill works", { test_that("test no split", { p <- geneDotPlot(kidneyimmune, genes = c("CD68", "CD80", "CD86", "CD74", "CD2", "CD5"), - idents = "celltype", + celltype_key = "celltype", standard_scale = TRUE, fill = TRUE ) + theme(strip.text.x = element_text(angle = 45, hjust = 0)) @@ -34,7 +34,7 @@ test_that("test no split", { test_that("test no scale1", { p <- geneDotPlot(kidneyimmune, genes = c("CD68", "CD80", "CD86", "CD74", "CD2", "CD5"), - idents = "celltype", + celltype_key = "celltype", standard_scale = FALSE, fill = TRUE ) + theme(strip.text.x = element_text(angle = 45, hjust = 0)) @@ -44,7 +44,7 @@ test_that("test no scale1", { test_that("test no scale2", { p <- geneDotPlot(kidneyimmune, genes = c("CD68", "CD80", "CD86", "CD74", "CD2", "CD5"), - idents = "celltype", + celltype_key = "celltype", scale = FALSE, standard_scale = FALSE, fill = TRUE @@ -55,7 +55,7 @@ test_that("test no scale2", { test_that("test scale3", { p <- geneDotPlot(kidneyimmune, genes = c("CD68", "CD80", "CD86", "CD74", "CD2", "CD5"), - idents = "celltype" + celltype_key = "celltype" ) + theme(strip.text.x = element_text(angle = 45, hjust = 0)) expect_true(is.ggplot(p)) }) diff --git a/vignettes/vignette.rmd b/vignettes/vignette.rmd new file mode 100644 index 0000000..9b923b8 --- /dev/null +++ b/vignettes/vignette.rmd @@ -0,0 +1,348 @@ +--- +title: "Plotting CellPhoneDB results" +output: rmarkdown::html_vignette +date: "`r Sys.Date()`" +vignette: > + %\VignetteIndexEntry{Plotting CellPhoneDB results} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![codecov](https://codecov.io/gh/zktuong/ktplots/branch/master/graph/badge.svg)](https://codecov.io/gh/zktuong/ktplots) +[![R](https://github.com/zktuong/ktplots/actions/workflows/r.yml/badge.svg)](https://github.com/zktuong/ktplots/actions/workflows/r.yml) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.6728350.svg)](https://doi.org/10.5281/zenodo.5717922) + +# ktplots + +Welcome to `ktplots`! This is a R package to help visualise `CellPhoneDB` results. Here, we will go through a quick tutorial on how to use the functions in this package. + +For a python port of `ktplots`, please check out my other [repository](https://www.github.com/zktuong/ktplotspy). + + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +## Installation instructions +You can install the package via `devtools::install_github()` function in R +```{r, eval = FALSE} +if (!requireNamespace("devtools", quietly = TRUE)) { + install.packages("devtools") +} +if (!requireNamespace("BiocManager", quietly = TRUE)) { + install.packages("BiocManager") +} +devtools::install_github("zktuong/ktplots", dependencies = TRUE) +``` + +## Usage instructions +```{r, message = FALSE, warning = FALSE} +library(ktplots) +``` + +There is a test dataset in `SingleCellExperiment` format to test the functions. + +```{r, message = FALSE, warning = FALSE} +library(SingleCellExperiment) +data(kidneyimmune) +# Some functions accept Seurat objects too. +``` +The data is downsampled from the [kidney cell atlas](https://kidneycellatlas.org). + +For more info, please see [Stewart et al. kidney single cell data set published in Science 2019](https://science.sciencemag.org/content/365/6460/1461). + +## Prepare input +We will need 3 files to use this package, a `SingleCellExperiment` (or `Seurat`; some functions only accepts the former) object that correspond to the object you used for `CellPhoneDB` and the `means.txt` and `pvalues.txt` output. +If you are using results from `CellPhoneDB` `deg_analysis` mode from version >= 3, the `pvalues.txt` is `relevant_interactions.txt` and also add `degs_analysis = TRUE` into all the functions below. `deconvoluted` is only used for `plot_cpdb2/3/4`. + +```{r, message = FALSE, warning = FALSE} +# pvals <- read.delim("pvalues.txt", check.names = FALSE) +# means <- read.delim("means.txt", check.names = FALSE) +# decon = pd.read_csv("deconvoluted.txt", sep="\t") + +# I've provided an example datasets +data(cpdb_output) +data(cpdb_output2) +``` + +## Heatmap +The original heatmap plot from `CellPhoneDB` can be achieved with this reimplemented function. +```{r, message = FALSE, warning = FALSE} +plot_cpdb_heatmap(pvals = pvals2, cellheight = 10, cellwidth = 10) +``` + +The current heatmap is directional (check `count_network` and `interaction_edges` for more details in `return_tables = True`). + +To obtain the heatmap where the interaction counts are not symmetrical, do: + +```{r, message = FALSE, warning = FALSE} +plot_cpdb_heatmap(pvals = pvals2, cellheight = 10, cellwidth = 10, symmetrical = FALSE) +``` + +The values for the `symmetrical=FALSE` mode follow the direction of the L-R direction where it's always moleculeA:celltypeA -> moleculeB:celltypeB. + +Therefore, if you trace on the `x-axis` for `celltype A` [MNPa(mono)] to `celltype B` [CD8T cell] on the `y-axis`: + +A -> B is 18 interactions + +Whereas if you trace on the `y-axis` for `celltype A` [MNPa(mono)] to `celltype B` [CD8T cell] on the `x-axis`: + +A -> B is 9 interactions + +`symmetrical=TRUE` mode will return 18+9 = 27 + +## Dot plot + +### plot_cpdb +A simple usage of `plot_cpdb` is as follows: +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 4} +plot_cpdb( + scdata=kidneyimmune, + cell_type1="B cell", + cell_type2=".", # this means all cell-types + celltype_key="celltype", + means=means2, + pvals=pvals2, + genes=c("PTPRC", "TNFSF13"), + title="interacting interactions!", +) +``` + +Or don't specify either and it will try to plot all significant interactions. + +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 3} +plot_cpdb( + scdata=kidneyimmune, + cell_type1="B cell", + cell_type2=".", + celltype_key="celltype", + means=means2, + pvals=pvals2, + genes=c("PTPRC", "CD40", "CLEC2D"), + default_style=FALSE +) +``` + +you can also toggle options to `splitby_key` and `gene_family`: + +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 6} +plot_cpdb( + scdata = kidneyimmune, + cell_type1 = "B cell", + cell_type2 = "Neutrophil|MNPc|NK cell", + celltype_key = "celltype", + means = means, + pvals = pvals, + splitby_key = "Experiment", + gene_family = "chemokines" +) +``` + +if `genes` and `gene_family` are both not specified, the function will try to plot everything. + +Specifying `keep_significant_only` will only keep those that are p<0.05. + +You can also specify more than 1 gene families: +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 8} +plot_cpdb( + scdata = kidneyimmune, + cell_type1 = "B cell", + cell_type2 = "CD4T cell", + celltype_key = "celltype", + means = means, + pvals = pvals, + splitby_key = "Experiment", + gene_family = c("Coinhibitory", "Costimulatory"), + cluster_rows = FALSE # ensures that the families are separate, +) +``` + +And also provide custom families as a `data.frame`. +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 6} +df <- data.frame(set1 = c("CCR6", "CCL20", "CXCL10", "CCR3", "TNFRSF13C"), set2 = c("CCL5", "CCR4", "PTPRC", "CD40", "CLEC2D")) +plot_cpdb( + scdata=kidneyimmune, + cell_type1 = "B cell", + cell_type2 = "CD4T cell", + celltype_key = "celltype", + means = means, + pvals = pvals, + splitby_key = "Experiment", + gene_family = c("set1", "set2"), + custom_gene_family = df, +) +``` + +### combine_cpdb + +For the `splitby_key` option to work, the annotation in the meta file must be defined in the following format: +```{r, eval = FALSE} +{splitby_key}_{celltype_key} +``` + +so to set up an example vector, it would be something like: +```{r, eval = FALSE} +annotation <- paste0(kidneyimmune$Experiment, "_", kidneyimmune$celltype) +``` + +The recommended way to use `splitby_key` is to prepare the data with `combine_cpdb` like in this example: + +```{r, eval = FALSE} +# Assume you have 2 cellphonedb runs, one where it's just naive and the other is treated, you will end up with 2 cellphonedb out folders +# remember, the celltype labels you provide to cellphonedb's meta.txt should already be like {splitby_key}_{celltype_key} +# so the two meta.txt should look like: + +# naive file +# ATTAGTCGATCGTAGT-1 naive_CD4Tcell +# ATTAGTGGATCGTAGT-1 naive_CD4Tcell +# ATTAGTCGACCGTAGT-1 naive_CD8Tcell +# ATTAGTCGATCGTAGT-1 naive_CD8Tcell +# ATGAGTCGATCGTAGT-1 naive_Bcell +# ATTAGTCGATCGTGGT-1 naive_Bcell + +# treated file +# ATTAGTCAATCGTAGT-1 treated_CD4Tcell +# ATTAGTGGATCGTAGT-1 treated_CD4Tcell +# ATTAGTCGACCATAGT-1 treated_CD8Tcell +# ATTAGTAGATCGTAGT-1 treated_CD8Tcell +# ATGAGTCGATCGTAAT-1 treated_Bcell +# ATTAGTCGATCGTGAT-1 treated_Bcell + +# one you have set that up correctly, you can then read in the files. +naive_means <- read.delim("naive_out/means.txt", check.names = FALSE) +naive_pvals <- read.delim("naive_out/pvalues.txt", check.names = FALSE) +naive_decon <- read.delim("naive_out/deconvoluted.txt", check.names = FALSE) + +treated_means <- read.delim("treated_out/means.txt", check.names = FALSE) +treated_pvals <- read.delim("treated_out/pvalues.txt", check.names = FALSE) +treated_decon <- read.delim("treated_out/deconvoluted.txt", check.names = FALSE) + +means <- combine_cpdb(naive_means, treated_means) +pvals <- combine_cpdb(naive_pvals, treated_pvals) +decon <- combine_cpdb(naive_decon, treated_decon) + +plot_cpdb(...) +``` + +### plot_cpdb2 +Generates a circos-style wire/arc/chord plot for cellphonedb results. + +This function piggy-backs on the original `plot_cpdb` function and generates the results like this: + +Please help contribute to the interaction grouping list [here](https://docs.google.com/spreadsheets/d/1O9OKU7J0NdeQNJAIMpsHtWAFvY014GDQ7aigdGUSTmc/edit?usp=sharing)! + +Credits to Ben Stewart for coming up with the base code! + +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 10} + +plot_cpdb2( + scdata = kidneyimmune, + cell_type1 = "B cell", + cell_type2 = ".", + celltype_key = "celltype", # column name where the cell ids are located in the metadata + means = means2, + pvals = pvals2, + deconvoluted = decon2, # new options from here on specific to plot_cpdb2 + desiredInteractions = list( + c("CD4T cell", "B cell"), + c("B cell", "CD4T cell") + ), + interaction_grouping = interaction_annotation, + edge_group_colors = c( + "Activating" = "#e15759", + "Chemotaxis" = "#59a14f", + "Inhibitory" = "#4e79a7", + "Intracellular trafficking" = "#9c755f", + "DC_development" = "#B07aa1", + "Unknown" = "#e7e7e7" + ), + node_group_colors = c( + "CD4T cell" = "red", + "B cell" = "blue" + ), +) +``` + +### plot_cpdb3 +Generates a chord diagram inspired from [CellChat](https://github.com/sqjin/CellChat)'s way of showing the data! + +Usage is similar to `plot_cpdb2` but with reduced options. Additional kwargs are passed to `plot_cpdb`. +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 6} +plot_cpdb3( + scdata = kidneyimmune, + cell_type1 = "B cell", + cell_type2 = "CD4T cell|MNPd", + celltype_key = "celltype", # column name where the cell ids are located in the metadata + means = means2, + pvals = pvals2, + deconvoluted = decon2 # new options from here on specific to plot_cpdb3 +) +``` + + +### plot_cpdb4 +New! Alternate way of showing the chord diagram for specific interactions! + +Usage is similar to `plot_cpdb3` but with additional required `interaction` option. Additional kwargs are passed to `plot_cpdb`. +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 6} +plot_cpdb4( + scdata = kidneyimmune, + interaction = "CLEC2D-KLRB1", + cell_type1 = "NK", + cell_type2 = "Mast", + celltype_key = "celltype", + means = means2, + pvals = pvals2, + deconvoluted = decon2 +) +``` + +or specify more than 1 interactions + only show specific cell-type type interactions! +```{r, message = FALSE, warning = FALSE} +plot_cpdb4( + interaction = c("CLEC2D-KLRB1", "CD40-CD40LG"), + cell_type1 = "NK|B", cell_type2 = "Mast|CD4T", + scdata = kidneyimmune, + celltype_key = "celltype", + means = means2, + pvals = pvals2, + deconvoluted = decon2, + desiredInteractions = list( + c("NK cell", "Mast cell"), + c("NK cell", "NKT cell"), + c("NKT cell", "Mast cell"), + c("B cell", "CD4T cell") + ), + keep_significant_only = TRUE, +) +``` + +## Other useful functions + +### geneDotPlot +Plotting gene expression dot plots heatmaps. +```{r, message = FALSE, warning = FALSE} +library(ggplot2) +# Note, this conflicts with tidyr devel version +geneDotPlot( + scdata = kidneyimmune, # object + genes = c("CD68", "CD80", "CD86", "CD74", "CD2", "CD5"), # genes to plot + celltype_key = "celltype", # column name in meta data that holds the cell-cluster ID/assignment + splitby_key = "Project", # column name in the meta data that you want to split the plotting by. If not provided, it will just plot according to celltype_key + standard_scale = TRUE +) + theme(strip.text.x = element_text(angle = 0, hjust = 0, size = 7)) + small_guide() + small_legend() +``` + +## Citation +If you find these functions useful, please consider leaving a star, citing this repository, and/or citing the following [DOI](https://doi.org/10.5281/zenodo.5717922): + +To cite a specific version of `ktplots`, please follow the links on the zenodo repository. e.g. v1.2.3: +``` +Zewen Kelvin Tuong. (2021). zktuong/ktplots: 1.2.3 (v1.2.3). Zenodo. https://doi.org/10.5281/zenodo.5717922 +``` + +Thank you! diff --git a/vignettes/vignette_v5.rmd b/vignettes/vignette_v5.rmd new file mode 100644 index 0000000..267d869 --- /dev/null +++ b/vignettes/vignette_v5.rmd @@ -0,0 +1,209 @@ +--- +title: "New CellPhoneDB v5 results" +output: rmarkdown::html_vignette +date: "`r Sys.Date()`" +vignette: > + %\VignetteIndexEntry{New CellPhoneDB v5 results} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![codecov](https://codecov.io/gh/zktuong/ktplots/branch/master/graph/badge.svg)](https://codecov.io/gh/zktuong/ktplots) +[![R](https://github.com/zktuong/ktplots/actions/workflows/r.yml/badge.svg)](https://github.com/zktuong/ktplots/actions/workflows/r.yml) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.6728350.svg)](https://doi.org/10.5281/zenodo.5717922) + +# CellPhoneDB v5 results + +From version 5 of [CellPhoneDB](https://www.github.com/ventolab/cellphonedb), there is a new output file - `interaction_scores`. + +According to the official repository, this table corresponds to: + +`interaction_scores`: stores the new score generated. This score ranges from 0-100. + +To score interactions CellPhoneDB v5 employs the following protocol: + +1. Exclude genes not participating in any interaction and those expressed in less than k% of cells within a given cell type. +2. Calculate the mean expression (G) of each gene (i) within each cell type (j). +3. For heteromeric proteins, aggregate the mean gene expression of each subunit (n) employing the geometric mean. +4. Scale mean gene/heteromer expression across cell types between 0 and 100. +5. Calculate the product of the scale mean expression of the interaction proteins as a proxy of the interaction relevance. + +`cellsign`: accepts the new `CellSign` data. + +The aim of the CellSign module is to identify activated receptors and prioritise high-confidence interactions by leveraging the activity of the downstream transcription factors (TFs). CellSign relies on a database of receptors linked to their putative downstream TFs. + +`ktplots` will support these output via inclusion into the existing `plot_cpdb` function. We will gradually enable their functionality across the other functions, as well as with in the python package eventually. + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +### Load packages +```{r, message = FALSE, warning = FALSE} +library(ktplots) +data(cpdb_output_v5) + +plot_cpdb_heatmap(pvals=relevant_interactions_v5, degs_analysis=TRUE, title="Sum of significant interactions") +``` + +```{r, message = FALSE, warning = FALSE, fig.height=3, fig.width=10} +plot_cpdb( + scdata=sce_v5, + cell_type1="PV MYH11|PV STEAP4|PV MMPP11", + cell_type2="EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means=means_v5, + pvals=relevant_interactions_v5, + celltype_key="cell_labels", + genes=c("TGFB2", "CSF1R"), + title="Interactions between PV and trophoblast ", + max_size=6, + highlight_size=0.75, + degs_analysis=TRUE, + standard_scale=TRUE +) +``` + +### Interaction scores + +Let's start with interaction scores. If a dataframe corresponding to the `interaction_scores` file is provided, you can toggle the alpha transparency of the interactions by the interaction score (interaction ranking is simply the score/100). + +```{r, message = FALSE, warning = FALSE, fig.height=3, fig.width=12} +plot_cpdb( + scdata=sce_v5, + cell_type1="PV MYH11|PV STEAP4|PV MMPP11", + cell_type2="EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means=means_v5, + pvals=relevant_interactions_v5, + celltype_key="cell_labels", + genes=c("TGFB2", "CSF1R"), + title="Interactions between PV and trophoblast ", + max_size=6, + highlight_size=0.75, + degs_analysis=TRUE, + standard_scale=TRUE, + interaction_scores=interaction_scores_v5, + scale_alpha_by_interaction_scores=TRUE +) +``` + +You can also specify a minimum interaction score to keep, removing all interactions lesser than this value. + +```{r, message = FALSE, warning = FALSE, fig.height=3, fig.width=8} +plot_cpdb( + scdata=sce_v5, + cell_type1="PV MYH11|PV STEAP4|PV MMPP11", + cell_type2="EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means=means_v5, + pvals=relevant_interactions_v5, + celltype_key="cell_labels", + genes=c("TGFB2", "CSF1R"), + title="Interactions between\nPV and trophoblast ", + max_size=6, + highlight_size=0.75, + degs_analysis=TRUE, + standard_scale=TRUE, + interaction_scores=interaction_scores_v5, + min_interaction_score=20 +) +``` + +or specify both to have the alpha transparency shown too. + +```{r, message = FALSE, warning = FALSE, fig.height=3, fig.width=10} +plot_cpdb( + scdata=sce_v5, + cell_type1="PV MYH11|PV STEAP4|PV MMPP11", + cell_type2="EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means=means_v5, + pvals=relevant_interactions_v5, + celltype_key="cell_labels", + genes=c("TGFB2", "CSF1R"), + title="Interactions between\nPV and trophoblast ", + max_size=6, + highlight_size=0.75, + degs_analysis=TRUE, + standard_scale=TRUE, + interaction_scores=interaction_scores_v5, + scale_alpha_by_interaction_scores=TRUE, + min_interaction_score=20 +) +``` + +### CellSign + +If a dataframe corresponding to the `cellsign` file is provided, you can toggle the filter the interactions by the results + +```{r, message = FALSE, warning = FALSE, fig.height=4, fig.width=8} +plot_cpdb( + scdata=sce_v5, + cell_type1="PV MYH11", + cell_type2="EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means=means_v5, + pvals=relevant_interactions_v5, + celltype_key="cell_labels", + title="Interactions between\nPV and trophoblast with\ndownstream significance", + max_size=6, + highlight_size=0.75, + degs_analysis=TRUE, + standard_scale=TRUE, + cellsign=cellsign_v5, + filter_by_cellsign=TRUE +) +``` + +and also scale the alpha value (50% for 0 and 100% for 1). + +```{r, message = FALSE, warning = FALSE, fig.height=4, fig.width=8} +plot_cpdb( + scdata=sce_v5, + cell_type1="PV MYH11", + cell_type2="EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means=means_v5, + pvals=relevant_interactions_v5, + celltype_key="cell_labels", + title="Interactions between\nPV and trophoblast with\ndownstream significance", + max_size=6, + highlight_size=0.75, + degs_analysis=TRUE, + standard_scale=TRUE, + cellsign=cellsign_v5, + filter_by_cellsign=TRUE, + scale_alpha_by_cellsign=TRUE +) +``` + +### Additional plotting data + +From now on, `is_integrin`, `directionality` and `classification` are transferred to final output table in `plot_cpdb`. This means you will be able to use something like `facet_grid`/`facet_wrap` to plot them! + +```{r, message = FALSE, warning = FALSE, fig.height=3, fig.width=20} +library(ggplot2) +p <- plot_cpdb( + scdata = sce_v5, + cell_type1 = "PV MYH11|PV STEAP4|PV MMPP11", + cell_type2 = "EVT_1|EVT_2|GC|iEVT|eEVT|VCT_CCC", + means = means_v5, + pvals = relevant_interactions_v5, + celltype_key = "cell_labels", + genes = c("TGFB2", "CSF1R", "COL1A1"), + max_size = 6, + highlight_size = 0.75, + degs_analysis = TRUE, + standard_scale = TRUE, + interaction_scores = interaction_scores_v5 + # return_table = TRUE + ) +p + facet_wrap(~classification, ncol = 3) +``` + +```{r, message = FALSE, warning = FALSE, fig.height=3, fig.width=20} +p + facet_wrap(~classification + is_integrin, ncol = 3) +``` + +```{r, message = FALSE, warning = FALSE, fig.height=3, fig.width=20} +p + facet_wrap(~directionality, ncol = 2) +```