diff --git a/R/plot_cpdb.R b/R/plot_cpdb.R index 9e47ce4e..c2c3aa3b 100644 --- a/R/plot_cpdb.R +++ b/R/plot_cpdb.R @@ -9,7 +9,7 @@ #' @param interaction_scores Data frame corresponding to `interaction_scores.txt` from CellPhoneDB version 5 onwards. #' @param cellsign Data frame corresponding to `CellSign.txt` from CellPhoneDB version 5 onwards. #' @param max_size max size of points. -#' @param keep_significant_only logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb. +#' @param keep_significant_only logical. Default is TRUE. Switch to FALSE if you want to plot all the results from cpdb. #' @param splitby_key column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis. #' @param gene_family default = NULL. some predefined group of genes. can take one (or several) of these default options: 'chemokines', 'Th1', 'Th2', 'Th17', 'Treg', 'costimulatory', 'coinhibitory', 'niche'. Also accepts name(s) of custom gene families. #' @param custom_gene_family default = NULL. If provided, will update the gene_family function with this custom entry. Both `gene_family` (name of the custom family) and `custom_gene_family` must be specified for this to work. Provide either a data.frame with column names as name of family and genes in rows or a named likes like : list('customfamily' = c('genea', 'geneb', 'genec')) @@ -35,8 +35,8 @@ #' \donttest{ #' data(kidneyimmune) #' data(cpdb_output) -#' plot_cpdb(kidneyimmune, 'B cell', 'CD4T cell', 'celltype', means, pvals, splitby_key = 'Experiment', genes = c('CXCL13', 'CD274', 'CXCR5')) -#' plot_cpdb(kidneyimmune, 'B cell', 'CD4T cell', 'celltype', means, pvals, splitby_key = 'Experiment', gene_family = 'chemokines') +#' plot_cpdb(kidneyimmune, "B cell", "CD4T cell", "celltype", means, pvals, splitby_key = "Experiment", genes = c("CXCL13", "CD274", "CXCR5")) +#' plot_cpdb(kidneyimmune, "B cell", "CD4T cell", "celltype", means, pvals, splitby_key = "Experiment", gene_family = "chemokines") #' } #' @include utils.R #' @import dplyr @@ -45,14 +45,15 @@ #' @import reshape2 #' @export -plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals, - interaction_scores = NULL, cellsign = NULL, max_size = 8, keep_significant_only = TRUE, - splitby_key = NULL, gene_family = NULL, custom_gene_family = NULL, genes = NULL, - standard_scale = TRUE, cluster_rows = TRUE, col_option = viridis::viridis(50), - default_style = TRUE, highlight_col = "red", highlight_size = NULL, max_highlight_size = 2, - special_character_regex_pattern = NULL, degs_analysis = FALSE, return_table = FALSE, - exclude_interactions = NULL, min_interaction_score = 0, scale_alpha_by_interaction_scores = FALSE, - scale_alpha_by_cellsign = FALSE, filter_by_cellsign = FALSE, title = "", ...) { +plot_cpdb <- function( + scdata, cell_type1, cell_type2, celltype_key, means, pvals, + interaction_scores = NULL, cellsign = NULL, max_size = 8, keep_significant_only = TRUE, + splitby_key = NULL, gene_family = NULL, custom_gene_family = NULL, genes = NULL, + standard_scale = TRUE, cluster_rows = TRUE, col_option = viridis::viridis(50), + default_style = TRUE, highlight_col = "red", highlight_size = NULL, max_highlight_size = 2, + special_character_regex_pattern = NULL, degs_analysis = FALSE, return_table = FALSE, + exclude_interactions = NULL, min_interaction_score = 0, scale_alpha_by_interaction_scores = FALSE, + scale_alpha_by_cellsign = FALSE, filter_by_cellsign = FALSE, title = "", ...) { requireNamespace("SingleCellExperiment") requireNamespace("grDevices") if (is.null(special_character_regex_pattern)) { @@ -71,7 +72,8 @@ plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals cellsign_mat <- .prep_table(cellsign) } col_start <- ifelse(colnames(pvals_mat)[DEFAULT_CLASS_COL] == "classification", - DEFAULT_V5_COL_START, DEFAULT_COL_START) + DEFAULT_V5_COL_START, DEFAULT_COL_START + ) if (degs_analysis) { pvals_mat[, col_start:ncol(pvals_mat)] <- 1 - pvals_mat[, col_start:ncol(pvals_mat)] } @@ -79,22 +81,28 @@ plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals if (col_start == DEFAULT_V5_COL_START) { v5tmp <- reshape2::melt(means_mat, id.vars = colnames(means_mat)[1:col_start]) special_sep <- paste0(rep(DEFAULT_SEP, 3), collapse = "") - row.names(v5tmp) <- paste0(gsub("_", "-", v5tmp$interacting_pair), special_sep, - v5tmp$variable) + row.names(v5tmp) <- paste0( + gsub("_", "-", v5tmp$interacting_pair), special_sep, + v5tmp$variable + ) v5tmp <- v5tmp[, c("is_integrin", "directionality", "classification")] } cell_type1 <- .sub_pattern(cell_type = cell_type1, pattern = special_character_regex_pattern) cell_type2 <- .sub_pattern(cell_type = cell_type2, pattern = special_character_regex_pattern) - query_list <- .prep_query_group(data = means_mat, genes = genes, gene_family = gene_family, - custom_gene_family = custom_gene_family) + query_list <- .prep_query_group( + data = means_mat, genes = genes, gene_family = gene_family, + custom_gene_family = custom_gene_family + ) query <- query_list[["query"]] query_group <- query_list[["query_group"]] # prepare the cell_type query if (!is.null(splitby_key)) { labels <- paste0(metadata[[splitby_key]], "_", metadata[[celltype_key]]) if (is.factor(metadata[[splitby_key]]) & is.factor(metadata[[celltype_key]])) { - labels <- factor(labels, levels = paste0(levels(metadata[[splitby_key]]), - "_", rep(levels(metadata[[celltype_key]]), each = length(levels(metadata[[splitby_key]]))))) + labels <- factor(labels, levels = paste0( + levels(metadata[[splitby_key]]), + "_", rep(levels(metadata[[celltype_key]]), each = length(levels(metadata[[splitby_key]]))) + )) } else { labels <- factor(labels) } @@ -105,8 +113,10 @@ plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals # the purpose for this step is to allow for special characters to be used # in the celltype grepping if (length(groups) > 1) { - labels2 <- gsub(paste0(paste0(groups, "_"), collapse = "|"), "", - labels) + labels2 <- gsub( + paste0(paste0(groups, "_"), collapse = "|"), "", + labels + ) } else { labels2 <- gsub(paste0(groups, "_"), "", labels) } @@ -120,10 +130,14 @@ plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals grp <- as.list(groups) celltype <- list() for (i in 1:length(c_type1)) { - celltype[[i]] <- .create_celltype_query(ctype1 = c_type1[[i]], ctype2 = c_type2, - sep = DEFAULT_SEP) - celltype[[i]] <- lapply(grp, .keep_interested_groups, ct = celltype[[i]], - sep = DEFAULT_SEP) + celltype[[i]] <- .create_celltype_query( + ctype1 = c_type1[[i]], ctype2 = c_type2, + sep = DEFAULT_SEP + ) + celltype[[i]] <- lapply(grp, .keep_interested_groups, + ct = celltype[[i]], + sep = DEFAULT_SEP + ) } for (i in 1:length(celltype)) { celltype[[i]] <- celltype[[i]][-which(celltype[[i]] == "")] @@ -144,8 +158,10 @@ plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals c_type2 <- lapply(c_type2, .sub_pattern, pattern = special_character_regex_pattern) celltype <- list() for (i in 1:length(c_type1)) { - celltype[[i]] <- .create_celltype_query(ctype1 = c_type1[[i]], ctype2 = c_type2, - sep = DEFAULT_SEP) + celltype[[i]] <- .create_celltype_query( + ctype1 = c_type1[[i]], ctype2 = c_type2, + sep = DEFAULT_SEP + ) } cell_type <- do.call(paste0, list(celltype, collapse = "|")) } @@ -159,60 +175,93 @@ plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals c_type2 <- lapply(c_type2, .sub_pattern, pattern = special_character_regex_pattern) celltype <- list() for (i in 1:length(c_type1)) { - celltype[[i]] <- .create_celltype_query(ctype1 = c_type1[[i]], ctype2 = c_type2, - sep = DEFAULT_SEP) + celltype[[i]] <- .create_celltype_query( + ctype1 = c_type1[[i]], ctype2 = c_type2, + sep = DEFAULT_SEP + ) } cell_type <- do.call(paste0, list(celltype, collapse = "|")) } if (!is.null(gene_family) & is.null(genes)) { if (length(gene_family) == 1) { - means_mat <- .prep_data_querygroup_celltype1(.data = means_mat, .query_group = query_group, + means_mat <- .prep_data_querygroup_celltype1( + .data = means_mat, .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, .celltype = celltype, - ...) - pvals_mat <- .prep_data_querygroup_celltype1(.data = pvals_mat, .query_group = query_group, + ... + ) + pvals_mat <- .prep_data_querygroup_celltype1( + .data = pvals_mat, .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, .celltype = celltype, - ...) + ... + ) if (!is.null(interaction_scores)) { - interaction_scores_mat <- .prep_data_querygroup_celltype1(.data = interaction_scores_mat, + interaction_scores_mat <- .prep_data_querygroup_celltype1( + .data = interaction_scores_mat, .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, - .celltype = celltype, ...) + .celltype = celltype, ... + ) } else if (!is.null(cellsign)) { - cellsign_mat <- .prep_data_querygroup_celltype1(.data = cellsign_mat, + cellsign_mat <- .prep_data_querygroup_celltype1( + .data = cellsign_mat, .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, - .celltype = celltype, ...) + .celltype = celltype, ... + ) } } else if (length(gene_family) > 1) { - means_mat <- .prep_data_querygroup_celltype2(.data = means_mat, .query_group = query_group, + means_mat <- .prep_data_querygroup_celltype2( + .data = means_mat, .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, .celltype = celltype, - ...) - pvals_mat <- .prep_data_querygroup_celltype2(.data = pvals_mat, .query_group = query_group, + ... + ) + pvals_mat <- .prep_data_querygroup_celltype2( + .data = pvals_mat, .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, .celltype = celltype, - ...) + ... + ) if (!is.null(interaction_scores)) { - interaction_scores_mat <- .prep_data_querygroup_celltype2(.data = interaction_scores_mat, + interaction_scores_mat <- .prep_data_querygroup_celltype2( + .data = interaction_scores_mat, .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, - .celltype = celltype, ...) + .celltype = celltype, ... + ) } else if (!is.null(cellsign)) { - cellsign_mat <- .prep_data_querygroup_celltype2(.data = cellsign_mat, + cellsign_mat <- .prep_data_querygroup_celltype2( + .data = cellsign_mat, .query_group = query_group, .gene_family = gene_family, .cell_type = cell_type, - .celltype = celltype, ...) + .celltype = celltype, ... + ) } } } else if (is.null(gene_family) & !is.null(genes) | is.null(gene_family) & is.null(genes)) { - means_mat <- .prep_data_query_celltype(.data = means_mat, .query = query, - .cell_type = cell_type, .celltype = celltype, ...) - pvals_mat <- .prep_data_query_celltype(.data = pvals_mat, .query = query, - .cell_type = cell_type, .celltype = celltype, ...) + means_mat <- .prep_data_query_celltype( + .data = means_mat, .query = query, + .cell_type = cell_type, .celltype = celltype, ... + ) + pvals_mat <- .prep_data_query_celltype( + .data = pvals_mat, .query = query, + .cell_type = cell_type, .celltype = celltype, ... + ) if (!is.null(interaction_scores)) { - interaction_scores_mat <- .prep_data_query_celltype(.data = interaction_scores_mat, - .cell_type = cell_type, .celltype = celltype, ...) + interaction_scores_mat <- .prep_data_query_celltype( + .data = interaction_scores_mat, .query = query, + .cell_type = cell_type, .celltype = celltype, ... + ) } else if (!is.null(cellsign)) { - cellsign_mat <- .prep_data_query_celltype(.data = cellsign_mat, .cell_type = cell_type, - .celltype = celltype, ...) + cellsign_mat <- cellsign_mat[, col_start:ncol(cellsign_mat)] # too difficult to code is properly? } + # } else if (!is.null(cellsign)) { + # cellsign_mat <- .prep_data_query_celltype( + # .data = cellsign_mat, .query = query, + # .cell_type = cell_type, .celltype = celltype, ... + # ) + # } } if (length(means_mat) == 0) { stop("Please check your options for splitby_key and your celltypes.") + } else { + if (!all(dim(pvals_mat) == dim(means_mat))) { + pvals_mat <- .prep_dimensions(pvals_mat, means_mat) + } } # rearrange the columns so that it interleaves the two groups if (!is.null(splitby_key)) { @@ -322,8 +371,10 @@ plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals } } } - row.names(df) <- paste0(df$Var1, paste0(rep(DEFAULT_SEP, 3), collapse = ""), - df$Var2) + row.names(df) <- paste0( + df$Var1, paste0(rep(DEFAULT_SEP, 3), collapse = ""), + df$Var2 + ) df$Var2 <- gsub(DEFAULT_SEP, "-", df$Var2) final_levels <- unique(df$Var2) df$Var2 <- factor(df$Var2, unique(df$Var2)) @@ -366,85 +417,117 @@ plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals if (scale_alpha_by_interaction_scores == TRUE) { if (default_style) { if (standard_scale) { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = scaled_means, size = scaled_means, alpha = interaction_scores)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means, alpha = interaction_scores + )) } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = means, size = means, alpha = interaction_scores)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = means, size = means, alpha = interaction_scores + )) } } else { if (all(df$significant == "no")) { - if (standard_scale) { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = scaled_means, size = scaled_means, alpha = interaction_scores)) - } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = means, size = means, alpha = interaction_scores)) - } - default_style <- TRUE - } else { - highlight_col <- "#FFFFFF" # enforce this - if (standard_scale) { - if (!is.null(highlight_size)) { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = scaled_means, size = scaled_means, stroke = highlight_size, - alpha = interaction_scores)) + if (standard_scale) { + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means, alpha = interaction_scores + )) } else { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = scaled_means, size = scaled_means, stroke = x_stroke, - alpha = interaction_scores)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = means, size = means, alpha = interaction_scores + )) } + default_style <- TRUE } else { - if (!is.null(highlight_size)) { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = means, size = means, stroke = highlight_size, - alpha = interaction_scores)) + highlight_col <- "#FFFFFF" # enforce this + if (standard_scale) { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = highlight_size, + alpha = interaction_scores + )) + } else { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = x_stroke, + alpha = interaction_scores + )) + } } else { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = means, size = means, stroke = x_stroke, alpha = interaction_scores)) + if (!is.null(highlight_size)) { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = highlight_size, + alpha = interaction_scores + )) + } else { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = x_stroke, alpha = interaction_scores + )) + } } } - } } } else { if (default_style) { if (standard_scale) { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = scaled_means, size = scaled_means)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means + )) } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = means, size = means)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = means, size = means + )) } } else { if (all(df$significant == "no")) { - if (standard_scale) { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = scaled_means, size = scaled_means)) - } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = means, size = means)) - } - default_style <- TRUE - } else { - highlight_col <- "#FFFFFF" # enforce this - if (standard_scale) { - if (!is.null(highlight_size)) { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = scaled_means, size = scaled_means, stroke = highlight_size)) + if (standard_scale) { + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means + )) } else { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = scaled_means, size = scaled_means, stroke = x_stroke)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = means, size = means + )) } + default_style <- TRUE } else { - if (!is.null(highlight_size)) { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = means, size = means, stroke = highlight_size)) + highlight_col <- "#FFFFFF" # enforce this + if (standard_scale) { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = highlight_size + )) + } else { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = x_stroke + )) + } } else { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = means, size = means, stroke = x_stroke)) + if (!is.null(highlight_size)) { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = highlight_size + )) + } else { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = x_stroke + )) + } } } - } } } } else if (!is.null(cellsign)) { @@ -456,168 +539,249 @@ plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals if (scale_alpha_by_cellsign == TRUE) { if (default_style) { if (standard_scale) { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = scaled_means, size = scaled_means, alpha = cellsign)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means, alpha = cellsign + )) } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = means, size = means, alpha = cellsign)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = means, size = means, alpha = cellsign + )) } } else { if (all(df$significant == "no")) { - if (standard_scale) { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = scaled_means, size = scaled_means, alpha = cellsign)) - } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = means, size = means, alpha = cellsign)) - } - default_style <- TRUE - } else { - highlight_col <- "#FFFFFF" # enforce this - if (standard_scale) { - if (!is.null(highlight_size)) { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = scaled_means, size = scaled_means, stroke = highlight_size, - alpha = cellsign)) + if (standard_scale) { + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means, alpha = cellsign + )) } else { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = scaled_means, size = scaled_means, stroke = x_stroke, - alpha = cellsign)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = means, size = means, alpha = cellsign + )) } + default_style <- TRUE } else { - if (!is.null(highlight_size)) { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = means, size = means, stroke = highlight_size, - alpha = cellsign)) + highlight_col <- "#FFFFFF" # enforce this + if (standard_scale) { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = highlight_size, + alpha = cellsign + )) + } else { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = x_stroke, + alpha = cellsign + )) + } } else { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = means, size = means, stroke = x_stroke, alpha = cellsign)) + if (!is.null(highlight_size)) { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = highlight_size, + alpha = cellsign + )) + } else { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = x_stroke, alpha = cellsign + )) + } } } - } } } else { if (default_style) { if (standard_scale) { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = scaled_means, size = scaled_means)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means + )) } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = means, size = means)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = means, size = means + )) } } else { if (all(df$significant == "no")) { - if (standard_scale) { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = scaled_means, size = scaled_means)) - } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = means, size = means)) - } - default_style <- TRUE - } else { - highlight_col <- "#FFFFFF" # enforce this - if (standard_scale) { - if (!is.null(highlight_size)) { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = scaled_means, size = scaled_means, stroke = highlight_size)) + if (standard_scale) { + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means + )) } else { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = scaled_means, size = scaled_means, stroke = x_stroke)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = means, size = means + )) } + default_style <- TRUE } else { - if (!is.null(highlight_size)) { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = means, size = means, stroke = highlight_size)) + highlight_col <- "#FFFFFF" # enforce this + if (standard_scale) { + if (!is.null(highlight_size)) { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = highlight_size + )) + } else { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = x_stroke + )) + } } else { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = means, size = means, stroke = x_stroke)) + if (!is.null(highlight_size)) { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = highlight_size + )) + } else { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = x_stroke + )) + } } } - } } } } else { if (default_style) { if (standard_scale) { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, fill = scaled_means, - size = scaled_means)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, fill = scaled_means, + size = scaled_means + )) } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, fill = means, - size = means)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, fill = means, + size = means + )) } } else { if (all(df$significant == "no")) { if (standard_scale) { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = scaled_means, size = scaled_means)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = scaled_means, size = scaled_means + )) } else { - g <- ggplot(df, aes(x = Var2, y = Var1, color = significant, - fill = means, size = means)) + g <- ggplot(df, aes( + x = Var2, y = Var1, color = significant, + fill = means, size = means + )) } default_style <- TRUE } else { - highlight_col <- "#FFFFFF" # enforce this + highlight_col <- "#FFFFFF" # enforce this if (standard_scale) { - if (!is.null(highlight_size)) { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = scaled_means, size = scaled_means, stroke = highlight_size)) - } else { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = scaled_means, size = scaled_means, stroke = x_stroke)) - } - } else { - if (!is.null(highlight_size)) { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = means, size = means, stroke = highlight_size)) + if (!is.null(highlight_size)) { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = highlight_size + )) + } else { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = scaled_means, size = scaled_means, stroke = x_stroke + )) + } } else { - g <- ggplot(df, aes(x = Var2, y = Var1, fill = significant, - colour = means, size = means, stroke = x_stroke)) - } + if (!is.null(highlight_size)) { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = highlight_size + )) + } else { + g <- ggplot(df, aes( + x = Var2, y = Var1, fill = significant, + colour = means, size = means, stroke = x_stroke + )) + } } } } } - g <- g + geom_point(pch = 21, na.rm = TRUE) + theme_bw() + theme(axis.text.x = element_text(angle = 45, - hjust = 0, color = "#000000"), axis.text.y = element_text(color = "#000000"), + g <- g + geom_point(pch = 21, na.rm = TRUE) + theme_bw() + theme( + axis.text.x = element_text( + angle = 45, + hjust = 0, color = "#000000" + ), axis.text.y = element_text(color = "#000000"), axis.title.x = element_blank(), axis.title.y = element_blank(), legend.direction = "vertical", - legend.box = "horizontal") + scale_x_discrete(position = "top") + scale_radius(range = c(0, - max_size)) + scale_linewidth(range = c(0, max_highlight_size)) + legend.box = "horizontal" + ) + scale_x_discrete(position = "top") + scale_radius(range = c( + 0, + max_size + )) + scale_linewidth(range = c(0, max_highlight_size)) if (default_style) { - g <- g + scale_colour_manual(values = c(yes = highlight_col, no = "#ffffff"), - na.value = NA, na.translate = FALSE) + guides(fill = guide_colourbar(barwidth = 4, - label = TRUE, ticks = TRUE, draw.ulim = TRUE, draw.llim = TRUE, order = 1), - size = guide_legend(reverse = TRUE, order = 2), stroke = guide_legend(reverse = TRUE, - order = 3)) + g <- g + scale_colour_manual( + values = c(yes = highlight_col, no = "#ffffff"), + na.value = NA, na.translate = FALSE + ) + guides( + fill = guide_colourbar( + barwidth = 4, + label = TRUE, ticks = TRUE, draw.ulim = TRUE, draw.llim = TRUE, order = 1 + ), + size = guide_legend(reverse = TRUE, order = 2), stroke = guide_legend( + reverse = TRUE, + order = 3 + ) + ) if (length(col_option) == 1) { - g <- g + scale_fill_gradientn(colors = (grDevices::colorRampPalette(c("white", - col_option)))(100), na.value = "white") + g <- g + scale_fill_gradientn(colors = (grDevices::colorRampPalette(c( + "white", + col_option + )))(100), na.value = "white") } else { - g <- g + scale_fill_gradientn(colors = c("white", (grDevices::colorRampPalette(col_option))(99)), - na.value = "white") + g <- g + scale_fill_gradientn( + colors = c("white", (grDevices::colorRampPalette(col_option))(99)), + na.value = "white" + ) } } else { - g <- g + scale_fill_manual(values = highlight_col, na.value = "#ffffff", - na.translate = TRUE) + guides(colour = guide_colourbar(barwidth = 4, - label = TRUE, ticks = TRUE, draw.ulim = TRUE, draw.llim = TRUE, order = 1), - size = guide_legend(reverse = TRUE, order = 2), stroke = guide_legend(reverse = TRUE, - order = 3)) + g <- g + scale_fill_manual( + values = highlight_col, na.value = "#ffffff", + na.translate = TRUE + ) + guides( + colour = guide_colourbar( + barwidth = 4, + label = TRUE, ticks = TRUE, draw.ulim = TRUE, draw.llim = TRUE, order = 1 + ), + size = guide_legend(reverse = TRUE, order = 2), stroke = guide_legend( + reverse = TRUE, + order = 3 + ) + ) df2 <- df if (standard_scale) { df2$scaled_means[df$pvals < 0.05] <- NA - g <- g + geom_point(aes(x = Var2, y = Var1, colour = scaled_means, - size = scaled_means), data = df2, inherit_aes = FALSE, na_rm = TRUE) + g <- g + geom_point(aes( + x = Var2, y = Var1, colour = scaled_means, + size = scaled_means + ), data = df2, inherit_aes = FALSE, na_rm = TRUE) } else { df2$means[df$pvals < 0.05] <- NA g <- g + geom_point(aes(x = Var2, y = Var1, colour = means, size = means), - data = df2, inherit_aes = FALSE, na_rm = TRUE) + data = df2, inherit_aes = FALSE, na_rm = TRUE + ) } if (length(col_option) == 1) { - g <- g + scale_colour_gradientn(colors = (grDevices::colorRampPalette(c("white", - col_option)))(100), na.value = "white") + g <- g + scale_colour_gradientn(colors = (grDevices::colorRampPalette(c( + "white", + col_option + )))(100), na.value = "white") } else { - g <- g + scale_colour_gradientn(colors = c("white", (grDevices::colorRampPalette(col_option))(99)), - na.value = "white") + g <- g + scale_colour_gradientn( + colors = c("white", (grDevices::colorRampPalette(col_option))(99)), + na.value = "white" + ) } } if (!is.null(interaction_scores) & (scale_alpha_by_interaction_scores == @@ -640,4 +804,4 @@ plot_cpdb <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals } return(g) } -} \ No newline at end of file +} diff --git a/R/utils.R b/R/utils.R index 08d99ce0..b195271d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -10,6 +10,22 @@ DEFAULT_CLASS_COL <- 13 DEFAULT_COL_START <- 12 +.prep_dimensions <- function(input, reference) { + requireNamespace("reshape2") + tmp_mat <- reference + tmp_mat <- (tmp_mat * 0) + 1 + melted_A <- reshape2::melt(as.matrix(tmp_mat)) + melted_B <- reshape2::melt(as.matrix(input)) + rownames(melted_A) <- paste0(melted_A$Var1, DEFAULT_SEP, melted_A$Var2) + rownames(melted_B) <- paste0(melted_B$Var1, DEFAULT_SEP, melted_B$Var2) + melted_A[row.names(melted_B), ] <- melted_B + tmp_mat <- reshape2::dcast(melted_A, Var1 ~ Var2, value.var = "value") + rownames(tmp_mat) <- tmp_mat$Var1 + tmp_mat <- tmp_mat[, -1] + tmp_mat <- tmp_mat[rownames(reference), colnames(reference)] + return(tmp_mat) +} + .prep_table <- function(data) { dat <- data rownames(dat) <- make.names(dat$interacting_pair, unique = TRUE) @@ -36,18 +52,18 @@ DEFAULT_COL_START <- 12 .prep_query_group <- function(data, genes = NULL, gene_family = NULL, custom_gene_family = NULL) { if (is.null(gene_family) & is.null(genes)) { query_group <- NULL - query_id <- grep("", data$interacting_pair) - query <- row.names(data)[query_id] + query_id <- grep("", data$interacting_pair, value = TRUE) + query <- row.names(data[data$interacting_pair %in% query_id, ]) } else if (!is.null(gene_family) & !is.null(genes)) { stop("Please specify either genes or gene_family, not both") } else if (!is.null(gene_family) & is.null(genes)) { - chemokines <- grep("^CXC|CCL|CCR|CX3|XCL|XCR", data$interacting_pair) - th1 <- grep("IL2|IL12|IL18|IL27|IFNG|IL10|TNF$|TNF |LTA|LTB|STAT1|CCR5|CXCR3|IL12RB1|IFNGR1|TBX21|STAT4", data$interacting_pair) - th2 <- grep("IL4|IL5|IL25|IL10|IL13|AREG|STAT6|GATA3|IL4R", data$interacting_pair) - th17 <- grep("IL21|IL22|IL24|IL26|IL17A|IL17A|IL17F|IL17RA|IL10|RORC|RORA|STAT3|CCR4|CCR6|IL23RA|TGFB", data$interacting_pair) - treg <- grep("IL35|IL10|FOXP3|IL2RA|TGFB", data$interacting_pair) - costimulatory <- grep("CD86|CD80|CD48|LILRB2|LILRB4|TNF|CD2|ICAM|SLAM|LT[AB]|NECTIN2|CD40|CD70|CD27|CD28|CD58|TSLP|PVR|CD44|CD55|CD[1-9]", data$interacting_pair) - coinhibitory <- grep("SIRP|CD47|ICOS|TIGIT|CTLA4|PDCD1|CD274|LAG3|HAVCR|VSIR", data$interacting_pair) + chemokines <- grep("^CXC|CCL|CCR|CX3|XCL|XCR", data$interacting_pair, value = TRUE) + th1 <- grep("IL2|IL12|IL18|IL27|IFNG|IL10|TNF$|TNF |LTA|LTB|STAT1|CCR5|CXCR3|IL12RB1|IFNGR1|TBX21|STAT4", data$interacting_pair, value = TRUE) + th2 <- grep("IL4|IL5|IL25|IL10|IL13|AREG|STAT6|GATA3|IL4R", data$interacting_pair, value = TRUE) + th17 <- grep("IL21|IL22|IL24|IL26|IL17A|IL17A|IL17F|IL17RA|IL10|RORC|RORA|STAT3|CCR4|CCR6|IL23RA|TGFB", data$interacting_pair, value = TRUE) + treg <- grep("IL35|IL10|FOXP3|IL2RA|TGFB", data$interacting_pair, value = TRUE) + costimulatory <- grep("CD86|CD80|CD48|LILRB2|LILRB4|TNF|CD2|ICAM|SLAM|LT[AB]|NECTIN2|CD40|CD70|CD27|CD28|CD58|TSLP|PVR|CD44|CD55|CD[1-9]", data$interacting_pair, value = TRUE) + coinhibitory <- grep("SIRP|CD47|ICOS|TIGIT|CTLA4|PDCD1|CD274|LAG3|HAVCR|VSIR", data$interacting_pair, value = TRUE) query_group <- list( chemokines = chemokines, chemokine = chemokines, @@ -60,11 +76,12 @@ DEFAULT_COL_START <- 12 costimulation = costimulatory, coinhibition = coinhibitory ) + query_group <- lapply(query_group, function(x) row.names(data[data$interacting_pair %in% x, ])) if (!is.null(custom_gene_family)) { cgf <- as.list(custom_gene_family) cgf <- lapply(cgf, function(x) { - q_id <- grep(paste(x, collapse = "|"), data$interacting_pair) - q <- row.names(data)[q_id] + q_id <- grep(paste(x, collapse = "|"), data$interacting_pair, value = TRUE) + q <- row.names(data[data$interacting_pair %in% q_id, ]) return(q) }) query_group <- c(query_group, cgf) @@ -72,8 +89,8 @@ DEFAULT_COL_START <- 12 query <- NULL } else if (is.null(gene_family) & !is.null(genes)) { query_group <- NULL - query_id <- grep(paste(genes, collapse = "|"), data$interacting_pair) - query <- row.names(data)[query_id] + query_id <- grep(paste(genes, collapse = "|"), data$interacting_pair, value = TRUE) + query <- row.names(data[data$interacting_pair %in% query_id, ]) } out <- list("query_group" = query_group, "query" = query) return(out) @@ -99,7 +116,7 @@ DEFAULT_COL_START <- 12 .prep_data_querygroup_celltype1 <- function(.data, .query_group, .gene_family, .cell_type, .celltype, ...) { dat <- suppressWarnings(tryCatch( - .data[.query_group[[tolower(.gene_family)]], + .data[rownames(.data) %in% .query_group[[tolower(.gene_family)]], grep(.cell_type, colnames(.data), useBytes = TRUE, ...), drop = FALSE, ], @@ -110,7 +127,7 @@ DEFAULT_COL_START <- 12 ) }) colidx <- unique(do.call(c, colidx)) - tmpm <- .data[.query_group[[tolower(.gene_family)]], colidx, drop = FALSE] + tmpm <- .data[rownames(.data) %in% .query_group[[tolower(.gene_family)]], colidx, drop = FALSE] return(tmpm) } )) @@ -120,7 +137,7 @@ DEFAULT_COL_START <- 12 .prep_data_querygroup_celltype2 <- function(.data, .query_group, .gene_family, .cell_type, .celltype, ...) { dat <- suppressWarnings(tryCatch( - .data[unlist(.query_group[c(tolower(.gene_family))], use.names = FALSE), + .data[rownames(.data) %in% unlist(.query_group[c(tolower(.gene_family))], use.names = FALSE), grep(.cell_type, colnames(.data), useBytes = TRUE, ...), drop = FALSE ], @@ -131,7 +148,7 @@ DEFAULT_COL_START <- 12 ) }) colidx <- unique(do.call(c, colidx)) - tmpm <- .data[unlist(.query_group[c(tolower(.gene_family))], use.names = FALSE), colidx, drop = FALSE] + tmpm <- .data[rownames(.data) %in% unlist(.query_group[c(tolower(.gene_family))], use.names = FALSE), colidx, drop = FALSE] return(tmpm) } )) @@ -140,7 +157,7 @@ DEFAULT_COL_START <- 12 } .prep_data_query_celltype <- function(.data, .query, .cell_type, .celltype, ...) { - dat <- suppressWarnings(tryCatch(.data[.query, grep(.cell_type, colnames(.data), + dat <- suppressWarnings(tryCatch(.data[rownames(.data) %in% .query, grep(.cell_type, colnames(.data), useBytes = TRUE, ... ), drop = FALSE], error = function(e) { colidx <- lapply(.celltype, function(z) { @@ -150,7 +167,7 @@ DEFAULT_COL_START <- 12 ) }) colidx <- unique(do.call(c, colidx)) - tmpm <- .data[.query, colidx, drop = FALSE] + tmpm <- .data[rownames(.data) %in% .query, colidx, drop = FALSE] return(tmpm) })) dat <- dat[rowSums(is.na(dat)) == 0, ] diff --git a/data/cpdb_output_degs.RData b/data/cpdb_output_degs.RData new file mode 100644 index 00000000..4e555cd2 Binary files /dev/null and b/data/cpdb_output_degs.RData differ diff --git a/data/cpdb_output_stat.RData b/data/cpdb_output_stat.RData new file mode 100644 index 00000000..6ad90586 Binary files /dev/null and b/data/cpdb_output_stat.RData differ diff --git a/man/plot_cpdb.Rd b/man/plot_cpdb.Rd index 008dafae..01a3891f 100644 --- a/man/plot_cpdb.Rd +++ b/man/plot_cpdb.Rd @@ -57,7 +57,7 @@ plot_cpdb( \item{max_size}{max size of points.} -\item{keep_significant_only}{logical. Default is FALSE. Switch to TRUE if you only want to plot the significant hits from cpdb.} +\item{keep_significant_only}{logical. Default is TRUE. Switch to FALSE if you want to plot all the results from cpdb.} \item{splitby_key}{column name in the metadata/coldata table to split the spots by. Can only take columns with binary options. If specified, name to split by MUST be specified in the meta file provided to cpdb prior to analysis.} @@ -109,7 +109,7 @@ Plotting CellPhoneDB results \donttest{ data(kidneyimmune) data(cpdb_output) -plot_cpdb(kidneyimmune, 'B cell', 'CD4T cell', 'celltype', means, pvals, splitby_key = 'Experiment', genes = c('CXCL13', 'CD274', 'CXCR5')) -plot_cpdb(kidneyimmune, 'B cell', 'CD4T cell', 'celltype', means, pvals, splitby_key = 'Experiment', gene_family = 'chemokines') +plot_cpdb(kidneyimmune, "B cell", "CD4T cell", "celltype", means, pvals, splitby_key = "Experiment", genes = c("CXCL13", "CD274", "CXCR5")) +plot_cpdb(kidneyimmune, "B cell", "CD4T cell", "celltype", means, pvals, splitby_key = "Experiment", gene_family = "chemokines") } } diff --git a/vignettes/vignette.rmd b/vignettes/vignette.rmd index 9b923b85..c6a8ef52 100644 --- a/vignettes/vignette.rmd +++ b/vignettes/vignette.rmd @@ -65,14 +65,18 @@ If you are using results from `CellPhoneDB` `deg_analysis` mode from version >= # decon = pd.read_csv("deconvoluted.txt", sep="\t") # I've provided an example datasets -data(cpdb_output) -data(cpdb_output2) +data(cpdb_output_stat) # ran with CellPhoneDB statistical analysis mode +data(cpdb_output_degs) # ran with CellPhoneDB degs analysis mode ``` ## Heatmap The original heatmap plot from `CellPhoneDB` can be achieved with this reimplemented function. ```{r, message = FALSE, warning = FALSE} -plot_cpdb_heatmap(pvals = pvals2, cellheight = 10, cellwidth = 10) +plot_cpdb_heatmap(pvals = pvals_stat, cellheight = 10, cellwidth = 10) +``` + +```{r, message = FALSE, warning = FALSE} +plot_cpdb_heatmap(pvals = rel_int_degs, cellheight = 10, cellwidth = 10, degs_analysis = TRUE) ``` The current heatmap is directional (check `count_network` and `interaction_edges` for more details in `return_tables = True`). @@ -80,7 +84,7 @@ The current heatmap is directional (check `count_network` and `interaction_edges To obtain the heatmap where the interaction counts are not symmetrical, do: ```{r, message = FALSE, warning = FALSE} -plot_cpdb_heatmap(pvals = pvals2, cellheight = 10, cellwidth = 10, symmetrical = FALSE) +plot_cpdb_heatmap(pvals = pvals_stat, cellheight = 10, cellwidth = 10, symmetrical = FALSE) ``` The values for the `symmetrical=FALSE` mode follow the direction of the L-R direction where it's always moleculeA:celltypeA -> moleculeB:celltypeB. @@ -105,8 +109,22 @@ plot_cpdb( cell_type1="B cell", cell_type2=".", # this means all cell-types celltype_key="celltype", - means=means2, - pvals=pvals2, + means=means_stat, + pvals=pvals_stat, + genes=c("PTPRC", "TNFSF13"), + title="interacting interactions!", +) +``` + +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 4} +plot_cpdb( + scdata=kidneyimmune, + cell_type1="B cell", + cell_type2=".", # this means all cell-types + celltype_key="celltype", + means=means_degs, + pvals=rel_int_degs, + degs_analysis=TRUE, genes=c("PTPRC", "TNFSF13"), title="interacting interactions!", ) @@ -114,14 +132,40 @@ plot_cpdb( Or don't specify either and it will try to plot all significant interactions. +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 10} +plot_cpdb( + scdata=kidneyimmune, + cell_type1="B cell", + cell_type2=".", + celltype_key="celltype", + means=means_stat, + pvals=pvals_stat, +) +``` + +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 10} +plot_cpdb( + scdata=kidneyimmune, + cell_type1="B cell", + cell_type2=".", # this means all cell-types + celltype_key="celltype", + means=means_degs, + pvals=rel_int_degs, + degs_analysis=TRUE, + title="interacting interactions!", +) +``` + +You can also try an alternative visualisation inspired by how `squidpy` displays the results: + ```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 3} plot_cpdb( scdata=kidneyimmune, cell_type1="B cell", cell_type2=".", celltype_key="celltype", - means=means2, - pvals=pvals2, + means=means_stat, + pvals=pvals_stat, genes=c("PTPRC", "CD40", "CLEC2D"), default_style=FALSE ) @@ -130,6 +174,8 @@ plot_cpdb( you can also toggle options to `splitby_key` and `gene_family`: ```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 6} +data(cpdb_output) # this is a different dataset where the "Experiment" was appended to the "celltype" + plot_cpdb( scdata = kidneyimmune, cell_type1 = "B cell", @@ -238,7 +284,7 @@ Please help contribute to the interaction grouping list [here](https://docs.goog Credits to Ben Stewart for coming up with the base code! ```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 10} - +data(cpdb_output2) # legacy reasons plot_cpdb2( scdata = kidneyimmune, cell_type1 = "B cell", @@ -277,9 +323,9 @@ plot_cpdb3( cell_type1 = "B cell", cell_type2 = "CD4T cell|MNPd", celltype_key = "celltype", # column name where the cell ids are located in the metadata - means = means2, - pvals = pvals2, - deconvoluted = decon2 # new options from here on specific to plot_cpdb3 + means = means_stat, + pvals = pvals_stat, + deconvoluted = decon_stat # new options from here on specific to plot_cpdb3 ) ``` @@ -295,9 +341,9 @@ plot_cpdb4( cell_type1 = "NK", cell_type2 = "Mast", celltype_key = "celltype", - means = means2, - pvals = pvals2, - deconvoluted = decon2 + means = means_stat, + pvals = pvals_stat, + deconvoluted = decon_stat ) ```