Skip to content

Commit

Permalink
fix duplicate rownames introduced in CellPhoneDB v5 (#90)
Browse files Browse the repository at this point in the history
* hopefully this fixes #89

* Update plot_cpdb.R

* Update utils.R

* ok fix the chords

* bump version

* Update DESCRIPTION

be consistent

* Update plot_cpdb4.R

* fix formatting

* Revert "fix formatting"

This reverts commit ce4ca40.

* first attempt at fixing the names

* Update plot_cpdb.R

* Update plot_cpdb.Rd

* Update utils.R
  • Loading branch information
zktuong authored Dec 6, 2023
1 parent 18cf3ac commit 8b249e8
Show file tree
Hide file tree
Showing 8 changed files with 285 additions and 165 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ktplots
Title: Plot single-cell data dotplots
Version: 2.0.0
Version: 2.1.0
Authors@R: person("Kelvin", "Tuong", email = c("[email protected]"), role = c("aut", "cre"))
Description: Plotting tools for scData.
License: MIT + file LICENSE
Expand Down
18 changes: 12 additions & 6 deletions R/plot_cpdb.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#' @param scale_alpha_by_interaction_scores Whether or not to filter values by the interaction score.
#' @param scale_alpha_by_cellsign Whether or not to filter the transparency of interactions by the cellsign.
#' @param filter_by_cellsign Filter out interactions with a 0 value cellsign.
#' @param keep_id_cp_interaction Whether or not to keep the id_cp_interaction in the plot.
#' @param ... passes arguments to grep for cell_type1 and cell_type2.
#' @return ggplot dot plot object of cellphone db output
#' @examples
Expand All @@ -53,7 +54,7 @@ plot_cpdb <- function(
default_style = TRUE, highlight_col = "red", highlight_size = NULL, max_highlight_size = 2,
special_character_regex_pattern = NULL, degs_analysis = FALSE, return_table = FALSE,
exclude_interactions = NULL, min_interaction_score = 0, scale_alpha_by_interaction_scores = FALSE,
scale_alpha_by_cellsign = FALSE, filter_by_cellsign = FALSE, title = "", ...) {
scale_alpha_by_cellsign = FALSE, filter_by_cellsign = FALSE, title = "", keep_id_cp_interaction = FALSE, ...) {
requireNamespace("SingleCellExperiment")
requireNamespace("grDevices")
if (is.null(special_character_regex_pattern)) {
Expand All @@ -80,11 +81,10 @@ plot_cpdb <- function(
# ok front load a 'dictionary' here.
if (col_start == DEFAULT_V5_COL_START) {
v5tmp <- reshape2::melt(means_mat, id.vars = colnames(means_mat)[1:col_start])
special_sep <- paste0(rep(DEFAULT_SEP, 3), collapse = "")
row.names(v5tmp) <- paste0(
gsub("_", "-", v5tmp$interacting_pair), special_sep,
v5tmp$variable
)
row.names(v5tmp) <- paste0(v5tmp$id_cp_interaction, SPECIAL_SEP, gsub(
"_",
"-", v5tmp$interacting_pair
), SPECIAL_SEP, v5tmp$variable)
v5tmp <- v5tmp[, c("is_integrin", "directionality", "classification")]
}
cell_type1 <- .sub_pattern(cell_type = cell_type1, pattern = special_character_regex_pattern)
Expand Down Expand Up @@ -410,6 +410,12 @@ plot_cpdb <- function(
if (return_table) {
return(df)
} else {
# change the label of Var1
if (keep_id_cp_interaction){
df$Var1 <- gsub(SPECIAL_SEP, "_", df$Var1)
} else {
df$Var1 <- gsub(paste0(".*", SPECIAL_SEP), "", df$Var1)
}
if (!is.null(interaction_scores)) {
requireNamespace("dplyr")
df <- df %>%
Expand Down
149 changes: 91 additions & 58 deletions R/plot_cpdb2.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,50 +32,64 @@
#' @import ggrepel
#' @export

plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals,
plot_cpdb2 <- function(
scdata, cell_type1, cell_type2, celltype_key, means, pvals,
deconvoluted, keep_significant_only = TRUE, splitby_key = NULL, standard_scale = TRUE,
gene_symbol_mapping = NULL, frac = 0.1, remove_self = TRUE, desiredInteractions = NULL,
interaction_grouping = NULL, edge_group_colors = NULL, node_group_colors = NULL,
degs_analysis = FALSE, return_df = FALSE, plot_score_as_thickness = TRUE, ...) {
if (class(scdata) == "Seurat") {
stop("Sorry not supported. Please use a SingleCellExperiment object.")
}
lr_interactions <- plot_cpdb(scdata = scdata, cell_type1 = cell_type1, cell_type2 = cell_type2,
lr_interactions <- plot_cpdb(
scdata = scdata, cell_type1 = cell_type1, cell_type2 = cell_type2,
celltype_key = celltype_key, splitby_key = splitby_key, means = means, pvals = pvals,
keep_significant_only = keep_significant_only, standard_scale = standard_scale,
return_table = TRUE, degs_analysis = degs_analysis, ...)
return_table = TRUE, degs_analysis = degs_analysis, ...
)
requireNamespace("SummarizedExperiment")
requireNamespace("SingleCellExperiment")
if (is.null(splitby_key)) {
if (any(lr_interactions[, 3] > 0)) {
if (any(is.na(lr_interactions[, 3]))) {
lr_interactions <- lr_interactions[lr_interactions[, 3] > 0 & !is.na(lr_interactions[,
3]), ]
lr_interactions <- lr_interactions[lr_interactions[, 3] > 0 & !is.na(lr_interactions[
,
3
]), ]
} else {
lr_interactions <- lr_interactions[lr_interactions[, 3] > 0, ]
}
}
}
subset_clusters <- unique(unlist(lapply(as.character(lr_interactions$group),
strsplit, DEFAULT_SEP)))
subset_clusters <- unique(unlist(lapply(
as.character(lr_interactions$group),
strsplit, DEFAULT_SEP
)))
sce_subset <- scdata[, SummarizedExperiment::colData(scdata)[, celltype_key] %in%
subset_clusters]
interactions <- means[, c("interacting_pair", "gene_a", "gene_b", "partner_a",
"partner_b", "receptor_a", "receptor_b")]
interactions$converted <- gsub("-", " ", interactions$interacting_pair)
interactions$converted <- gsub("_", "-", interactions$converted)
interactions_subset <- interactions[interactions$converted %in% lr_interactions$Var1,
]
tm0 <- do.call(c, lapply(as.list(interactions_subset$interacting_pair), strsplit,
"_"))
interactions <- means[, c(
"id_cp_interaction", "interacting_pair", "gene_a", "gene_b", "partner_a",
"partner_b", "receptor_a", "receptor_b"
)]
interactions$use_interaction_name <- paste0(interactions$id_cp_interaction, SPECIAL_SEP, interactions$interacting_pair)
interactions$converted <- gsub("_", "-", interactions$use_interaction_name)
interactions_subset <- interactions[interactions$converted %in% lr_interactions$Var1, ]
tm0 <- do.call(c, lapply(
as.list(interactions_subset$use_interaction_name), strsplit,
"_"
))
if (any(lapply(tm0, length) > 2)) {
complex_id <- which(lapply(tm0, length) > 2)
interactions_subset_ <- interactions_subset[complex_id, ]
simple_1 <- interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_b)]
partner_1 <- gsub("complex:", "", interactions_subset_$partner_b[grep("complex:",
interactions_subset_$partner_b)])
partner_2 <- gsub("complex:", "", interactions_subset_$partner_a[grep("complex:",
interactions_subset_$partner_a)])
partner_1 <- gsub("complex:", "", interactions_subset_$partner_b[grep(
"complex:",
interactions_subset_$partner_b
)])
partner_2 <- gsub("complex:", "", interactions_subset_$partner_a[grep(
"complex:",
interactions_subset_$partner_a
)])
simple_2 <- interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_a)]
for (i in seq_along(simple_1)) {
simple_1[i] <- gsub(paste0(partner_1[i], "_|_", partner_1[i]), "", simple_1[i])
Expand All @@ -88,22 +102,29 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
for (i in seq_along(complex_id)) {
tm0[[complex_id[i]]] <- tmplist[[i]]
}
tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2)))
tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0)) / 2)))
colnames(tm0) <- c("id_a", "id_b")
interactions_subset <- cbind(interactions_subset, tm0)
dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b",
"id_a", "id_b", "receptor_a", "receptor_b")]
dictionary <- interactions_subset[, c(
"id_cp_interaction", "gene_a", "gene_b", "partner_a", "partner_b",
"id_a", "id_b", "receptor_a", "receptor_b"
)]
} else {
tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2)))
tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0)) / 2)))
colnames(tm0) <- c("id_a", "id_b")
tm0$id_a <- gsub(paste0(".*", SPECIAL_SEP), "", tm0$id_a)
interactions_subset <- cbind(interactions_subset, tm0)
dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b",
"id_a", "id_b", "receptor_a", "receptor_b")]
dictionary <- interactions_subset[, c(
"id_cp_interaction", "gene_a", "gene_b", "partner_a", "partner_b",
"id_a", "id_b", "receptor_a", "receptor_b"
)]
}
if (!is.null(interaction_grouping)) {
if ((class(interaction_grouping) == "data.frame")) {
interactions_subset$group <- interaction_grouping[, 2][match(interactions_subset$interacting_pair,
interaction_grouping[, 1])]
interactions_subset$group <- interaction_grouping[, 2][match(
interactions_subset$interacting_pair,
interaction_grouping[, 1]
)]
}
}
# extract all the possible genes.
Expand All @@ -130,9 +151,9 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
for (n in names(sce_list)) {
for (x in unique(meta[, celltype_key])) {
sce_list[[n]][[x]] <- sce_subset_tmp[, meta[, celltype_key] == x &
meta[, splitby_key] == n]
meta[, splitby_key] == n]
sce_list_alt[[n]][[x]] <- sce_subset[, meta[, celltype_key] == x &
meta[, splitby_key] == n]
meta[, splitby_key] == n]
}
}
sce_list2 <- lapply(sce_list, function(y) {
Expand Down Expand Up @@ -177,8 +198,7 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
}
rownames(sce_list2) <- humanreadablename
rownames(sce_list3) <- humanreadablename
decon_subset <- deconvoluted[deconvoluted$complex_name %in% .findComplex(interactions_subset),
]
decon_subset <- deconvoluted[deconvoluted$complex_name %in% .findComplex(interactions_subset), ]
if (nrow(decon_subset) > 0) {
# although multiple rows are returned, really it's the same value for
# the same complex
Expand All @@ -192,14 +212,14 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
decon_subset_fraction <- lapply(decon_subset, function(x) {
x <- unique(x$gene_name)
test <- lapply(sce_list_alt, function(y) {
return(lapply(y, .cellTypeFraction_complex, genes = z, gene_symbol_mapping = gene_symbol_mapping))
return(lapply(y, .cellTypeFraction_complex, genes = z, gene_symbol_mapping = gene_symbol_mapping))
})
return(test)
})
decon_subset_fraction <- lapply(decon_subset_fraction, function(x) {
y <- lapply(x, function(z) do.call(cbind, z))
for (i in 1:length(y)) {
colnames(y[[i]]) <- paste0(names(y[i]), "_", colnames(y[[i]]))
colnames(y[[i]]) <- paste0(names(y[i]), "_", colnames(y[[i]]))
}
y <- do.call(cbind, y)
return(y)
Expand All @@ -208,7 +228,7 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
decon_subset_fraction <- lapply(decon_subset, function(x) {
z <- unique(x$gene_name)
test <- lapply(sce_list_alt, function(y) {
return(.cellTypeFraction_complex(y, genes = z, gene_symbol_mapping = gene_symbol_mapping))
return(.cellTypeFraction_complex(y, genes = z, gene_symbol_mapping = gene_symbol_mapping))
})
return(do.call(cbind, test))
})
Expand All @@ -228,8 +248,10 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
# make a big fat edgelist
if (!is.null(desiredInteractions)) {
if (class(desiredInteractions) == "list") {
desiredInteractions_ <- c(desiredInteractions, lapply(desiredInteractions,
rev))
desiredInteractions_ <- c(desiredInteractions, lapply(
desiredInteractions,
rev
))
cell_type_grid <- as.data.frame(do.call(rbind, desiredInteractions_))
} else if ((class(desiredInteractions) == "data.frame")) {
cell_type_grid <- desiredInteractions
Expand Down Expand Up @@ -260,27 +282,33 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
dfx <- list()
if (!is.null(splitby_key)) {
for (i in unique(meta[, splitby_key])) {
dfx[[i]] <- .generateDf(ligand = ligand, sep = DEFAULT_SEP, receptor = receptor,
dfx[[i]] <- .generateDf(
ligand = ligand, sep = DEFAULT_SEP, receptor = receptor,
receptor_a = receptor_a, receptor_b = receptor_b, pair = pair, converted_pair = converted_pair,
producers = producers, receivers = receivers, cell_type_means = expr_df,
cell_type_fractions = fraction_df, sce = sce_subset, sce_alt = sce_list_alt,
gsm = gene_symbol_mapping, splitted = i)
gsm = gene_symbol_mapping, splitted = i
)
dfx[[i]] <- dfx[[i]][dfx[[i]]$barcode %in% barcodes, ]
}
} else {
dfx[[1]] <- .generateDf(ligand = ligand, sep = DEFAULT_SEP, receptor = receptor,
dfx[[1]] <- .generateDf(
ligand = ligand, sep = DEFAULT_SEP, receptor = receptor,
receptor_a = receptor_a, receptor_b = receptor_b, pair = pair, converted_pair = converted_pair,
producers = producers, receivers = receivers, cell_type_means = expr_df,
cell_type_fractions = fraction_df, sce = sce_subset, sce_alt = sce_list_alt,
gsm = gene_symbol_mapping)
gsm = gene_symbol_mapping
)
dfx[[1]] <- dfx[[1]][dfx[[1]]$barcode %in% barcodes, ]
}
if (return_df) {
return(dfx)
} else {
# set the bundled connections
df0 <- lapply(dfx, function(x) x[x$producer_fraction >= frac | x$receiver_fraction >=
frac, ]) # save this for later
df0 <- lapply(dfx, function(x) {
x[x$producer_fraction >= frac | x$receiver_fraction >=
frac, ]
}) # save this for later
# now construct the hierachy
gl <- list()
if (!is.null(interaction_grouping)) {
Expand All @@ -293,31 +321,36 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
for (i in 1:length(dfx)) {
if (!is.null(splitby_key)) {
if (nrow(dfx[[i]]) > 0 & nrow(df0[[i]]) > 0) {
gl[[i]] <- .constructGraph(input_group = names(dfx)[i], sep = DEFAULT_SEP,
el = dfx[[i]], el0 = df0[[i]], unique_id = cells_test, interactions_df = interactions_subset,
plot_cpdb_out = lr_interactions, celltype_key = celltype_key,
edge_group = edge_group, edge_group_colors = edge_group_colors,
node_group_color = node_group_colors, plot_score_as_thickness = plot_score_as_thickness)
gl[[i]] <- .constructGraph(
input_group = names(dfx)[i], sep = DEFAULT_SEP,
el = dfx[[i]], el0 = df0[[i]], unique_id = cells_test, interactions_df = interactions_subset,
plot_cpdb_out = lr_interactions, celltype_key = celltype_key,
edge_group = edge_group, edge_group_colors = edge_group_colors,
node_group_color = node_group_colors, plot_score_as_thickness = plot_score_as_thickness
)
} else {
gl[[i]] <- NA
cantplot <- c(cantplot, names(dfx)[i])
gl[[i]] <- NA
cantplot <- c(cantplot, names(dfx)[i])
}
} else {
if (nrow(dfx[[i]]) > 0 & nrow(df0[[i]]) > 0) {
gl[[i]] <- .constructGraph(input_group = NULL, sep = DEFAULT_SEP,
el = dfx[[i]], el0 = df0[[i]], unique_id = cells_test, interactions_df = interactions_subset,
plot_cpdb_out = lr_interactions, celltype_key = celltype_key,
edge_group = edge_group, edge_group_colors = edge_group_colors,
node_group_color = node_group_colors, plot_score_as_thickness = plot_score_as_thickness)
gl[[i]] <- .constructGraph(
input_group = NULL, sep = DEFAULT_SEP,
el = dfx[[i]], el0 = df0[[i]], unique_id = cells_test, interactions_df = interactions_subset,
plot_cpdb_out = lr_interactions, celltype_key = celltype_key,
edge_group = edge_group, edge_group_colors = edge_group_colors,
node_group_color = node_group_colors, plot_score_as_thickness = plot_score_as_thickness
)
} else {
gl[[i]] <- NA
noplot <- TRUE
gl[[i]] <- NA
noplot <- TRUE
}
}
}
if (length(cantplot) > 0) {
cat("The following groups in splitby_key cannot be plotted due to missing/no significant interactions/celltypes",
sep = "\n")
sep = "\n"
)
cat(cantplot, sep = "\n")
}
if (noplot) {
Expand All @@ -329,4 +362,4 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
return(gl[[1]])
}
}
}
}
Loading

0 comments on commit 8b249e8

Please sign in to comment.