From d30df557637221bf127f76b60843263acebd0cd8 Mon Sep 17 00:00:00 2001 From: sreichl Date: Fri, 23 Feb 2024 19:55:54 +0100 Subject: [PATCH] fix heatmap, voclanos and statistics bar plot --- workflow/scripts/aggregate.R | 77 +++--------------------------------- workflow/scripts/dea.R | 1 - workflow/scripts/heatmap.R | 15 ++++++- workflow/scripts/volcanos.R | 4 +- 4 files changed, 21 insertions(+), 76 deletions(-) diff --git a/workflow/scripts/aggregate.R b/workflow/scripts/aggregate.R index 876b14e..5f594b3 100644 --- a/workflow/scripts/aggregate.R +++ b/workflow/scripts/aggregate.R @@ -44,15 +44,6 @@ if (score_formula!=""){ # annotate differential direction (up or down) dea_results$direction <- as.character(lapply(dea_results$avg_log2FC, function(x) if(x>0){"up"}else{"down"})) -# ### aggregate & save DEA statistics by stat. sign. -# tmp_dea_results <- dea_results[dea_results$p_val_adj <= adj_pval, ] -# dea_stats <- table(tmp_dea_results$group, tmp_dea_results$direction) -# dea_stats_df <- as.data.frame.matrix(dea_stats) -# dea_stats_df$total <- rowSums(dea_stats_df) - -# # write.csv(dea_stats_df, file=file.path(dea_stats_path), row.names=TRUE) -# fwrite(as.data.frame(dea_stats_df), file=file.path(dea_stats_path), row.names=TRUE) - ### aggregate & save FILTERED DEA statistics dea_filtered_results <- dea_results[(dea_results$p_val_adj<=adj_pval) & (abs(dea_results$avg_log2FC)>=lfc) & @@ -65,21 +56,6 @@ dea_filtered_stats_df$total <- rowSums(dea_filtered_stats_df) # write.csv(dea_filtered_stats_df, file=file.path(dea_stats_path), row.names=TRUE) fwrite(as.data.frame(dea_filtered_stats_df), file=file.path(dea_stats_path), row.names=TRUE) -### aggregate & save LFC matrix from filtered DEA results -> NOT USED anymore as it duplicates results unnecessarily -# groups <- paste0("group_",unique(dea_filtered_results$group)) -# features <- unique(dea_filtered_results$feature) - -# lfc_df <- data.frame(matrix(nrow=length(features), ncol=length(groups), dimnames=list(features, groups))) - -# for (group in unique(dea_filtered_results$group)){ -# tmp_dea_results <- dea_results[dea_results$group==group, ] #old: dea_filtered_results[dea_filtered_results$group==group, ] -# rownames(tmp_dea_results) <- tmp_dea_results$feature -# lfc_df[features, paste0("group_",group)] <- tmp_dea_results[features, 'avg_log2FC'] -# } - -# # write.csv(lfc_df, file=file.path(dea_filtered_lfc_path), row.names=TRUE) -# fwrite(as.data.frame(lfc_df), file=file.path(dea_filtered_lfc_path), row.names=TRUE) - ### save differential feature lists from filtered DEA results for downstream analysis (e.g., enrichment analysis) for (group in unique(dea_filtered_results$group)){ for (direction in unique(dea_filtered_results$direction)){ @@ -87,55 +63,17 @@ for (group in unique(dea_filtered_results$group)){ tmp_features <- dea_filtered_results[(dea_filtered_results$group==group) & (dea_filtered_results$direction==direction),"feature"] write(tmp_features, file.path(dirname(dea_result_path),"feature_lists",paste0(group,"_",direction,"_features.txt"))) } -} - -# ### visualize & save ALL DEA statistics -> NOT USED anymore as it's confusing/redundant -# width_panel <- length(groups) * width + 1 - -# # convert group column to string for correct plotting -# dea_results$group <- as.character(dea_results$group) -# tmp_dea_results <- dea_results[dea_results$p_val_adj<=adj_pval, ] - -# dea_results_p <- ggplot(tmp_dea_results, aes(x=group, fill=direction)) + -# geom_bar() + -# xlab(metadata) + -# ylab("number of differential features") + -# scale_y_continuous(limits = c(0, NA), expand = c(0, 0)) + -# scale_fill_manual(values=list(up="red", down="blue"), drop=FALSE) + -# custom_theme + -# theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust=1, size = 6)) # rotates the x-Axis - - -# # save plot -# # options(repr.plot.width=width_panel, repr.plot.height=height) -# # print(dea_results_p) -# ggsave_new(filename = "stats", -# results_path=dirname(dea_stats_plot_path), -# plot=dea_results_p, -# width=width_panel, -# height=height) +} ### visualize & save FILTERED DEA statistics -width_panel <- length(groups) * width + 1 - -# convert group column to string for correct plotting -# dea_filtered_results$group <- as.character(dea_filtered_results$group) - -# dea_filtered_results_p <- ggplot(dea_filtered_results, aes(x=group, fill=direction)) + -# geom_bar() + -# xlab(metadata) + -# ylab("number of differential features") + -# scale_y_continuous(limits = c(0, NA), expand = c(0, 0)) + -# scale_fill_manual(values=list(up="red", down="blue"), drop=FALSE) + -# custom_theme + -# theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust=1, size = 6)) # rotates the x-Axis +width_panel <- length(groups) * width + 2 -# fromat stats df for plotting: remove "total" column & transform from wide to long -dea_filtered_stats_df <- dea_filtered_stats_df[,-3] +# format stats df for plotting: remove "total" column & transform from wide to long +dea_filtered_stats_df$total <- NULL dea_filtered_stats_df[,"down"] <- -1 * dea_filtered_stats_df[,"down"] plot_stats_df <- stack(dea_filtered_stats_df) colnames(plot_stats_df) <- c("n_features","direction") -plot_stats_df$groups <- c(rownames(dea_filtered_stats_df), rownames(dea_filtered_stats_df)) +plot_stats_df$groups <- rep(rownames(dea_filtered_stats_df), ncol(dea_filtered_stats_df)) # plot dea_filtered_results_p <- ggplot(plot_stats_df, aes(x=groups, y=n_features, fill=direction)) + @@ -148,11 +86,8 @@ dea_filtered_results_p <- ggplot(plot_stats_df, aes(x=groups, y=n_features, fill theme(#legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, size = 6) ) - - + # save plot -# options(repr.plot.width=width_panel, repr.plot.height=height) -# print(dea_filtered_results_p) ggsave_new(filename = "stats", results_path=dirname(dea_stats_plot_path), plot=dea_filtered_results_p, diff --git a/workflow/scripts/dea.R b/workflow/scripts/dea.R index 14e25d9..1b19caf 100644 --- a/workflow/scripts/dea.R +++ b/workflow/scripts/dea.R @@ -130,5 +130,4 @@ if (control=="ALL"){ } ### save results -# write.csv(dea_results, file=file.path(dea_result_path), row.names=FALSE) fwrite(as.data.frame(dea_results), file=file.path(dea_result_path), row.names=FALSE) \ No newline at end of file diff --git a/workflow/scripts/heatmap.R b/workflow/scripts/heatmap.R index 959ac67..db971ab 100644 --- a/workflow/scripts/heatmap.R +++ b/workflow/scripts/heatmap.R @@ -43,13 +43,24 @@ if (feature_list_name=="FILTERED"){ feature_list <- unique(intersect(feature_list, dea_results$feature)) } +# if no features in the results, end early with empty plot +if(length(feature_list)==0){ + ggsave_new(filename = feature_list_name, + results_path=dirname(lfc_heatmap_path), + plot = ggplot() + annotate("text", x = 0.5, y = 0.5, label = "Features not found in DEA results.") + theme_void(), + width=4, + height=1) + + quit(save = "no", status = 0) +} + # make LFC dataframe -lfc_df <- dcast(dea_results, feature ~ group, value.var = 'avg_log2FC') +lfc_df <- reshape2::dcast(dea_results, feature ~ group, value.var = 'avg_log2FC') rownames(lfc_df) <- lfc_df$feature lfc_df$feature <- NULL # make adjusted p-value dataframe -adjp_df <- dcast(dea_results, feature ~ group, value.var = 'p_val_adj') +adjp_df <- reshape2::dcast(dea_results, feature ~ group, value.var = 'p_val_adj') rownames(adjp_df) <- adjp_df$feature adjp_df$feature <- NULL diff --git a/workflow/scripts/volcanos.R b/workflow/scripts/volcanos.R index 86399b3..f364426 100644 --- a/workflow/scripts/volcanos.R +++ b/workflow/scripts/volcanos.R @@ -47,7 +47,7 @@ for (pval_type in c("p_val_adj", "p_val")){ for (group in unique(dea_results$group)){ toptable <- dea_results[dea_results$group==group,] - lab <- toptable$feature + x <- "avg_log2FC" selectLab <- NULL colCustom <- NULL @@ -77,7 +77,7 @@ for (pval_type in c("p_val_adj", "p_val")){ } volcano_plot <- EnhancedVolcano(toptable = toptable, - lab = lab, + lab = toptable$feature, x = x, y = pval_type, selectLab = selectLab,