Big change

cafferychen777 · May 31, 2023 · b1fbceb · b1fbceb
1 parent d9f3d44
commit b1fbceb
Show file tree

Hide file tree

Showing 67 changed files with 1,175 additions and 14,274 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -4,3 +4,6 @@
 ^Meta$
 ^.*\.Rproj$
 ^\.Rproj\.user$
+^\.github$
+^README\.Rmd$
+
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -33,7 +33,8 @@ Imports:
     tidyr,
     ggprism,
     phyloseq,
-    patchwork
+    patchwork,
+    circlize
 Depends: 
     R (>= 2.10)
 Suggests: 

diff --git a/LICENSE.md b/LICENSE.md
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,9 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
-export(compare_daa_results)
-export(compare_metagenome_results)
+export()
 export(ggpicrust2)
-export(import_MicrobiomeAnalyst_daa_results)
 export(ko2kegg_abundance)
 export(pathway_annotation)
 export(pathway_daa)

diff --git a/R/compare_daa_results.R b/R/compare_daa_results.R
@@ -27,13 +27,16 @@
 #' # Run pathway_daa function for multiple methods
 #' methods <- c("ALDEx2", "DESeq2", "edgeR")
 #' daa_results_list <- lapply(methods, function(method) {
-#' pathway_daa(abundance = metacyc_abundance %>% column_to_rownames("pathway"), metadata = metadata, group = "Environment", daa_method = method)
+#' pathway_daa(abundance = metacyc_abundance %>% column_to_rownames("pathway"),
+#' metadata = metadata, group = "Environment", daa_method = method)
 #' })
 #'
 #' # Compare results across different methods
-#' comparison_results <- compare_daa_results(daa_results_list = daa_results_list, method_names = c("ALDEx2_Welch's t test","ALDEx2_Wilcoxon rank test","DESeq2", "edgeR"))
+#' comparison_results <- compare_daa_results(daa_results_list = daa_results_list,
+#' method_names = c("ALDEx2_Welch's t test","ALDEx2_Wilcoxon rank test","DESeq2", "edgeR"))
 #' }
 #' @export
+utils::globalVariables(c("group1","group2"))
 compare_daa_results <- function(daa_results_list, method_names, p_values_threshold = 0.05) {
   # Compare the consistency and inconsistency of statistically significant features obtained using different methods in pathway_daa.
 

diff --git a/R/compare_metagenome_results.R b/R/compare_metagenome_results.R
@@ -24,6 +24,7 @@
 #'
 #' @examples
 #' \donttest{
+#' library(tidyverse)
 #' # Generate example data
 #' set.seed(123)
 #' # First metagenome
@@ -46,6 +47,7 @@
 #' print(results$correlation$p_matrix)
 #' }
 #' @export
+utils::globalVariables(c("cor.test","Heatmap"))
 compare_metagenome_results <- function(metagenomes, names, daa_method = "ALDEx2", p.adjust = "BH", reference = NULL) {
   if(length(metagenomes) != length(names)){
     stop("The length of 'metagenomes' must match the length of 'names'")

diff --git a/R/daa_annotated_results_df.R b/R/daa_annotated_results_df.R
@@ -0,0 +1,22 @@
+#' Differential Abundance Analysis Results with Annotation
+#'
+#' This dataset is the result of processing 'kegg_abundance' through the
+#' 'pathway_daa' function with the LinDA method, followed by annotation with 'pathway_annotation'.
+#'
+#' @format A data frame with 11 variables:
+#' \describe{
+#'   \item{adj_method}{Method used for adjusting p-values.}
+#'   \item{feature}{Feature being tested.}
+#'   \item{group1}{One group in the comparison.}
+#'   \item{group2}{The other group in the comparison.}
+#'   \item{method}{Statistical test used.}
+#'   \item{p_adjust}{Adjusted p-value.}
+#'   \item{p_values}{P-values from the statistical test.}
+#'   \item{pathway_class}{Class of the pathway.}
+#'   \item{pathway_description}{Description of the pathway.}
+#'   \item{pathway_map}{Map of the pathway.}
+#'   \item{pathway_name}{Name of the pathway.}
+#' }
+#' @source From ggpicrust2 package demonstration.
+#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
+"daa_annotated_results_df"
diff --git a/R/daa_results_df.R b/R/daa_results_df.R
@@ -0,0 +1,18 @@
+#' DAA Results Dataset
+#'
+#' This dataset is the result of processing 'kegg_abundance' through the 'LinDA' method in the 'pathway_daa' function.
+#' It includes information about the feature, groups compared, p values, and method used.
+#'
+#' @format A data frame with columns:
+#' \describe{
+#'   \item{adj_method}{Method used for p-value adjustment.}
+#'   \item{feature}{The feature (pathway) being compared.}
+#'   \item{group1}{The first group in the comparison.}
+#'   \item{group2}{The second group in the comparison.}
+#'   \item{method}{The method used for the comparison.}
+#'   \item{p_adjust}{The adjusted p-value from the comparison.}
+#'   \item{p_values}{The raw p-value from the comparison.}
+#' }
+#' @source From ggpicrust2 package demonstration.
+#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
+"daa_results_df"
diff --git a/R/import_MicrobiomeAnalyst_daa_results.R b/R/import_MicrobiomeAnalyst_daa_results.R
@@ -16,6 +16,7 @@
 #' }
 #'
 #' @export
+utils::globalVariables(c("read.csv"))
 import_MicrobiomeAnalyst_daa_results <- function(file_path = NULL, data = NULL, method = "MicrobiomeAnalyst", group_levels = NULL) {
   # Check if a data frame is provided
   if (is.null(data)) {

diff --git a/R/kegg_abundance.R b/R/kegg_abundance.R
@@ -0,0 +1,17 @@
+#' KEGG Abundance Dataset
+#'
+#' A dataset derived from 'ko_abundance' by the function 'ko2kegg_abundance' in the ggpicrust2 package.
+#' Each row corresponds to a KEGG pathway, and each column corresponds to a sample.
+#'
+#' @format A data frame where rownames are KEGG pathways and column names are individual sample names, including:
+#' "SRR11393730", "SRR11393731", "SRR11393732", "SRR11393733", "SRR11393734", "SRR11393735", "SRR11393736",
+#' "SRR11393737", "SRR11393738", "SRR11393739", "SRR11393740", "SRR11393741", "SRR11393742", "SRR11393743",
+#' "SRR11393744", "SRR11393745", "SRR11393746", "SRR11393747", "SRR11393748", "SRR11393749", "SRR11393750",
+#' "SRR11393751", "SRR11393752", "SRR11393753", "SRR11393754", "SRR11393755", "SRR11393756", "SRR11393757",
+#' "SRR11393758", "SRR11393759", "SRR11393760", "SRR11393761", "SRR11393762", "SRR11393763", "SRR11393764",
+#' "SRR11393765", "SRR11393766", "SRR11393767", "SRR11393768", "SRR11393769", "SRR11393770", "SRR11393771",
+#' "SRR11393772", "SRR11393773", "SRR11393774", "SRR11393775", "SRR11393776", "SRR11393777", "SRR11393778", "SRR11393779"
+#'
+#' @source From ggpicrust2 package demonstration.
+#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
+"kegg_abundance"
diff --git a/R/ko_abundance.R b/R/ko_abundance.R
@@ -0,0 +1,17 @@
+#' KO Abundance Dataset
+#'
+#' This is a demonstration dataset from the ggpicrust2 package, representing the output of PICRUSt2.
+#' Each row represents a KO (KEGG Orthology) group, and each column corresponds to a sample.
+#'
+#' @format A data frame where rownames are KO groups and column names include #NAME and individual sample names, such as:
+#' "#NAME", "SRR11393730", "SRR11393731", "SRR11393732", "SRR11393733", "SRR11393734", "SRR11393735", "SRR11393736",
+#' "SRR11393737", "SRR11393738", "SRR11393739", "SRR11393740", "SRR11393741", "SRR11393742", "SRR11393743",
+#' "SRR11393744", "SRR11393745", "SRR11393746", "SRR11393747", "SRR11393748", "SRR11393749", "SRR11393750",
+#' "SRR11393751", "SRR11393752", "SRR11393753", "SRR11393754", "SRR11393755", "SRR11393756", "SRR11393757",
+#' "SRR11393758", "SRR11393759", "SRR11393760", "SRR11393761", "SRR11393762", "SRR11393763", "SRR11393764",
+#' "SRR11393765", "SRR11393766", "SRR11393767", "SRR11393768", "SRR11393769", "SRR11393770", "SRR11393771",
+#' "SRR11393772", "SRR11393773", "SRR11393774", "SRR11393775", "SRR11393776", "SRR11393777", "SRR11393778", "SRR11393779"
+#'
+#' @source From ggpicrust2 package demonstration.
+#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
+"ko_abundance"
diff --git a/R/metacyc_abundance.R b/R/metacyc_abundance.R
@@ -0,0 +1,17 @@
+#' MetaCyc Abundance Dataset
+#'
+#' This is a demonstration dataset from the ggpicrust2 package, representing the output of PICRUSt2.
+#' Each row represents a MetaCyc pathway, and each column corresponds to a sample.
+#'
+#' @format A data frame where rownames are MetaCyc pathways and column names include "pathway" and individual sample names, such as:
+#' "pathway", "SRR11393730", "SRR11393731", "SRR11393732", "SRR11393733", "SRR11393734", "SRR11393735", "SRR11393736",
+#' "SRR11393737", "SRR11393738", "SRR11393739", "SRR11393740", "SRR11393741", "SRR11393742", "SRR11393743",
+#' "SRR11393744", "SRR11393745", "SRR11393746", "SRR11393747", "SRR11393748", "SRR11393749", "SRR11393750",
+#' "SRR11393751", "SRR11393752", "SRR11393753", "SRR11393754", "SRR11393755", "SRR11393756", "SRR11393757",
+#' "SRR11393758", "SRR11393759", "SRR11393760", "SRR11393761", "SRR11393762", "SRR11393763", "SRR11393764",
+#' "SRR11393765", "SRR11393766", "SRR11393767", "SRR11393768", "SRR11393769", "SRR11393770", "SRR11393771",
+#' "SRR11393772", "SRR11393773", "SRR11393774", "SRR11393775", "SRR11393776", "SRR11393777", "SRR11393778", "SRR11393779"
+#'
+#' @source From ggpicrust2 package demonstration.
+#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
+"metacyc_abundance"
diff --git a/R/metadata.R b/R/metadata.R
@@ -0,0 +1,15 @@
+#' Metadata for ggpicrust2 Demonstration
+#'
+#' This is a demonstration dataset from the ggpicrust2 package. It provides the metadata
+#' required for the demonstration functions in the package. The dataset includes
+#' environmental information for each sample.
+#'
+#' @format A tibble with each row representing metadata for a sample.
+#' \describe{
+#'   \item{Sample1}{Metadata for Sample1, including Environment}
+#'   \item{Sample2}{Metadata for Sample2, including Environment}
+#'   \item{...}{...}
+#' }
+#' @source ggpicrust2 package demonstration.
+#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
+"metadata"
diff --git a/R/pathway_heatmap.R b/R/pathway_heatmap.R
@@ -26,8 +26,15 @@
 #'                                group = factor(rep(c("Control", "Treatment"), each = 5)))
 #'
 #' # Create a heatmap
-#' heatmap_plot <- pathway_heatmap(kegg_abundance_example, metadata_example, "group")
-#' print(heatmap_plot)
+#' pathway_heatmap(kegg_abundance_example, metadata_example, "group")
+#'
+#' \donttest{
+#' data("metacyc_abundance")
+#' data("metadata")
+#' metacyc_daa_results_df <- pathway_daa(metacyc_abundance %>% column_to_rownames("pathway"), metadata, "Environment", daa_method = "LinDA")
+#' feature_with_p_0.05 <- metacyc_daa_results_df %>% filter(p_adjust < 0.05)
+#' pathway_heatmap(abundance = metacyc_abundance %>% filter(pathway %in% feature_with_p_0.05$feature) %>% column_to_rownames("pathway"), metadata = metadata, group = "Environment")
+#' }
 utils::globalVariables(c("rowname","Sample","Value"))
 pathway_heatmap <- function(abundance, metadata, group) {
    # Heatmaps use color changes to visualize changes in values. However, if the
@@ -61,11 +68,13 @@ pathway_heatmap <- function(abundance, metadata, group) {
   # Convert the abundance matrix to a data frame
   z_df <- as.data.frame(z_abundance)
 
+  metadata <- metadata %>% as.data.frame()
+
   # Order the samples based on the environment information
   ordered_metadata <- metadata[order(metadata[, group]),]
   ordered_sample_names <- ordered_metadata$sample_name
   order <- ordered_metadata$sample_name
-  ordered_group_levels <- levels(ordered_metadata[, group])
+  ordered_group_levels <- ordered_metadata %>% select(all_of(c(group))) %>% pull()
 
 
   # Convert the abundance data frame to a long format
@@ -78,12 +87,15 @@ pathway_heatmap <- function(abundance, metadata, group) {
   # Set the order of the samples in the heatmap
   long_df$Sample <- factor(long_df$Sample, levels = order)
 
+  # Compute breaks from the data
+  breaks <- quantile(long_df$Value, probs = seq(0, 1, by = 0.3), na.rm = TRUE)
+
   # Create the heatmap using ggplot
   p <-
     ggplot2::ggplot(data = long_df,
                     mapping = ggplot2::aes(x = Sample, y = rowname, fill = Value)) +
     ggplot2::geom_tile() +
-    ggplot2::scale_fill_gradientn(colours = c("#0571b0","#92c5de","white","#f4a582","#ca0020"), breaks = c(0,0.2, 0.4, 0.6)) +
+    ggplot2::scale_fill_gradientn(colours = c("#0571b0","#92c5de","white","#f4a582","#ca0020"), breaks = breaks) +
     ggplot2::labs(x = NULL, y = NULL) +
     ggplot2::scale_y_discrete(expand = c(0, 0), position = "left") +
     ggplot2::scale_x_discrete(expand = c(0, 0)) +
@@ -118,11 +130,11 @@ pathway_heatmap <- function(abundance, metadata, group) {
     )
 
   # Print the ordered sample names and group levels
-  cat("Ordered Sample Names:\n")
+  cat("The Sample Names in order from left to right are:\n")
   cat(ordered_sample_names, sep = ", ")
   cat("\n")
 
-  cat("Group Levels:\n")
+  cat("The Group Levels in order from left to right are:\n")
   cat(ordered_group_levels, sep = ", ")
   cat("\n")
 

diff --git a/R/pathway_pca.R b/R/pathway_pca.R
@@ -8,15 +8,23 @@
 #'
 #' @examples
 #' # Create example functional pathway abundance data
-#' abundance_example <- data.frame(A = rnorm(10), B = rnorm(10), C = rnorm(10))
+#' kegg_abundance_example <- matrix(rnorm(30), nrow = 3, ncol = 10)
+#' colnames(kegg_abundance_example) <- paste0("Sample", 1:10)
+#' rownames(kegg_abundance_example) <- c("PathwayA", "PathwayB", "PathwayC")
 #'
 #' # Create example metadata
-#' metadata_example <- tibble::tibble(sample_id = 1:10,
-#'                                    group = factor(rep(c("Control", "Treatment"), each = 5)))
+#' # Please ensure the sample IDs in the metadata have the column name "sample_name"
+#' metadata_example <- data.frame(sample_name = colnames(kegg_abundance_example),
+#'                                group = factor(rep(c("Control", "Treatment"), each = 5)))
 #'
-#' # Perform PCA and create visualizations
-#' pca_plot <- pathway_pca(t(abundance_example), metadata_example, "group")
+#' pca_plot <- pathway_pca(kegg_abundance_example, metadata_example, "group")
 #' print(pca_plot)
+#'
+#' \donttest{
+#' data("metacyc_abundance")
+#' data("metadata")
+#' pathway_pca(metacyc_abundance %>% column_to_rownames("pathway"), metadata, "Environment")
+#' }
 pathway_pca <- function(abundance, metadata, group){
   # due to NSE notes in R CMD check
   PC1 = PC2 = Group = NULL
@@ -27,7 +35,7 @@ pathway_pca <- function(abundance, metadata, group){
   pca_proportion <- stats::prcomp(t(abundance), center = TRUE, scale = TRUE)$sdev[1:2]/sum(stats::prcomp(t(abundance), center = TRUE, scale = TRUE)$sdev)*100
 
   # Combine the PCA results with the metadata information
-  pca <- cbind(pca_axis, metadata[,group])
+  pca <- cbind(pca_axis, metadata %>% select(all_of(c(group))))
   pca$Group <- pca[,group]
 
   levels <- length(levels(factor(pca$Group)))