Skip to content

Commit

Permalink
Big change
Browse files Browse the repository at this point in the history
  • Loading branch information
cafferychen777 committed May 31, 2023
1 parent d9f3d44 commit b1fbceb
Show file tree
Hide file tree
Showing 67 changed files with 1,175 additions and 14,274 deletions.
3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
^Meta$
^.*\.Rproj$
^\.Rproj\.user$
^\.github$
^README\.Rmd$

3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ Imports:
tidyr,
ggprism,
phyloseq,
patchwork
patchwork,
circlize
Depends:
R (>= 2.10)
Suggests:
Expand Down
21 changes: 0 additions & 21 deletions LICENSE.md

This file was deleted.

4 changes: 1 addition & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(compare_daa_results)
export(compare_metagenome_results)
export()
export(ggpicrust2)
export(import_MicrobiomeAnalyst_daa_results)
export(ko2kegg_abundance)
export(pathway_annotation)
export(pathway_daa)
Expand Down
7 changes: 5 additions & 2 deletions R/compare_daa_results.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,16 @@
#' # Run pathway_daa function for multiple methods
#' methods <- c("ALDEx2", "DESeq2", "edgeR")
#' daa_results_list <- lapply(methods, function(method) {
#' pathway_daa(abundance = metacyc_abundance %>% column_to_rownames("pathway"), metadata = metadata, group = "Environment", daa_method = method)
#' pathway_daa(abundance = metacyc_abundance %>% column_to_rownames("pathway"),
#' metadata = metadata, group = "Environment", daa_method = method)
#' })
#'
#' # Compare results across different methods
#' comparison_results <- compare_daa_results(daa_results_list = daa_results_list, method_names = c("ALDEx2_Welch's t test","ALDEx2_Wilcoxon rank test","DESeq2", "edgeR"))
#' comparison_results <- compare_daa_results(daa_results_list = daa_results_list,
#' method_names = c("ALDEx2_Welch's t test","ALDEx2_Wilcoxon rank test","DESeq2", "edgeR"))
#' }
#' @export
utils::globalVariables(c("group1","group2"))
compare_daa_results <- function(daa_results_list, method_names, p_values_threshold = 0.05) {
# Compare the consistency and inconsistency of statistically significant features obtained using different methods in pathway_daa.

Expand Down
2 changes: 2 additions & 0 deletions R/compare_metagenome_results.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#'
#' @examples
#' \donttest{
#' library(tidyverse)
#' # Generate example data
#' set.seed(123)
#' # First metagenome
Expand All @@ -46,6 +47,7 @@
#' print(results$correlation$p_matrix)
#' }
#' @export
utils::globalVariables(c("cor.test","Heatmap"))
compare_metagenome_results <- function(metagenomes, names, daa_method = "ALDEx2", p.adjust = "BH", reference = NULL) {
if(length(metagenomes) != length(names)){
stop("The length of 'metagenomes' must match the length of 'names'")
Expand Down
22 changes: 22 additions & 0 deletions R/daa_annotated_results_df.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' Differential Abundance Analysis Results with Annotation
#'
#' This dataset is the result of running 'pathway_daa' on 'kegg_abundance' with the
#' LinDA method and then annotating the output with 'pathway_annotation'.
#'
#' @format A data frame with 11 variables:
#' \describe{
#' \item{adj_method}{Method used for adjusting p-values.}
#' \item{feature}{Feature being tested.}
#' \item{group1}{One group in the comparison.}
#' \item{group2}{The other group in the comparison.}
#' \item{method}{Statistical test used.}
#' \item{p_adjust}{Adjusted p-value.}
#' \item{p_values}{P-values from the statistical test.}
#' \item{pathway_class}{Class of the pathway.}
#' \item{pathway_description}{Description of the pathway.}
#' \item{pathway_map}{Map of the pathway.}
#' \item{pathway_name}{Name of the pathway.}
#' }
#' @source From ggpicrust2 package demonstration.
#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
"daa_annotated_results_df"
18 changes: 18 additions & 0 deletions R/daa_results_df.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#' DAA Results Dataset
#'
#' This dataset is the result of processing 'kegg_abundance' through the 'LinDA' method in the 'pathway_daa' function.
#' It includes information about the feature, groups compared, p values, and method used.
#'
#' @format A data frame with columns:
#' \describe{
#' \item{adj_method}{Method used for p-value adjustment.}
#' \item{feature}{The feature (pathway) being compared.}
#' \item{group1}{The first group in the comparison.}
#' \item{group2}{The second group in the comparison.}
#' \item{method}{The method used for the comparison.}
#' \item{p_adjust}{The adjusted p-value from the comparison.}
#' \item{p_values}{The raw p-value from the comparison.}
#' }
#' @source From ggpicrust2 package demonstration.
#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
"daa_results_df"
1 change: 1 addition & 0 deletions R/import_MicrobiomeAnalyst_daa_results.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#' }
#'
#' @export
utils::globalVariables(c("read.csv"))
import_MicrobiomeAnalyst_daa_results <- function(file_path = NULL, data = NULL, method = "MicrobiomeAnalyst", group_levels = NULL) {
# Check if a data frame is provided
if (is.null(data)) {
Expand Down
17 changes: 17 additions & 0 deletions R/kegg_abundance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#' KEGG Abundance Dataset
#'
#' A dataset derived from 'ko_abundance' by the function 'ko2kegg_abundance' in the ggpicrust2 package.
#' Each row corresponds to a KEGG pathway, and each column corresponds to a sample.
#'
#' @format A data frame where rownames are KEGG pathways and column names are individual sample names, including:
#' "SRR11393730", "SRR11393731", "SRR11393732", "SRR11393733", "SRR11393734", "SRR11393735", "SRR11393736",
#' "SRR11393737", "SRR11393738", "SRR11393739", "SRR11393740", "SRR11393741", "SRR11393742", "SRR11393743",
#' "SRR11393744", "SRR11393745", "SRR11393746", "SRR11393747", "SRR11393748", "SRR11393749", "SRR11393750",
#' "SRR11393751", "SRR11393752", "SRR11393753", "SRR11393754", "SRR11393755", "SRR11393756", "SRR11393757",
#' "SRR11393758", "SRR11393759", "SRR11393760", "SRR11393761", "SRR11393762", "SRR11393763", "SRR11393764",
#' "SRR11393765", "SRR11393766", "SRR11393767", "SRR11393768", "SRR11393769", "SRR11393770", "SRR11393771",
#' "SRR11393772", "SRR11393773", "SRR11393774", "SRR11393775", "SRR11393776", "SRR11393777", "SRR11393778", "SRR11393779"
#'
#' @source From ggpicrust2 package demonstration.
#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
"kegg_abundance"
17 changes: 17 additions & 0 deletions R/ko_abundance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#' KO Abundance Dataset
#'
#' This is a demonstration dataset from the ggpicrust2 package, representing the output of PICRUSt2.
#' Each row represents a KO (KEGG Orthology) group, and each column corresponds to a sample.
#'
#' @format A data frame where rownames are KO groups and column names include #NAME and individual sample names, such as:
#' "#NAME", "SRR11393730", "SRR11393731", "SRR11393732", "SRR11393733", "SRR11393734", "SRR11393735", "SRR11393736",
#' "SRR11393737", "SRR11393738", "SRR11393739", "SRR11393740", "SRR11393741", "SRR11393742", "SRR11393743",
#' "SRR11393744", "SRR11393745", "SRR11393746", "SRR11393747", "SRR11393748", "SRR11393749", "SRR11393750",
#' "SRR11393751", "SRR11393752", "SRR11393753", "SRR11393754", "SRR11393755", "SRR11393756", "SRR11393757",
#' "SRR11393758", "SRR11393759", "SRR11393760", "SRR11393761", "SRR11393762", "SRR11393763", "SRR11393764",
#' "SRR11393765", "SRR11393766", "SRR11393767", "SRR11393768", "SRR11393769", "SRR11393770", "SRR11393771",
#' "SRR11393772", "SRR11393773", "SRR11393774", "SRR11393775", "SRR11393776", "SRR11393777", "SRR11393778", "SRR11393779"
#'
#' @source From ggpicrust2 package demonstration.
#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
"ko_abundance"
17 changes: 17 additions & 0 deletions R/metacyc_abundance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#' MetaCyc Abundance Dataset
#'
#' This is a demonstration dataset from the ggpicrust2 package, representing the output of PICRUSt2.
#' Each row represents a MetaCyc pathway, and each column corresponds to a sample.
#'
#' @format A data frame where rownames are MetaCyc pathways and column names include "pathway" and individual sample names, such as:
#' "pathway", "SRR11393730", "SRR11393731", "SRR11393732", "SRR11393733", "SRR11393734", "SRR11393735", "SRR11393736",
#' "SRR11393737", "SRR11393738", "SRR11393739", "SRR11393740", "SRR11393741", "SRR11393742", "SRR11393743",
#' "SRR11393744", "SRR11393745", "SRR11393746", "SRR11393747", "SRR11393748", "SRR11393749", "SRR11393750",
#' "SRR11393751", "SRR11393752", "SRR11393753", "SRR11393754", "SRR11393755", "SRR11393756", "SRR11393757",
#' "SRR11393758", "SRR11393759", "SRR11393760", "SRR11393761", "SRR11393762", "SRR11393763", "SRR11393764",
#' "SRR11393765", "SRR11393766", "SRR11393767", "SRR11393768", "SRR11393769", "SRR11393770", "SRR11393771",
#' "SRR11393772", "SRR11393773", "SRR11393774", "SRR11393775", "SRR11393776", "SRR11393777", "SRR11393778", "SRR11393779"
#'
#' @source From ggpicrust2 package demonstration.
#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
"metacyc_abundance"
15 changes: 15 additions & 0 deletions R/metadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#' Metadata for ggpicrust2 Demonstration
#'
#' This is a demonstration dataset from the ggpicrust2 package. It provides the metadata
#' required for the demonstration functions in the package. The dataset includes
#' environmental information for each sample.
#'
#' @format A tibble with each row representing metadata for a sample.
#' \describe{
#' \item{Sample1}{Metadata for Sample1, including Environment}
#' \item{Sample2}{Metadata for Sample2, including Environment}
#' \item{...}{...}
#' }
#' @source ggpicrust2 package demonstration.
#' @references Douglas GM, Maffei VJ, Zaneveld J, Yurgel SN, Brown JR, Taylor CM, Huttenhower C, Langille MGI. PICRUSt2 for prediction of metagenome functions. Nat Biotechnol. 2020.
"metadata"
24 changes: 18 additions & 6 deletions R/pathway_heatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,15 @@
#' group = factor(rep(c("Control", "Treatment"), each = 5)))
#'
#' # Create a heatmap
#' heatmap_plot <- pathway_heatmap(kegg_abundance_example, metadata_example, "group")
#' print(heatmap_plot)
#' pathway_heatmap(kegg_abundance_example, metadata_example, "group")
#'
#' \donttest{
#' data("metacyc_abundance")
#' data("metadata")
#' metacyc_daa_results_df <- pathway_daa(metacyc_abundance %>% column_to_rownames("pathway"), metadata, "Environment", daa_method = "LinDA")
#' feature_with_p_0.05 <- metacyc_daa_results_df %>% filter(p_adjust < 0.05)
#' pathway_heatmap(abundance = metacyc_abundance %>% filter(pathway %in% feature_with_p_0.05$feature) %>% column_to_rownames("pathway"), metadata = metadata, group = "Environment")
#' }
utils::globalVariables(c("rowname","Sample","Value"))
pathway_heatmap <- function(abundance, metadata, group) {
# Heatmaps use color changes to visualize changes in values. However, if the
Expand Down Expand Up @@ -61,11 +68,13 @@ pathway_heatmap <- function(abundance, metadata, group) {
# Convert the abundance matrix to a data frame
z_df <- as.data.frame(z_abundance)

metadata <- metadata %>% as.data.frame()

# Order the samples based on the environment information
ordered_metadata <- metadata[order(metadata[, group]),]
ordered_sample_names <- ordered_metadata$sample_name
order <- ordered_metadata$sample_name
ordered_group_levels <- levels(ordered_metadata[, group])
ordered_group_levels <- ordered_metadata %>% select(all_of(c(group))) %>% pull()


# Convert the abundance data frame to a long format
Expand All @@ -78,12 +87,15 @@ pathway_heatmap <- function(abundance, metadata, group) {
# Set the order of the samples in the heatmap
long_df$Sample <- factor(long_df$Sample, levels = order)

# Compute breaks from the data
breaks <- quantile(long_df$Value, probs = seq(0, 1, by = 0.3), na.rm = TRUE)

# Create the heatmap using ggplot
p <-
ggplot2::ggplot(data = long_df,
mapping = ggplot2::aes(x = Sample, y = rowname, fill = Value)) +
ggplot2::geom_tile() +
ggplot2::scale_fill_gradientn(colours = c("#0571b0","#92c5de","white","#f4a582","#ca0020"), breaks = c(0,0.2, 0.4, 0.6)) +
ggplot2::scale_fill_gradientn(colours = c("#0571b0","#92c5de","white","#f4a582","#ca0020"), breaks = breaks) +
ggplot2::labs(x = NULL, y = NULL) +
ggplot2::scale_y_discrete(expand = c(0, 0), position = "left") +
ggplot2::scale_x_discrete(expand = c(0, 0)) +
Expand Down Expand Up @@ -118,11 +130,11 @@ pathway_heatmap <- function(abundance, metadata, group) {
)

# Print the ordered sample names and group levels
cat("Ordered Sample Names:\n")
cat("The Sample Names in order from left to right are:\n")
cat(ordered_sample_names, sep = ", ")
cat("\n")

cat("Group Levels:\n")
cat("The Group Levels in order from left to right are:\n")
cat(ordered_group_levels, sep = ", ")
cat("\n")

Expand Down
20 changes: 14 additions & 6 deletions R/pathway_pca.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,23 @@
#'
#' @examples
#' # Create example functional pathway abundance data
#' abundance_example <- data.frame(A = rnorm(10), B = rnorm(10), C = rnorm(10))
#' kegg_abundance_example <- matrix(rnorm(30), nrow = 3, ncol = 10)
#' colnames(kegg_abundance_example) <- paste0("Sample", 1:10)
#' rownames(kegg_abundance_example) <- c("PathwayA", "PathwayB", "PathwayC")
#'
#' # Create example metadata
#' metadata_example <- tibble::tibble(sample_id = 1:10,
#' group = factor(rep(c("Control", "Treatment"), each = 5)))
#' # Please ensure the sample IDs in the metadata have the column name "sample_name"
#' metadata_example <- data.frame(sample_name = colnames(kegg_abundance_example),
#' group = factor(rep(c("Control", "Treatment"), each = 5)))
#'
#' # Perform PCA and create visualizations
#' pca_plot <- pathway_pca(t(abundance_example), metadata_example, "group")
#' pca_plot <- pathway_pca(kegg_abundance_example, metadata_example, "group")
#' print(pca_plot)
#'
#' \donttest{
#' data("metacyc_abundance")
#' data("metadata")
#' pathway_pca(metacyc_abundance %>% column_to_rownames("pathway"), metadata, "Environment")
#' }
pathway_pca <- function(abundance, metadata, group){
# due to NSE notes in R CMD check
PC1 = PC2 = Group = NULL
Expand All @@ -27,7 +35,7 @@ pathway_pca <- function(abundance, metadata, group){
pca_proportion <- stats::prcomp(t(abundance), center = TRUE, scale = TRUE)$sdev[1:2]/sum(stats::prcomp(t(abundance), center = TRUE, scale = TRUE)$sdev)*100

# Combine the PCA results with the metadata information
pca <- cbind(pca_axis, metadata[,group])
pca <- cbind(pca_axis, metadata %>% select(all_of(c(group))))
pca$Group <- pca[,group]

levels <- length(levels(factor(pca$Group)))
Expand Down
Loading

0 comments on commit b1fbceb

Please sign in to comment.