Skip to content

Commit

Permalink
Analysis code
Browse files Browse the repository at this point in the history
  • Loading branch information
muhamanz authored May 2, 2024
1 parent 29b4af5 commit 34b9a7d
Showing 1 changed file with 210 additions and 0 deletions.
210 changes: 210 additions & 0 deletions Analysis code
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
#Analysis scripts for the manuscript: "Composition, diversity, and interactions of oral archaeome and virome" by Muhammed Manzoor et al.
#The data in this study are not publicly available due to the sensitive nature of the data derived from human subjects, including personal information. However, researchers who wish to use our data may do so by requesting from the SECRETO Oral Study Consortia Data Access Committee (https://ega-archive.org/dacs/EGAC00001003449).
#Load packages
#BiocManager::install("microbiome/mia", version="devel")
packages <- c("ggplot2", "biomformat", "ggthemes", "phyloseq", "vegan", "microbiomeMarker", "microbiome", "tidyverse", "reshape2", "ComplexHeatmap", "maptree", "patchwork", "miaViz", " file2meco ", "SpiecEasi", "chorddiag")
#Phenotype data is loaded from the included R object
#Load metadata and biom file
biom <- import_biom("cuatroc.biom", parseFunction=parse_taxonomy_greengenes, parallel=TRUE)
metadata <- import_qiime_sample_data("metadata.txt")
#Construct the primary phyloseq object.
phyloseq <- merge_phyloseq(biom, metadata)
#ADD TREE information to the phyloseq object.
random_tree = rtree(ntaxa(phyloseq), rooted=TRUE, tip.label=taxa_names(phyloseq))
phyloseq <- merge_phyloseq(biom, metadata, random_tree)
#make tse from phyloseq
tse<- makeTreeSummarizedExperimentFromPhyloseq(phyloseq)
#remove participants who have used antibiotics preceeding 3 months of saliva sampling
tse <- tse[ , colData(tse)$ANTIBIOTICS %in% FALSE]
#subset only controls
tse <- tse[ , colData(tse)$$Group %in% c("control"), ]
#subset only archea and viruses
tse<- tse [rowData(tse)$Kingdom %in% c("Viruses","Archaea"), ]
#Subset the feature by kingdom viruses, and archaea
tse_virus <- tse [rowData(tse)$Kingdom %in% c("Viruses"), ]
tse_Archaea <- tse [rowData(tse)$Kingdom %in% c("Archaea"), ]
#Agglomerate data at Phylum, Genus and species level
tse_Phylum<- agglomerateByRank(tse, rank="Phylum")
tse_Genus<- agglomerateByRank(tse, rank="Genus")
tse_Species<- agglomerateByRank(tse, rank="Species")
# add assay to tse object.
tse <- transformAssay(tse, assay.type = "counts", method = "relabundance")
assays(tse)
# Add CLR transformation in the data as a new assay
tse <- transformSamples(tse, method = "clr", pseudocount=1)
#plot the taxonomical tree
rowData(altExp(tse,"Phylum"))$prevalence <-
getPrevalence(altExp(tse,"Phylum"), detection = 1/100, sort = FALSE,
assay.type = "counts", as_relative = TRUE)
altExps(tse) <- splitByRanks(tse)
altExps(tse) <-
lapply(altExps(tse),
function(y){
rowData(y)$prevalence <-
getPrevalence(y, detection = 0/100, sort = FALSE,
assay_name = "counts", as_relative = TRUE)
y
})
top_phyla <- getTopTaxa(altExp(tse,"Phylum"),
method="prevalence",
top=24L,
assay_name="counts")

top_phyla_mean <- getTopTaxa(altExp(tse,"Phylum"),
method="mean",
top=24L,
assay_name="counts")
x <- unsplitByRanks(tse, ranks = taxonomyRanks(tse)[1:6])
x <- addTaxonomyTree(x)
plotRowTree(x[rowData(x)$Phylum %in% top_phyla_mean,],
edge_colour_by = "Phylum",
tip_colour_by = "prevalence",
node_colour_by = "prevalence")
#estimate diversity
tse <- mia::estimateDiversitytse
assay_name = "counts", # Calculate diversity from "counts" assay
index = "shannon")
#Alpha diversity visualizations
# creates data frame from the collected data
df <- as.data.frame(colData(tse))
# Changes old levels with new levels
df$Group <- factor(df$Group)
ggplot(df, aes(x = Group, y = shannon )) +
geom_violin(trim=FALSE)+geom_boxplot(outlier.shape = NA) + geom_jitter(width = 0.1) + theme(text = element_text(size = 5))+ theme_classic() ggplot(df, aes(x = Group, y = observed )) +
geom_violin(trim=FALSE)+geom_boxplot(outlier.shape = NA) + geom_jitter(width = 0.1) + theme(text = element_text(size = 5))+ theme_classic()
#Visualizing taxonomic composition for viruses and archaea
se <- relAbundanceCounts(tse_Archaea)
se_Genus <- agglomerateByRank(tse_Archaea, rank ="Genus", onRankOnly=TRUE)
top_taxa <- getTopTaxa(se_Genus,top = 10, assay_name = "relabundance")
# Renaming the "Phylum" rank to keep only top taxa and the rest to "Other"
genus_renamed <- lapply(rowData(se)$Genus,
function(x){if (x %in% top_taxa) {x} else {"Other"}})
rowData(se)$Genus <- as.character(genus_renamed)
plotAbundance(se, assay_name="relabundance", rank = "Genus",
order_rank_by="abund",decreasing = TRUE,
use_relative = TRUE,
layout = c("bar", "point"),
one_facet = TRUE)
se <- relAbundanceCounts(tse_virus)
se_Genus <- agglomerateByRank(tse_virus, rank ="Genus", onRankOnly=TRUE)
top_taxa <- getTopTaxa(se_Genus,top = 10, assay_name = "relabundance")
# Renaming the "Phylum" rank to keep only top taxa and the rest to "Other"
genus_renamed <- lapply(rowData(se)$Genus,
function(x){if (x %in% top_taxa) {x} else {"Other"}})
rowData(se)$Genus <- as.character(genus_renamed)

plotAbundance(se, assay_name="relabundance", rank = "Genus",
order_rank_by="abund",decreasing = TRUE,
use_relative = TRUE,
layout = c("bar", "point"),
one_facet = TRUE)

t#Composition heatmap
tse_Phylum <- transformFeatures(tse_Phylum, assay_name = "clr",
method = "z", name = "clr_z")
# Get n most abundant taxa, and subsets the data by them
top_taxa <- getTopTaxa(tse_Phylum, top = 30)
tse_phylum_subset <- tse_Phylum [top_taxa, ]
# Gets the assay table
mat <- assay(tse_Phylum, "clr_z")
# Creates the heatmap
pheatmap(mat)
# Hierarchical clustering
taxa_hclust <- hclust(dist(mat), method = "complete")
# Creates a phylogenetic tree
taxa_tree <- as.phylo(taxa_hclust)
# Plot taxa tree
taxa_tree <- ggtree(taxa_tree) +
theme(plot.margin=margin(0,0,0,0)) # removes margins
# Get order of taxa in plot
taxa_ordered <- get_taxa_name(taxa_tree)
# Creates clusters
taxa_clusters <- cutree(tree = taxa_hclust, k = 3)
# Converts into data frame
taxa_clusters <- data.frame(clusters = taxa_clusters)
taxa_clusters$clusters <- factor(taxa_clusters$clusters)

# Order data so that it's same as in phylo tree
taxa_clusters <- taxa_clusters[taxa_ordered, , drop = FALSE]
# Adds information to rowData
rowData(tse_phylum_subset)$clusters <- taxa_clusters[order(match(rownames(taxa_clusters),
rownames(tse_phylum_subset))), ]
# Prints taxa and their clusters
rowData(tse_phylum_subset)$clusters
# Hierarchical clustering
sample_hclust <- hclust(dist(t(mat)), method = "complete")
# Creates a phylogenetic tree
sample_tree <- as.phylo(sample_hclust)
# Plot sample tree
sample_tree <- ggtree(sample_tree) + layout_dendrogram() +
theme(plot.margin=margin(0,0,0,0)) # removes margins
# Get order of samples in plot
samples_ordered <- rev(get_taxa_name(sample_tree))
# Creates clusters
sample_clusters <- factor(cutree(tree = sample_hclust, k = 3))
# Converts into data frame
sample_data <- data.frame(clusters = sample_clusters)
# Order data so that it's same as in phylo tree
sample_data <- sample_data[samples_ordered, , drop = FALSE]
# Order data based on
tse_phylum <- tse_phylum [ , rownames(sample_data)]
# Add sample type data
sample_data$sample_types <- factor(colData(tse_phylum)$Group)
sample_data
# Determines the scaling of colors
# Scale colors
breaks <- seq(-ceiling(max(abs(mat))), ceiling(max(abs(mat))),
length.out = ifelse( max(abs(mat))>5, 2*ceiling(max(abs(mat))), 10 ) )
colors <- colorRampPalette(c("darkblue", "blue", "white", "red", "darkred"))(length(breaks)-1)
pheatmap(mat, annotation_row = taxa_clusters,
annotation_col = sample_data,
breaks = breaks,
color = colors)
#Differential abundance analysis using LEfSe
set.seed(123)
lefse<-run_lefse(phyloseq, group = "SMOKING", subgroup = NULL, taxa_rank = "all",
transform = c("identity", "log10", "log10p"), norm = "CPM",
norm_para = list(), kw_cutoff = 0.05, lda_cutoff = 2, bootstrap_n = 30, bootstrap_fraction = 2/3, wilcoxon_cutoff = 0.05,
multigrp_strat = FALSE, strict = c("0"), sample_min = 10, only_same_subgrp = FALSE, curv = FALSE)
#save the output
write.csv(marker_table(lefse), "lefse_SMOKING.csv")
#plot abundance from the LEfSe results
p_abd <- plot_abundance(lefse, group = "SMOKING")
#convert phyloseq into The microtable class
meco_smokers <- phyloseq2meco(phyloseq_smokers)
meco_nonsmokers <- phyloseq2meco(phyloseq_nonsmokers)
# The parameter cor_method in trans_network is used to select correlation calculation method.
t1 <- trans_network$new(dataset = meco_nonsmokers, cor_method = "sparcc", cal_cor= "SparCC", use_sparcc_method = "SpiecEasi", filter_thres = 0.001)
t2 <- trans_network$new(dataset = meco_smokers, cor_method = "sparcc", cal_cor= "SparCC", use_sparcc_method = "SpiecEasi", filter_thres = 0.001)
# construct network; require igraph package
t1$cal_network(COR_p_thres = 0.01, COR_optimization = TRUE)
t2$cal_network(COR_p_thres = 0.01, COR_optimization = TRUE)
# use arbitrary coefficient threshold to contruct network
t1$cal_network(COR_p_thres = 0.01, COR_cut = 0.7)
t2$cal_network(COR_p_thres = 0.01, COR_cut = 0.7)
# return t1$res_network
# return t2$res_network
# invoke igraph cluster_fast_greedy function for this undirected network
t1$cal_module(method = "cluster_fast_greedy")
t2$cal_module(method = "cluster_fast_greedy")
# require rgexf package to be installed
t1$save_network(filepath = "network_control.gexf")
t2$save_network(filepath = "network_patient.gexf")
# calculate network attributes
t1$cal_network_attr()
t1$res_network_attr
t2$cal_network_attr()
t2$res_network_attr
# get node properties
t1$get_node_table(node_roles = TRUE)
t2$get_node_table(node_roles = TRUE)
# return t1$res_node_table # return t2$res_node_table
#Circos plots showed strong (r ≥ 0.8) correlations among multiple bacterial phyla, depicting the linkages between different phyla or within the same phylum
t1$cal_sum_links(taxa_level = "Phylum")
t2$cal_sum_links(taxa_level = "Phylum")
# interactive visualization; require chorddiag package; see https://github.com/mattflor/chorddiag
t1$plot_sum_links(plot_pos = TRUE, plot_num = 10, color_values = RColorBrewer::brewer.pal(10, "Paired"))
t2$plot_sum_links(plot_pos = TRUE, plot_num = 10, color_values = RColorBrewer::brewer.pal(10, "Paired"))
# From v1.2.0, method = "circlize" is available for conveniently saving the static plot
t1$plot_sum_links(method = "circlize", transparency = 0.2, annotationTrackHeight = circlize::mm_h(c(5, 5)))
t2$plot_sum_links(method = "circlize", transparency = 0.2, annotationTrackHeight = circlize::mm_h(c(5, 5)))

0 comments on commit 34b9a7d

Please sign in to comment.