From 43cefd2ac2f5191e3febe43f09132f8a906e7a37 Mon Sep 17 00:00:00 2001 From: apereira Date: Tue, 12 Mar 2024 13:30:38 +0100 Subject: [PATCH] Add Rscripts used in the paper --- .../R/00-generate-samples-list.R | 48 -- src/sup/01-detect-primers/R/01-detect-its.R | 5 - .../_targets.R | 58 ++- .../defaults.R | 3 + .../functions.R | 449 +++++++++++++++++- src/sup/05-manuscript-figs/01-sup-fig1.R | 161 ------- src/sup/05-manuscript-figs/02-main-fig2.R | 207 -------- src/sup/05-manuscript-figs/02-sup-fig2.R | 303 ------------ src/sup/05-manuscript-figs/03-sup-fig3.R | 153 ------ src/sup/05-manuscript-figs/README | 3 - 10 files changed, 497 insertions(+), 893 deletions(-) delete mode 100644 src/sup/01-detect-primers/R/00-generate-samples-list.R delete mode 100644 src/sup/05-manuscript-figs/01-sup-fig1.R delete mode 100644 src/sup/05-manuscript-figs/02-main-fig2.R delete mode 100644 src/sup/05-manuscript-figs/02-sup-fig2.R delete mode 100644 src/sup/05-manuscript-figs/03-sup-fig3.R delete mode 100644 src/sup/05-manuscript-figs/README diff --git a/src/sup/01-detect-primers/R/00-generate-samples-list.R b/src/sup/01-detect-primers/R/00-generate-samples-list.R deleted file mode 100644 index e434a34..0000000 --- a/src/sup/01-detect-primers/R/00-generate-samples-list.R +++ /dev/null @@ -1,48 +0,0 @@ -library(tidyverse) - -runs <- read_csv("raw/runs.csv") %>% - as_tibble() -bioprojects <- read_csv("raw/bioprojects.csv") %>% - as_tibble() -biosamples <- read_tsv("raw/biosamples.tsv") %>% - as_tibble() - -proj_noRef <- c( - "PRJNA282687", - "PRJNA324410", - "PRJNA271113", - "PRJNA359237", - "PRJNA473079", - "PRJNA263505", - "PRJNA406830", - "PRJNA432446", - "PRJNA496065" -) - -runs %>% - filter(bioproject_id %in% proj_noRef) %>% - filter(str_detect(amplicon, "bac_16s")) %>% - filter(str_detect(library_layout, "SINGLE")) %>% - select(run_id) %>% - write_tsv(file = "draco/proj/04-global-microbiome/raw/proj_no_literature/bacteria_single.tsv",col_names = T) - -runs %>% - filter(bioproject_id %in% proj_noRef) %>% - filter(str_detect(amplicon,"bac_16s")) %>% - filter(str_detect(library_layout, "PAIRED")) %>% - select(run_id) %>% - write_tsv(file = "draco/proj/04-global-microbiome/raw/proj_no_literature/bacteria_paired.tsv",col_names = T) - -runs %>% - filter(bioproject_id %in% proj_noRef) %>% - filter(str_detect(amplicon, "fun_its")) %>% - filter(str_detect(library_layout, "SINGLE")) %>% - select(run_id) %>% - write_tsv(file = "draco/proj/04-global-microbiome/raw/proj_no_literature/fungi_single.tsv",col_names = T) - -runs %>% - filter(bioproject_id %in% proj_noRef) %>% - filter(str_detect(amplicon, "fun_its")) %>% - filter(str_detect(library_layout, "PAIRED")) %>% - select(run_id) %>% - write_tsv(file = "draco/proj/04-global-microbiome/raw/proj_no_literature/fungi_paired.tsv",col_names = T) diff --git a/src/sup/01-detect-primers/R/01-detect-its.R b/src/sup/01-detect-primers/R/01-detect-its.R index e543f02..97b886b 100644 --- a/src/sup/01-detect-primers/R/01-detect-its.R +++ b/src/sup/01-detect-primers/R/01-detect-its.R @@ -1,10 +1,5 @@ library(tidyverse) -# After ITS extraction, it was used the following command to extract the headers -# of extracted fastas: -# grep ">" *.fasta > ../00-tmp/extracted_its_seqs_NAs_primers.txt -# This way, each header correspond to a sequence. - # ITS sequences after ITSx extraction its <- read_delim(file = "cache/00-tmp/extracted_its_seqs_NAs_primers.txt", col_names = F) diff --git a/src/sup/03-generalists-specialists-genome-with-gff/_targets.R b/src/sup/03-generalists-specialists-genome-with-gff/_targets.R index 34880fc..0296d93 100644 --- a/src/sup/03-generalists-specialists-genome-with-gff/_targets.R +++ b/src/sup/03-generalists-specialists-genome-with-gff/_targets.R @@ -1,6 +1,6 @@ #!/usr/bin/env R -# This pipeline analise the antismash results with gff3 file suplied together with the genome. +# This pipeline analyse the antismash results with gff3 file suplied together with the genome. source(here::here("src/03-generalists-specialists-genome-with-gff/defaults.R")) @@ -14,15 +14,16 @@ tar_option_set(packages = c("tidyverse","tarchetypes","jsonlite", "patchwork", " ### Targets results dir gen_spe_tar_dir <- "cache/01-r-generalist_specialist_genome_metadata" +amr_dir <- "cache/01-r-07-amr" ### Import results from other targets that were already analised -#bgcs_by_genome <- tar_read(name = bgcs_by_genome, store = gen_spe_tar_dir) genomes2find_BGCs <- tar_read(name = genomes2find_BGCs, store = gen_spe_tar_dir) generalist_genus <- tar_read(name = generalist_genus, store = gen_spe_tar_dir) specialist_genus <- tar_read(name = specialist_genus, store = gen_spe_tar_dir) genome_gff <- tar_read(name = genome_gff, store = gen_spe_tar_dir) cds_by_genome <- tar_read(name = cds_by_genome, store = gen_spe_tar_dir) genome_length <- tar_read(name = genome_length, store = gen_spe_tar_dir) +valid_amr <- tar_read(name = valid_amr, store = amr_dir) library(future) library(future.callr) @@ -38,8 +39,6 @@ list( import_bgcs, parse_antismash(list_bgcs_files), pattern = map(list_bgcs_files) - # , - # deployment = "main" ), tar_target( bgcs_by_genome, @@ -76,6 +75,53 @@ list( tar_target( bcgs_compare_means, bcgs_statistics(generalist_specialists_bgcs, bcgs_groups) + ), + tar_target( + genome_length2, + import_genome_length("cache/17-genome-lenght") + ), + tar_target( + paper_main_fig2, + plot_main_fig2_paper(generalist_specialists_bgcs, genomes2find_BGCs, cds_by_genome, genome_length, valid_amr) + ), + tar_target( + cds2permutate, + cds_to_plot(genomes2find_BGCs, cds_by_genome, genome_length) + ), + tar_target( + permutate_cds, + cds2permutate %>% + filter(kingdom == "Bacteria") %>% + permutation_test(y = "cds_n", x = "Group", n_iter = 10000) + ), + tar_target( + permutate_cds_norm, + cds2permutate %>% + filter(kingdom == "Bacteria") %>% + permutation_test(y = "cds_norm", x = "Group", n_iter = 10000) + ), + tar_target( + bcgs2permutate, + bgcs_df_paper(generalist_specialists_bgcs, genome_length) + ), + tar_target( + permutate_bcgs, + bcgs2permutate %>% + filter(kingdom == "Bacteria") %>% + permutation_test(y = "value", x = "Groups", n_iter = 10000) + ), + tar_target( + amr2permutate, + amr_df_paper(valid_amr, genomes2find_BGCs, genome_length) + ), + tar_target( + permutate_amr, + amr2permutate %>% + filter(kingdom == "Bacteria") %>% + permutation_test(y = "norm_amr", x = "Groups", n_iter = 10000) + ), + tar_target( + compare_bcgs_products, + compare_bcgs_products_bac_gen(generalist_specialists_bgcs, genome_length, bcgs_groups) ) -) - +) \ No newline at end of file diff --git a/src/sup/03-generalists-specialists-genome-with-gff/defaults.R b/src/sup/03-generalists-specialists-genome-with-gff/defaults.R index 4203fad..fc63859 100644 --- a/src/sup/03-generalists-specialists-genome-with-gff/defaults.R +++ b/src/sup/03-generalists-specialists-genome-with-gff/defaults.R @@ -22,6 +22,9 @@ library(patchwork) library(future) library(future.callr) library(RColorBrewer) +library(tidygraph) +library(ggraph) +library(igraph) setwd(here::here()) diff --git a/src/sup/03-generalists-specialists-genome-with-gff/functions.R b/src/sup/03-generalists-specialists-genome-with-gff/functions.R index 3edd62a..3453af4 100644 --- a/src/sup/03-generalists-specialists-genome-with-gff/functions.R +++ b/src/sup/03-generalists-specialists-genome-with-gff/functions.R @@ -72,9 +72,6 @@ generate_annotation <- function(bgcs_by_genome, genomes2find_BGCs, generalist_ge metadata %>% left_join(bgcs_by_genome, by = c("accession" = "genome_id")) %>% mutate(n = replace_na(n, 0)) - # %>% - # Remove taxa with no antismash hits - #drop_na(products) } @@ -121,7 +118,6 @@ plot_bgcs_gen_size_cds <- function(generalist_specialists_bgcs, genomes2find_BGC mutate(g_size = g_size/10^6) a <- generalist_specialists_bgcs %>% - #unite(col = Categories, c("kingdom", "type"), sep = " ") %>% rename(Groups = type) %>% group_by(accession, Groups, kingdom) %>% summarise(value = sum(n)) %>% @@ -219,6 +215,13 @@ plot_bgcs_gen_size_cds <- function(generalist_specialists_bgcs, genomes2find_BGC height = 5, dpi = 300) + ggsave(plot = b, + filename = "plots/02-generalist-specialist-bgcs/gene_spec_gen_size.pdf", + device = "pdf", + width = 12, + height = 5, + dpi = 300) + return(list(a,b,c)) } @@ -353,7 +356,6 @@ plot_bcgs_gen_spec <- function(generalist_specialists_bgcs, bcgs_groups) { left_join(bcgs_groups) %>% group_by(Groups, kingdom, cluster, perc) %>% summarise(sum_perc = sum(perc)) - #In order to avoid horizontal white line on geom_col, summarize it first! p <- df %>% ggplot(aes(Groups, perc, fill = cluster)) + @@ -416,7 +418,6 @@ bcgs_statistics <- function(generalist_specialists_bgcs, bcgs_groups) { ggplot(aes(comp, count, fill = cluster)) + geom_boxplot() + stat_compare_means( - #symnum.args = symnum.args, method = "wilcox", comparisons = list( c("RiPP-Specialist", "RiPP-Generalist"), @@ -435,4 +436,438 @@ bcgs_statistics <- function(generalist_specialists_bgcs, bcgs_groups) { device = "png", width = 11, height = 7) -} \ No newline at end of file +} + +import_genome_length <- function(path) { + + list.files(path, full.names = T) %>% + as_tibble() %>% + mutate(gen_len = map(value, ~ read_lines(.x))) %>% + unnest(gen_len) %>% + mutate(accession = str_extract(value, "GC[A|F]_[0-9]+.[0-9]")) +} + +plot_main_fig2_paper <- function(generalist_specialists_bgcs, genomes2find_BGCs, cds_by_genome, genome_length, valid_amr) { + + prevalence_group_colors <- c( + "generalist" = "#3a86ff", + "specialist" = "#ffbe0b" + ) + + + scale_color_prevalence_group <- function(drop = FALSE, ...) { + scale_color_manual( + values = purrr::simplify(prevalence_group_colors), + na.value = "#000000", + drop = drop, + ... + ) + } + + symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), + symbols = c("****", "***", "**", "*", "ns")) + + a <- + kingdoms_groups %>% + set_names(kingdoms_groups) %>% + purrr::map(function(current_kingdom) { + sub_abundances %>% + filter(subset_value == "all" & norm_method == "tss") %>% + pull(data) %>% + first() %>% + inner_join(selected_generalists_specialists) %>% + filter(kingdom == current_kingdom & !is.na(prevalence_group)) %>% + filter(abundance > 0.01e-2) %>% + ggplot(aes(x = prevalence_group, y = abundance, color = prevalence_group)) + + geom_boxplot(width = 0.5) + + facet_grid(~kingdom) + + scale_x_discrete(labels = str_to_sentence) + + scale_y_log10(expand = c(0, 1)) + + stat_compare_means( + method = "wilcox", + symnum.args = symnum.args, + comparisons = list(c("generalist", "specialist")) + ) + + annotation_logticks(sides = "l") + + scale_color_prevalence_group() + + theme_bw() + + theme( + strip.text = element_text(size = rel(1.1)), + axis.text = element_text(size = rel(1.1)), + panel.grid.minor.x = element_blank(), + panel.grid.major.x = element_blank(), + strip.background = element_blank(), + axis.ticks.x=element_blank(), + panel.border = element_rect(colour = "black", fill = NA)) + + guides(color = FALSE) + + labs( + x = "", + y = "Abundance (TSS)" + ) + }) + + + + cds <- genomes2find_BGCs %>% + distinct() %>% + select(accession, genus, type, kingdom) %>% + left_join(cds_by_genome, by = c("accession" = "genome_id")) %>% + select(-value) %>% + left_join(genome_length %>% + rename(accession = genome_id) %>% + unnest(g_size) %>% + mutate(g_size = as.numeric(g_size)) %>% + mutate(g_size = g_size/10^6) %>% + distinct(accession, g_size)) %>% + mutate(cds_norm = cds_n/g_size) + + b <- cds %>% + rename(Group = type) %>% + ggplot(aes(Group, cds_n)) + + geom_boxplot(aes(color = Group), width=0.5) + + facet_wrap(c("kingdom"), scales="free_y") + + stat_compare_means( + symnum.args = symnum.args, + method = "wilcox", + comparisons = list(c("Generalist", "Specialist"))) + + ylab("Number of CDS") + + guides(color = "none") + + scale_color_prevalence_type() + + scale_y_log10() + + theme_bw() + + annotation_logticks(sides = "l") + + theme(axis.title.x = element_blank(), + strip.text = element_text(size = rel(1.1)), + axis.text = element_text(size = rel(1.1)), + legend.position = "none", + panel.grid.minor.x = element_blank(), + panel.grid.major.x = element_blank(), + strip.background = element_blank(), + panel.border = element_rect(colour = "black", fill = NA)) + + + c <- generalist_specialists_bgcs %>% + left_join(genome_length %>% + rename(accession = genome_id) %>% + unnest(g_size) %>% + mutate(g_size = as.numeric(g_size)) %>% + mutate(g_size = g_size/10^6) %>% + distinct(accession, g_size)) %>% + rename(Groups = type) %>% + mutate(n = n/g_size) %>% + group_by(accession, Groups, kingdom) %>% + summarise(value = sum(n)) %>% + ggplot(aes(Groups, value, color = Groups)) + + geom_boxplot(width=0.5) + + scale_y_log10() + + stat_compare_means( + symnum.args = symnum.args, + method = "wilcox", + comparisons = list(c("Specialist", "Generalist"))) + + facet_wrap(c("kingdom"), scales="free_y") + + scale_color_prevalence_type() + + annotation_logticks(sides = "l") + + ylab("Number of BCGs/Genome Length") + + guides(color = FALSE) + + theme_minimal() + + theme(axis.title.x = element_blank(), + strip.text = element_text(size = rel(1.1)), + axis.text = element_text(size = rel(1.1)), + legend.position = "none", + panel.grid.minor.x = element_blank(), + panel.grid.major.x = element_blank(), + strip.background = element_blank(), + panel.border = element_rect(colour = "black", fill = NA)) + + amr <- valid_amr %>% + rename(accession = genome_id) %>% + left_join(genomes2find_BGCs %>% + distinct(accession, genus, type, kingdom)) + + amr2 <- amr %>% + rename(Groups = type, + element_type = `Element type`, + element_sub = `Element subtype`, + class = Class) %>% + select(accession, Groups, kingdom, element_type, element_sub, class) %>% + mutate(count = 1) %>% + group_by(accession, Groups, kingdom, element_type, element_sub, class) %>% + summarise(n = sum(count)) %>% + left_join(genome_length %>% + rename(accession = genome_id) %>% + unnest(g_size) %>% + mutate(g_size = as.numeric(g_size)) %>% + mutate(g_size = g_size/10^6) %>% + distinct(accession, g_size)) %>% + mutate(norm_amr = n/g_size) + + symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), + symbols = c("****", "***", "**", "*", "ns")) + + d <- amr2 %>% + filter(element_type == "AMR") %>% + ggplot(aes(Groups, norm_amr, color = Groups)) + + geom_point(size = 0.1) + + geom_boxplot(width=0.5) + + stat_compare_means( + method = "wilcox", + symnum.args = symnum.args, + comparisons = list(c("Generalist", "Specialist")), + method.args = list(alternative = "less")) + + facet_wrap(c("element_type")) + + scale_y_log10() + + scale_y_continuous(trans='log10') + + scale_color_prevalence_type() + + annotation_logticks(sides = "l") + + ylab("Number of AMR genes/Genome length") + + guides(color = FALSE) + + theme_bw() + + theme(axis.title.x = element_blank(), + strip.text = element_text(size = rel(1.1)), + axis.text = element_text(size = rel(1.1)), + legend.position = "none", + panel.grid.minor.x = element_blank(), + panel.grid.major.x = element_blank(), + strip.background = element_blank(), + panel.border = element_rect(colour = "black", fill = NA)) + + generalists_common_coabundance_graph <- readRDS("cache/15-microbiome-rserver-hki-rdata/generalists_common_coabundance_graph.rds") + + kingdoms_colors <- c( + "Bacteria" = "#3f78a9", + "Fungi" = "#A93F55", + "all" = "black" + ) + + + scale_color_kingdom <- function(drop = FALSE, na.value = "#d9d9d9", ...) { + scale_color_manual( + values = purrr::simplify(kingdoms_colors), + na.value = na.value, + drop = drop, + ... + ) + } + + environment_group_colors <- c( + "soil" = "#8b786d", + "aquatic" = "#78a1bb", + "host" = "#e0aa5a", + "all" = "black" + ) + + environment_group_sets_colors <- c( + "host-aquatic-soil" = "#000000", + "aquatic-soil" = "#155A82", + "host-aquatic" = "#83155F", + "host-soil" = "#826516" + ) %>% + append(environment_group_colors) + +e <- + generalists_common_coabundance_graph %>% + activate(edges) %>% + mutate(estimate_direction = estimate_direction %>% recode("positive" = "Positive", "negative" = "Negative")) %>% + ggraph(layout = layout_in_circle(generalists_common_coabundance_graph, order = order(V(generalists_common_coabundance_graph)$taxon_group))) + + geom_edge_link(aes(color = environment_group, alpha = involves_prevalence_group_taxon)) + + geom_node_point(aes(color = kingdom, size = is_generalist)) + + scale_edge_color_manual(values = environment_group_sets_colors, labels = str_to_sentence) + + scale_color_kingdom(na.value = "black") + + scale_size_manual(values = c(`TRUE` = 1.5, `FALSE` = 0.2), labels = c("No", "Yes")) + + scale_edge_alpha_manual(values = c(`TRUE` = 0.5, `FALSE` = 0.05), guide = "none") + + coord_fixed() + + facet_edges(~estimate_direction) + + guides( + color = guide_legend(override.aes = list(size = 6)), + size = guide_legend(override.aes = list(size = c(2,6)), + edge_color = guide_legend(override.aes = list(edge_width = 13))) + ) + + theme_void() + + labs( + color = "Kingdom", + edge_color = "Common in environments", + size = "Generalist") + + theme( + legend.text = element_text(size = rel(1.1)), + legend.title = element_text(size = rel(1.1)), + strip.text.x = element_text(size = 12, margin = margin(b = 5)) + ) + + +layout <- " +AABB +CCD# +EEEF +" + + f <- + wrap_plots( + wrap_plots(a$Bacteria + labs(tag = "A"), a$Fungi), + b + labs(tag = "B"), + c + labs(tag = "C"), + d + labs(tag = "D"), + e + labs(tag = "E"), + guides = "collect") + + guide_area() + + plot_layout(design = layout) + + ggsave(filename = "plots/04-manuscript-ap/fig2-v1.11.pdf", + plot = f, + device = "pdf", + width = 30, + height = 35, + units = "cm", + dpi = 300) + + ggsave(filename = "plots/04-manuscript-ap/fig2-v1.11.png", + plot = f, + width = 30, + height = 35, + units = "cm", + dpi = 300) + + return(f) + +} + +cds_to_plot <- function(genomes2find_BGCs, cds_by_genome, genome_length) { + + genomes2find_BGCs %>% + distinct() %>% + select(accession, genus, type, kingdom) %>% + left_join(cds_by_genome, by = c("accession" = "genome_id")) %>% + select(-value) %>% + left_join(genome_length %>% + rename(accession = genome_id) %>% + unnest(g_size) %>% + mutate(g_size = as.numeric(g_size)) %>% + mutate(g_size = g_size/10^6) %>% + distinct(accession, g_size)) %>% + mutate(cds_norm = cds_n/g_size) %>% + rename(Group = type) +} + +permutation_test <- function(df, y = "cds_n", x = "Group", n_iter = 1000) { + + p_values <- vector("numeric", n_iter) + + # Loop through the iterations + for (i in 1:n_iter) { + + # Shuffle the labels randomly + shuffled_labels <- sample(df[[x]]) + + # Combine the shuffled labels with the variable data + shuffled_data <- data.frame(values = df[[y]], group = shuffled_labels) + + #Perform the Wilcoxon rank-sum test and store the p-value + test_result <- wilcox.test(values ~ group, data = shuffled_data) + p_values[i] <- test_result$p.value + + } + + observed_p_value <- wilcox.test(df[[y]] ~ df[[x]])$p.value + + # Calculate the proportion of permutation p-values smaller than the observed p-value + proportion_smaller <- mean(p_values <= observed_p_value) + + # If the proportion of permuted p-values smaller than or equal to the observed + # p-value is 0, it suggests that none of the permuted datasets produced a result + # as extreme as the observed result. This implies that the observed difference + # between the groups defined by x (as measured by the test statistic, typically + # the Wilcoxon rank-sum test statistic in this case) is so extreme that it is + # unlikely to have occurred by random chance alone. + + # Therefore, a proportion of 0 indicates strong evidence against the null + # hypothesis of no difference between the groups, implying that there are + # indeed differences between the groups. + + return(proportion_smaller) +} + +bgcs_df_paper <- function(generalist_specialists_bgcs, genome_length) { + + generalist_specialists_bgcs %>% + left_join(genome_length %>% + rename(accession = genome_id) %>% + unnest(g_size) %>% + mutate(g_size = as.numeric(g_size)) %>% + mutate(g_size = g_size/10^6) %>% + distinct(accession, g_size)) %>% + rename(Groups = type) %>% + mutate(n = n/g_size) %>% + group_by(accession, Groups, kingdom) %>% + summarise(value = sum(n)) +} + +amr_df_paper <- function(valid_amr, genomes2find_BGCs, genome_length) { + +valid_amr %>% + rename(accession = genome_id) %>% + left_join(genomes2find_BGCs %>% + distinct(accession, genus, type, kingdom)) %>% + rename(Groups = type, + element_type = `Element type`, + element_sub = `Element subtype`, + class = Class) %>% + select(accession, Groups, kingdom, element_type, element_sub, class) %>% + mutate(count = 1) %>% + group_by(accession, Groups, kingdom, element_type, element_sub, class) %>% + summarise(n = sum(count)) %>% + left_join(genome_length %>% + rename(accession = genome_id) %>% + unnest(g_size) %>% + mutate(g_size = as.numeric(g_size)) %>% + mutate(g_size = g_size/10^6) %>% + distinct(accession, g_size)) %>% + mutate(norm_amr = n/g_size) %>% + filter(element_type == "AMR") +} + + +compare_bcgs_products_bac_gen <- function(generalist_specialists_bgcs, genome_length, bcgs_groups) { + + bcgs_df <- + generalist_specialists_bgcs %>% + left_join(genome_length %>% + rename(accession = genome_id) %>% + unnest(g_size) %>% + mutate(g_size = as.numeric(g_size)) %>% + mutate(g_size = g_size/10^6) %>% + distinct(accession, g_size)) %>% + rename(Groups = type) %>% + mutate(n = n/g_size) %>% + mutate(products = replace_na(products, "No-BCGs")) %>% + group_by(accession, Groups, kingdom, products) %>% + summarise(value = sum(n)) %>% + filter(kingdom == "Bacteria" & value > 0) %>% + left_join(bcgs_groups) + +# p_products <- + bcgs_df %>% + ggplot(aes(Groups, value)) + + geom_boxplot(aes(color=cluster)) + + stat_compare_means( + method = "wilcox", + symnum.args = symnum.args, + label = "..p.adj..", + comparisons = list(c("Specialist", "Generalist")), + method.args = list(alternative = "greater"), + ) + + #facet_wrap(~Groups, scales = "free_y") + + scale_y_continuous(expand = expansion(mult = c(0, 0.1))) + + scale_y_log10() + +# If the p-value is less than a chosen significance level (commonly 0.05), it +# suggests that there is statistically significant evidence to reject the null +# hypothesis that the means are equal, in favor of the alternative hypothesis +# that the mean of the "Generalist" group is greater than the mean of the +# "Specialist" group. + +ggsave(filename = "plots/04-manuscript-ap/bcgs_products_bacteria.pdf", + plot = p_products, width = 9, height = 7, dpi = 300) + +ggsave(filename = "plots/04-manuscript-ap/bcgs_products_bacteria.png", + plot = p_products, width = 9, height = 7, dpi = 300) + +} diff --git a/src/sup/05-manuscript-figs/01-sup-fig1.R b/src/sup/05-manuscript-figs/01-sup-fig1.R deleted file mode 100644 index a8bf476..0000000 --- a/src/sup/05-manuscript-figs/01-sup-fig1.R +++ /dev/null @@ -1,161 +0,0 @@ -library(tidyverse) -library(RColorBrewer) -library(patchwork) - -# 16S - -raw_coverage <- list.files(path = "cache/08-coverage", - full.names = T) %>% - as_tibble() %>% - #slice(1:3) %>% - mutate(coverage = map(value, ~ read_tsv(.x))) - -cov <- raw_coverage %>% - mutate(sample_id = str_split(value, "/", simplify=T)[,3]) %>% - mutate(sample_id = str_remove(sample_id, ".txt")) %>% - select(-value) %>% - unnest(coverage) - -runs <- read_csv(file = "/home/ailtonpcf/draco/proj/04-global-microbiome/raw/runs.csv") - -# Color palete -n <- 10 -qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] -col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))) - -# Merge dfs -proj16s <- cov %>% - rename(rRNA = "#ID") %>% - left_join(runs %>% select(run_id, bioproject_id), by = c("sample_id" = "run_id")) %>% - relocate(sample_id, bioproject_id) %>% - mutate(rRNA = str_remove(rRNA, "_[:digit:]+")) %>% - mutate(rRNA = str_remove(rRNA, "_amp")) %>% - mutate(bioproject_id = str_replace(bioproject_id, "_", "\n")) - -#Plot1 -p1 <- proj16s %>% - ggplot(aes(x=Covered_percent)) + - geom_boxplot(aes(color=rRNA)) + - facet_wrap(c("bioproject_id"), scales = "free_y") + - scale_color_manual(values = col_vector) + - guides(fill=guide_legend(ncol=1)) + - theme_bw() + - theme(axis.text = element_text(size = 10), - panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - strip.background = element_blank(), - panel.border = element_rect(colour = "black", fill = NA)) + - labs(x = "Coverage (%)", - y = "Count", - color = "rRNA", - tag = "A") - -p2 <- proj16s %>% - ggplot(aes(x=Covered_percent)) + - geom_density(aes(color=rRNA), alpha = 0.75) + - facet_wrap(c("bioproject_id"), scales = "free_y") + - scale_color_manual(values = col_vector) + - guides(fill=guide_legend(ncol=1)) + - theme_bw() + - theme(axis.text = element_text(size = 10), - panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - strip.background = element_blank(), - panel.border = element_rect(colour = "black", fill = NA)) + - labs(x = "Coverage (%)", - y = "Count", - color = "rRNA", - tag = "A") - -p3 <- proj16s %>% - ggplot(aes(x=Covered_percent)) + - geom_histogram(aes(fill=rRNA), alpha = 0.75) + - facet_wrap(c("bioproject_id"), scales = "free_y") + - scale_fill_manual(values = col_vector) + - guides(fill=guide_legend(ncol=1)) + - theme_bw() + - theme(axis.text = element_text(size = rel(1.1)), - strip.text = element_text(size = rel(1.1)), - panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - strip.background = element_blank(), - panel.border = element_rect(colour = "black", fill = NA)) + - labs(x = "Coverage (%)", - y = "Number of reads", - fill = "rRNA", - tag = "A") - -# ITS - -its <- read_delim(file = "cache/00-tmp/extracted_its_seqs_NAs_primers.txt", - col_names = F) - -# Parse fasta headers from grep -seqs <- its %>% - select(X1) %>% - mutate(sample_id = str_split(X1, "\\.", simplify=T)[,1]) %>% - mutate(ITS = str_extract(X1, "ITS[1-2]")) - -# Import bioprojects and run_ids -runs <- read_csv(file = "raw/runs.csv") - -# Merge run_id and bioprojects -sampleByITS <- seqs %>% - drop_na(ITS) %>% - group_by(sample_id, ITS) %>% - count() %>% - #pivot_wider(names_from = its, values_from = n, values_fill = 0) %>% - left_join(runs %>% select(run_id, bioproject_id), by = c("sample_id" = "run_id")) - -# Color palete -n <- 2 -qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] -col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))) - - -# Total counts by project and primer -p4 <- sampleByITS %>% - ungroup() %>% - select(-sample_id) %>% - group_by(bioproject_id, ITS) %>% - summarise(total = sum(n)) %>% - filter(!total < 100) %>% - arrange(total) %>% - ggplot(aes(fct_reorder(bioproject_id, total), total)) + - geom_col(aes(fill = ITS), position = "fill", width = 0.5) + - coord_flip() + - scale_fill_manual(values = col_vector) + - guides(fill=guide_legend(ncol=1)) + - theme_bw() + - theme(axis.title.y = element_blank(), - strip.text = element_text(size = rel(1.1)), - axis.text = element_text(size = rel(1.1)), - panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - strip.background = element_blank(), - axis.ticks.x=element_blank(), - panel.border = element_rect(colour = "black", fill = NA)) + - labs(y = "Reads (%)", - fill = "ITS", - tag = "B") - - -wp3 <- p3 + p4 + plot_layout(widths = c(3,1), guides = "collect") - -ggsave(filename = "/home/ailtonpcf/draco/proj/04-global-microbiome/plots/04-manuscript-ap/sup-fig1.png", - plot = wp3, - device = "png", - width = 13, - height = 9, - dpi = 300) - -ggsave(filename = "/home/ailtonpcf/draco/proj/04-global-microbiome/plots/04-manuscript-ap/sup-fig1.pdf", - plot = wp3, - device = "pdf", - width = 13, - height = 9, - dpi = 300) - - - - diff --git a/src/sup/05-manuscript-figs/02-main-fig2.R b/src/sup/05-manuscript-figs/02-main-fig2.R deleted file mode 100644 index f8a84e7..0000000 --- a/src/sup/05-manuscript-figs/02-main-fig2.R +++ /dev/null @@ -1,207 +0,0 @@ -source("src/03-generalists-specialists-genome-with-gff/defaults.R") - -tar_load(figure1) -tar_load(figure4) - -library(tidygraph) -library(ggraph) -library(igraph) - -generalists_common_coabundance_graph <- readRDS("cache/15-microbiome-rserver-hki-rdata/generalists_common_coabundance_graph.rds") - -kingdoms_colors <- c( - "Bacteria" = "#3f78a9", - "Fungi" = "#A93F55", - "all" = "black" -) - - -scale_color_kingdom <- function(drop = FALSE, na.value = "#d9d9d9", ...) { - scale_color_manual( - values = purrr::simplify(kingdoms_colors), - na.value = na.value, - drop = drop, - ... - ) -} - -environment_group_colors <- c( - "soil" = "#8b786d", - "aquatic" = "#78a1bb", - "host" = "#e0aa5a", - "all" = "black" -) - -environment_group_sets_colors <- c( - "host-aquatic-soil" = "#000000", - "aquatic-soil" = "#155A82", - "host-aquatic" = "#83155F", - "host-soil" = "#826516" -) %>% - append(environment_group_colors) - - -d <- - generalists_common_coabundance_graph %>% - activate(edges) %>% - mutate(estimate_direction = estimate_direction %>% recode("positive" = "Positive", "negative" = "Negative")) %>% - ggraph(layout = layout_in_circle(generalists_common_coabundance_graph, order = order(V(generalists_common_coabundance_graph)$taxon_group))) + - geom_edge_link(aes(color = environment_group, alpha = involves_prevalence_group_taxon)) + - geom_node_point(aes(color = kingdom, size = is_generalist)) + - scale_edge_color_manual(values = environment_group_sets_colors, labels = str_to_sentence) + - scale_color_kingdom(na.value = "black") + - scale_size_manual(values = c(`TRUE` = 5, `FALSE` = 2), labels = c("No", "Yes")) + - scale_edge_alpha_manual(values = c(`TRUE` = 0.5, `FALSE` = 0.05), guide = "none") + - coord_fixed() + - facet_edges(~estimate_direction) + - guides(color = guide_legend(override.aes = list(size = 5))) + - theme_void() + - labs( - color = "Kingdom", - edge_color = "Common in environments", - size = "Generalist") -################################################################################ -kingdoms_groups <- readRDS("cache/15-microbiome-rserver-hki-rdata/kingdoms_groups.rds") -sub_abundances <- readRDS("cache/15-microbiome-rserver-hki-rdata/sub_abundances.rds") -selected_generalists_specialists <- readRDS("cache/15-microbiome-rserver-hki-rdata/selected_generalists_specialists.rds") - -prevalence_group_colors <- c( - "generalist" = "#3a86ff", - "specialist" = "#ffbe0b" -) - - -scale_color_prevalence_group <- function(drop = FALSE, ...) { - scale_color_manual( - values = purrr::simplify(prevalence_group_colors), - na.value = "#000000", - drop = drop, - ... - ) -} - -symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), - symbols = c("****", "***", "**", "*", "ns")) - -bc <- - kingdoms_groups %>% - set_names(kingdoms_groups) %>% - purrr::map(function(current_kingdom) { - sub_abundances %>% - filter(subset_value == "all" & norm_method == "tss") %>% - pull(data) %>% - first() %>% - inner_join(selected_generalists_specialists) %>% - filter(kingdom == current_kingdom & !is.na(prevalence_group)) %>% - filter(abundance > 0.01e-2) %>% - ggplot(aes(x = prevalence_group, y = abundance, color = prevalence_group)) + - geom_boxplot(width = 0.5) + - facet_grid(~kingdom) + - scale_x_discrete(labels = str_to_sentence) + - scale_y_log10(expand = c(0, 1)) + - stat_compare_means( - method = "wilcox", - symnum.args = symnum.args, - comparisons = list(c("generalist", "specialist")) - ) + - annotation_logticks(sides = "l") + - scale_color_prevalence_group() + - theme_bw() + - theme( - strip.text = element_text(size = rel(1.1)), - axis.text = element_text(size = rel(1.1)), - panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - strip.background = element_blank(), - axis.ticks.x=element_blank(), - panel.border = element_rect(colour = "black", fill = NA)) + - guides(color = FALSE) + - labs( - x = "", - y = "Abundance (TSS)" - ) - }) - -################################################################################ - -layout <- " -AABBCC -DDEEEE -" - -f <- wrap_plots( - wrap_plots( - bc[[1]] + labs(tag = "A"), - bc[[2]] - ), - figure1[[3]] + labs(tag = "B"), - figure1[[1]] + labs(tag = "C"), - figure4 + labs(tag = "D"), - d + labs(tag = "E"), - ncol = 3, - nrow = 2, - guides = "collect") + - plot_layout(design = layout - # plot_layout(widths = c(1,1,2), heights = c(0.5, 2) - ) - -ggsave(filename = "plots/04-manuscript-ap/fig2-v1.3.pdf", - plot = f, - device = "pdf", - width = 45, - height = 25, - units = "cm", - dpi = 300) - -ggsave(filename = "plots/04-manuscript-ap/fig2-v1.3.png", - plot = f, - device = "png", - width = 45, - height = 25, - units = "cm", - dpi = 300) - -# BIG CHANGES -############################################################################## - -source("src/07-amr/defaults.R") -tar_load(amr_comparisons_plot) - -amr_comparisons_plot[1] - -layout <- " -AABBCC -DDEEEE -" - -f <- wrap_plots( - wrap_plots( - bc[[1]] + labs(tag = "A"), - bc[[2]] - ), - figure1[[3]] + labs(tag = "B"), - figure1[[1]] + labs(tag = "C"), - amr_comparisons_plot[[1]] + labs(tag = "D"), - d + labs(tag = "E"), - ncol = 3, - nrow = 2, - guides = "collect") + - plot_layout(design = layout - # plot_layout(widths = c(1,1,2), heights = c(0.5, 2) - ) - -ggsave(filename = "plots/04-manuscript-ap/fig2-v1.4.pdf", - plot = f, - device = "pdf", - width = 45, - height = 25, - units = "cm", - dpi = 300) - -ggsave(filename = "plots/04-manuscript-ap/fig2-v1.4.png", - plot = f, - device = "png", - width = 45, - height = 25, - units = "cm", - dpi = 300) diff --git a/src/sup/05-manuscript-figs/02-sup-fig2.R b/src/sup/05-manuscript-figs/02-sup-fig2.R deleted file mode 100644 index 07666a4..0000000 --- a/src/sup/05-manuscript-figs/02-sup-fig2.R +++ /dev/null @@ -1,303 +0,0 @@ -source("src/04-quantify-primers-effect/defaults.R") -library(ggpubr) - -# Import results generated before -tar_load(fungal_primers_heat_rds) -tar_load(bacterial_primers_heat_rds) - -venn <- read_rds("cache/15-microbiome-rserver-hki-rdata/venn_no_aqua_fixed.rds") -prevalent_taxa <- readRDS(file = "cache/15-microbiome-rserver-hki-rdata/02supfig-prevalent_taxa.rds") -sub_abundances <- readRDS(file = "cache/15-microbiome-rserver-hki-rdata/02supfig-sub_abundances.rds") -primers_by_sample <- readRDS(file = "cache/15-microbiome-rserver-hki-rdata/02supfig-primers_by_sample.rds") - -environment_group_colors <- c( - "soil" = "#8b786d", - "aquatic" = "#78a1bb", - "host" = "#e0aa5a", - "all" = "black" -) - - -scale_color_environment_group <- function(drop = FALSE, ...) { - scale_color_manual( - values = environment_group_colors, - na.value = "#d9d9d9", - drop = drop, - ... - ) -} - -symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), - symbols = c("****", "***", "**", "*", "ns")) - -bacv1_v4 <- - prevalent_taxa %>% - nest(-kingdom) %>% - filter(kingdom == "Bacteria") %>% - mutate( - data = data %>% map(~ { - .x %>% - mutate( - group = case_when( - soil & host & aquatic ~ "Common\nin all", - soil & !host & !aquatic ~ "Unique to\nsoil", - !soil & !host & aquatic ~ "Unique to\naquatic", - !soil & host & !aquatic ~ "Unique to\nhost" - ), - environment_group = group %>% recode( - "Common\nin all" = "all", "Unique to\nsoil" = "soil", "Unique to\naquatic" = "aquatic", - "Unique to\nhost" = "host" - ) - ) %>% - left_join( - sub_abundances$data[[2]] %>% select(sample_id, taxon, abundance) - ) %>% - filter(abundance > 0.01e-2) %>% - filter(!is.na(group)) %>% - left_join(sub_abundances$data[[2]]) %>% - left_join(primers_by_sample %>% select(sample_id, contains("bac"))) %>% - filter(!is.na(set1bac)) %>% - ggplot(aes(group, abundance, color = environment_group)) + - geom_boxplot(width = 0.5) + - stat_compare_means( - method = "wilcox", - symnum.args = symnum.args, - comparisons = list( - c("Common\nin all", "Unique to\naquatic"), - c("Common\nin all", "Unique to\nhost"), - c("Common\nin all", "Unique to\nsoil") - ) - ) + - scale_y_log10(expand = c(0, 0.5)) + - scale_color_environment_group() + - annotation_logticks(sides = "l") + - guides(color = FALSE) + - theme_bw() + - theme( - strip.text = element_text(size = rel(1.1)), - axis.text = element_text(size = rel(1.1)), - panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - strip.background = element_blank(), - axis.ticks.x=element_blank(), - panel.border = element_rect(colour = "black", fill = NA)) + - labs(x = "", y = "Abundance (TSS)") + - facet_wrap(c("set1bac")) - - }) - ) %>% - deframe() %>% - pluck(1) - -bacv3_v5 <- - prevalent_taxa %>% - nest(-kingdom) %>% - filter(kingdom == "Bacteria") %>% - mutate( - data = data %>% map(~ { - .x %>% - mutate( - group = case_when( - soil & host & aquatic ~ "Common\nin all", - soil & !host & !aquatic ~ "Unique to\nsoil", - !soil & !host & aquatic ~ "Unique to\naquatic", - !soil & host & !aquatic ~ "Unique to\nhost" - ), - environment_group = group %>% recode( - "Common\nin all" = "all", "Unique to\nsoil" = "soil", "Unique to\naquatic" = "aquatic", - "Unique to\nhost" = "host" - ) - ) %>% - left_join( - sub_abundances$data[[2]] %>% select(sample_id, taxon, abundance) - ) %>% - filter(abundance > 0.01e-2) %>% - filter(!is.na(group)) %>% - left_join(sub_abundances$data[[2]]) %>% - left_join(primers_by_sample %>% select(sample_id, contains("bac"))) %>% - filter(!is.na(set2bac)) %>% - ggplot(aes(group, abundance, color = environment_group)) + - geom_boxplot(width = 0.5) + - stat_compare_means( - method = "wilcox", - symnum.args = symnum.args, - comparisons = list( - c("Common\nin all", "Unique to\naquatic"), - c("Common\nin all", "Unique to\nhost"), - c("Common\nin all", "Unique to\nsoil") - ) - ) + - scale_y_log10(expand = c(0, 0.5)) + - scale_color_environment_group() + - annotation_logticks(sides = "l") + - guides(color = FALSE) + - theme_bw() + - theme( - strip.text = element_text(size = rel(1.1)), - axis.text = element_text(size = rel(1.1)), - panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - strip.background = element_blank(), - axis.ticks.x=element_blank(), - panel.border = element_rect(colour = "black", fill = NA)) + - labs(x = "", y = "Abundance (TSS)") + - facet_wrap(c("set2bac")) - - }) - ) %>% - deframe() %>% - pluck(1) - -funits1 <- - prevalent_taxa %>% - nest(-kingdom) %>% - filter(kingdom == "Fungi") %>% - mutate( - data = data %>% map(~ { - .x %>% - mutate( - group = case_when( - soil & host & aquatic ~ "Common\nin all", - soil & !host & !aquatic ~ "Unique to\nsoil", - !soil & !host & aquatic ~ "Unique to\naquatic", - !soil & host & !aquatic ~ "Unique to\nhost" - ), - environment_group = group %>% recode( - "Common\nin all" = "all", "Unique to\nsoil" = "soil", "Unique to\naquatic" = "aquatic", - "Unique to\nhost" = "host" - ) - ) %>% - left_join( - sub_abundances$data[[2]] %>% - drop_na(environment_group) %>% - select(sample_id, taxon, abundance) - ) %>% - drop_na(environment_group) %>% - filter(abundance > 0.01e-2) %>% - filter(!is.na(group)) %>% - left_join(sub_abundances$data[[2]]) %>% - left_join(primers_by_sample %>% select(sample_id, contains("fun"))) %>% - filter(!is.na(set1fun)) %>% - ggplot(aes(group, abundance, color = environment_group)) + - geom_boxplot(width = 0.5) + - stat_compare_means( - method = "wilcox", - symnum.args = symnum.args, - comparisons = list( - c("Common\nin all", "Unique to\naquatic"), - c("Common\nin all", "Unique to\nhost"), - c("Common\nin all", "Unique to\nsoil") - ) - ) + - scale_y_log10(expand = c(0, 0.5)) + - scale_color_environment_group() + - annotation_logticks(sides = "l") + - guides(color = FALSE) + - theme_bw() + - theme( - strip.text = element_text(size = rel(1.1)), - axis.text = element_text(size = rel(1.1)), - panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - strip.background = element_blank(), - axis.ticks.x=element_blank(), - panel.border = element_rect(colour = "black", fill = NA)) + - labs(x = "", y = "Abundance (TSS)") + - facet_wrap(c("set1fun")) - - }) - ) %>% - deframe() %>% - pluck(1) - -funits2 <- - prevalent_taxa %>% - nest(-kingdom) %>% - filter(kingdom == "Fungi") %>% - mutate( - data = data %>% map(~ { - .x %>% - mutate( - group = case_when( - soil & host & aquatic ~ "Common\nin all", - soil & !host & !aquatic ~ "Unique to\nsoil", - !soil & !host & aquatic ~ "Unique to\naquatic", - !soil & host & !aquatic ~ "Unique to\nhost" - ), - environment_group = group %>% recode( - "Common\nin all" = "all", "Unique to\nsoil" = "soil", "Unique to\naquatic" = "aquatic", - "Unique to\nhost" = "host" - ) - ) %>% - left_join( - sub_abundances$data[[2]] %>% - drop_na(environment_group) %>% - select(sample_id, taxon, abundance) - ) %>% - filter(abundance > 0.01e-2) %>% - filter(!is.na(group)) %>% - left_join(sub_abundances$data[[2]]) %>% - left_join(primers_by_sample %>% select(sample_id, contains("fun"), env_check)) %>% - filter(!is.na(set2fun)) %>% - #filter(!(is.na(env_check) & environment_group == "aquatic")) %>% - ggplot(aes(group, abundance, color = environment_group)) + - geom_boxplot(width = 0.5) + - stat_compare_means( - method = "wilcox", - symnum.args = symnum.args, - comparisons = list( - c("Common\nin all", "Unique to\nhost"), - c("Common\nin all", "Unique to\nsoil") - ) - ) + - scale_y_log10(expand = c(0, 0.5)) + - scale_color_environment_group() + - annotation_logticks(sides = "l") + - guides(color = FALSE) + - theme_bw() + - theme( - strip.text = element_text(size = rel(1.1)), - axis.text = element_text(size = rel(1.1)), - panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - strip.background = element_blank(), - axis.ticks.x=element_blank(), - panel.border = element_rect(colour = "black", fill = NA)) + - labs(x = "", y = "Abundance (TSS)") + - facet_wrap(c("set2fun")) - - }) - ) %>% - deframe() %>% - pluck(1) - -p <- wrap_plots( - venn[[1]], - venn[[2]], - venn[[3]], - venn[[4]], - (bacterial_primers_heat_rds + labs(tag = "B")), - (fungal_primers_heat_rds + labs(tag = "C")), - bacv1_v4 + labs(tag = "D"), - bacv3_v5, - funits1, - funits2, - ncol = 2, - widths = c(1,1) -) - -ggsave(filename = "plots/04-manuscript-ap/sup-fig2.pdf", - plot = p, - device = "pdf", - width = 33, - height = 40, - units = "cm", - dpi = 300) - -ggsave(filename = "plots/04-manuscript-ap/sup-fig2.png", - plot = p, - device = "png", - width = 33, - height = 40, - units = "cm", - dpi = 300) diff --git a/src/sup/05-manuscript-figs/03-sup-fig3.R b/src/sup/05-manuscript-figs/03-sup-fig3.R deleted file mode 100644 index a63d677..0000000 --- a/src/sup/05-manuscript-figs/03-sup-fig3.R +++ /dev/null @@ -1,153 +0,0 @@ -# Most ecological papers use Levins' niche breadth index to define generalist taxon -source("src/defaults.R") - -library(MicroNiche) - -loadd(sub_abundances) -loadd(selected_generalists_specialists) -loadd(samples) - -taxa_data <- - sub_abundances %>% - filter(norm_method == "tss" & subset_value == "all") %>% - unnest(data) %>% - # pseudo counts to normalize for seqdepth - mutate(abundance = round(abundance * 10e3)) %>% - select(sample_id, taxon, abundance) %>% - pivot_wider(names_from = sample_id, values_from = abundance, values_fill = list(abundance = 0)) %>% - as.data.frame() - -levins_indicies <- - tibble(sample_grouping = c("bioproject_id", "environment_group", "habitat")) %>% - mutate( - levins_index = sample_grouping %>% map(possibly(~ { - sample_data <- - taxa_data %>% - colnames() %>% - tibble(sample_id = .) %>% - left_join(samples) %>% - pluck(.x) - - n_sample_groups <- sample_data %>% - unique() %>% - length() - - levins.Bn(taxa_data, n_sample_groups, sample_data) %>% - as_tibble(rownames = "taxon") - }, NA)) - ) - -#saveRDS(object = levins_indicies, file = "rdata/levins_indicies.rds") -levins_indicies <- readRDS("rdata/levins_indicies.rds") - -prevalence_group_colors_upper <- c( - "Generalist" = "#3a86ff", - "Specialist" = "#ffbe0b" -) - -scale_color_prevalence_group_upper <- function(drop = FALSE, ...) { - scale_color_manual( - values = purrr::simplify(prevalence_group_colors_upper), - na.value = "#000000", - drop = drop, - ... - ) -} - -p1 <- levins_indicies %>% - unnest(levins_index) %>% - left_join(selected_generalists_specialists) %>% - filter(!is.na(prevalence_group)) %>% - mutate( - sample_grouping = sample_grouping %>% - recode("bioproject_id" = "Project", "habitat" = "Habitat", "environment_group" = "Environment") %>% - factor(levels = c("Project", "Habitat", "Environment")) - ) %>% - mutate(prevalence_group = recode(prevalence_group, "generalist" = "Generalist", "specialist" = "Specialist")) %>% - ggplot(aes(prevalence_group, Bn, color = prevalence_group)) + - geom_boxplot() + - scale_color_prevalence_group_upper() + - stat_compare_means( - method = "wilcox", - comparisons = list(c("Generalist", "Specialist")) - ) + - facet_wrap(~sample_grouping) + - guides(color = "none") + - theme_bw() + - theme(panel.grid.major.x = element_blank(), - strip.text = element_text(size = rel(1.2)), - legend.text = element_text(size=rel(1.2)), - axis.text = element_text(size=rel(1.2)), - axis.title = element_text(size=rel(1.2)), - panel.grid.minor.x = element_blank(), - strip.background = element_blank(), - panel.border = element_rect(colour = "black", fill = NA)) + - labs( - x = "", - y = TeX("Levins' niche breadth index B_n"), - tag = "A" - ) - -# SNB -################################################################################ -library(ggbeeswarm) - -generalists <- read_tsv(here::here("raw/snb/generalist_edit.txt"), skip = 1, - col_names = "taxa") %>% - mutate(gen_spec = "generalist") - -specialists <- read_tsv(here::here("raw/snb/specialists_edit.txt"), skip = 1, - col_names = "taxa") %>% - mutate(gen_spec = "specialist") - -gen_spec <- bind_rows(generalists, specialists) %>% - mutate(taxa = str_replace_all(taxa, " ", "_")) - -snb <- read_tsv(here::here("raw/snb/bas_snb.txt")) %>% - filter(rank == "genus") %>% - mutate(taxa = str_extract(`taxonomic lineage`, "genus\\.(.+)"), - taxa = str_remove(taxa, "genus\\.") - ) %>% - relocate(taxa, .after = rank) - -plot_df <- gen_spec %>% - left_join(snb) - -p2 <- snb %>% - ggplot(aes(x = "1", y = SNB)) + - geom_violin() + - geom_quasirandom(data = plot_df, aes(x = "1", y = SNB, color = gen_spec), - width = 0.07, size = 3) + - scale_color_manual(values = c("#3a86ff", "#ffbe0b"), name = "", - labels = c("Generalist", "Specialist")) + - labs(x = "", tag = "B") + - guides(color=guide_legend(override.aes=list(fill=NA))) + - theme_bw() + - theme(axis.text.x = element_blank(), - legend.position=c(.7, .9), - legend.key = element_blank(), - legend.background=element_blank(), - strip.text = element_text(size = rel(1.2)), - legend.text = element_text(size=rel(1.2)), - axis.text = element_text(size=rel(1.2)), - axis.title = element_text(size=rel(1.2)), - panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - strip.background = element_blank(), - panel.border = element_rect(colour = "black", fill = NA)) - -p <- p1 + p2 + plot_layout(widths = c(2,1)) - -ggsave(filename = "results/sup-fig3.png", - plot = p, - device = "png", - width = 11, - height = 7, - dpi = 300) - -ggsave(filename = "results/sup-fig3.pdf", - plot = p, - device = "pdf", - width = 11, - height = 7, - dpi = 300) diff --git a/src/sup/05-manuscript-figs/README b/src/sup/05-manuscript-figs/README deleted file mode 100644 index b943407..0000000 --- a/src/sup/05-manuscript-figs/README +++ /dev/null @@ -1,3 +0,0 @@ -- Figures not present here are generated by drake pipeline. - -- load() R function make drake results available to be used.