From 43cefd2ac2f5191e3febe43f09132f8a906e7a37 Mon Sep 17 00:00:00 2001
From: apereira <ailton.pereira@leibniz-hki.de>
Date: Tue, 12 Mar 2024 13:30:38 +0100
Subject: [PATCH] Add Rscripts used in the paper

---
 .../R/00-generate-samples-list.R              |  48 --
 src/sup/01-detect-primers/R/01-detect-its.R   |   5 -
 .../_targets.R                                |  58 ++-
 .../defaults.R                                |   3 +
 .../functions.R                               | 449 +++++++++++++++++-
 src/sup/05-manuscript-figs/01-sup-fig1.R      | 161 -------
 src/sup/05-manuscript-figs/02-main-fig2.R     | 207 --------
 src/sup/05-manuscript-figs/02-sup-fig2.R      | 303 ------------
 src/sup/05-manuscript-figs/03-sup-fig3.R      | 153 ------
 src/sup/05-manuscript-figs/README             |   3 -
 10 files changed, 497 insertions(+), 893 deletions(-)
 delete mode 100644 src/sup/01-detect-primers/R/00-generate-samples-list.R
 delete mode 100644 src/sup/05-manuscript-figs/01-sup-fig1.R
 delete mode 100644 src/sup/05-manuscript-figs/02-main-fig2.R
 delete mode 100644 src/sup/05-manuscript-figs/02-sup-fig2.R
 delete mode 100644 src/sup/05-manuscript-figs/03-sup-fig3.R
 delete mode 100644 src/sup/05-manuscript-figs/README

diff --git a/src/sup/01-detect-primers/R/00-generate-samples-list.R b/src/sup/01-detect-primers/R/00-generate-samples-list.R
deleted file mode 100644
index e434a34..0000000
--- a/src/sup/01-detect-primers/R/00-generate-samples-list.R
+++ /dev/null
@@ -1,48 +0,0 @@
-library(tidyverse)
-
-runs <- read_csv("raw/runs.csv") %>% 
-  as_tibble()
-bioprojects <- read_csv("raw/bioprojects.csv") %>% 
-  as_tibble()
-biosamples <- read_tsv("raw/biosamples.tsv") %>% 
-  as_tibble()
-
-proj_noRef <- c(
-  "PRJNA282687",
-  "PRJNA324410",
-  "PRJNA271113",
-  "PRJNA359237",
-  "PRJNA473079",
-  "PRJNA263505",
-  "PRJNA406830",
-  "PRJNA432446",
-  "PRJNA496065"
-)
-
-runs %>%
-  filter(bioproject_id  %in% proj_noRef) %>% 
-  filter(str_detect(amplicon, "bac_16s")) %>% 
-  filter(str_detect(library_layout, "SINGLE")) %>%
-  select(run_id) %>% 
-  write_tsv(file = "draco/proj/04-global-microbiome/raw/proj_no_literature/bacteria_single.tsv",col_names = T)
-
-runs %>%
-  filter(bioproject_id  %in% proj_noRef) %>% 
-  filter(str_detect(amplicon,"bac_16s")) %>% 
-  filter(str_detect(library_layout, "PAIRED")) %>%
-  select(run_id) %>% 
-  write_tsv(file = "draco/proj/04-global-microbiome/raw/proj_no_literature/bacteria_paired.tsv",col_names = T)
-
-runs %>%
-  filter(bioproject_id  %in% proj_noRef) %>% 
-  filter(str_detect(amplicon, "fun_its")) %>% 
-  filter(str_detect(library_layout, "SINGLE")) %>%
-  select(run_id) %>% 
-  write_tsv(file = "draco/proj/04-global-microbiome/raw/proj_no_literature/fungi_single.tsv",col_names = T)
-
-runs %>%
-  filter(bioproject_id  %in% proj_noRef) %>% 
-  filter(str_detect(amplicon, "fun_its")) %>% 
-  filter(str_detect(library_layout, "PAIRED")) %>%
-  select(run_id) %>% 
-  write_tsv(file = "draco/proj/04-global-microbiome/raw/proj_no_literature/fungi_paired.tsv",col_names = T)
diff --git a/src/sup/01-detect-primers/R/01-detect-its.R b/src/sup/01-detect-primers/R/01-detect-its.R
index e543f02..97b886b 100644
--- a/src/sup/01-detect-primers/R/01-detect-its.R
+++ b/src/sup/01-detect-primers/R/01-detect-its.R
@@ -1,10 +1,5 @@
 library(tidyverse)
 
-# After ITS extraction, it was used the following command to extract the headers 
-# of extracted fastas:
-# grep ">" *.fasta > ../00-tmp/extracted_its_seqs_NAs_primers.txt
-# This way, each header correspond to a sequence.
-
 # ITS sequences after ITSx extraction
 its <- read_delim(file = "cache/00-tmp/extracted_its_seqs_NAs_primers.txt", 
                 col_names = F)
diff --git a/src/sup/03-generalists-specialists-genome-with-gff/_targets.R b/src/sup/03-generalists-specialists-genome-with-gff/_targets.R
index 34880fc..0296d93 100644
--- a/src/sup/03-generalists-specialists-genome-with-gff/_targets.R
+++ b/src/sup/03-generalists-specialists-genome-with-gff/_targets.R
@@ -1,6 +1,6 @@
 #!/usr/bin/env R
 
-# This pipeline analise the antismash results with gff3 file suplied together with the genome.
+# This pipeline analyse the antismash results with gff3 file suplied together with the genome.
 
 source(here::here("src/03-generalists-specialists-genome-with-gff/defaults.R"))
 
@@ -14,15 +14,16 @@ tar_option_set(packages = c("tidyverse","tarchetypes","jsonlite", "patchwork", "
 
 ### Targets results dir
 gen_spe_tar_dir <- "cache/01-r-generalist_specialist_genome_metadata"
+amr_dir         <- "cache/01-r-07-amr"
 
 ### Import results from other targets that were already analised
-#bgcs_by_genome    <- tar_read(name = bgcs_by_genome, store = gen_spe_tar_dir)
 genomes2find_BGCs <- tar_read(name = genomes2find_BGCs, store = gen_spe_tar_dir)
 generalist_genus  <- tar_read(name = generalist_genus, store = gen_spe_tar_dir)
 specialist_genus  <- tar_read(name = specialist_genus, store = gen_spe_tar_dir)
 genome_gff        <- tar_read(name = genome_gff, store = gen_spe_tar_dir)
 cds_by_genome     <- tar_read(name = cds_by_genome, store = gen_spe_tar_dir)
 genome_length     <- tar_read(name = genome_length, store = gen_spe_tar_dir)
+valid_amr         <- tar_read(name = valid_amr, store = amr_dir)
 
 library(future)
 library(future.callr)
@@ -38,8 +39,6 @@ list(
     import_bgcs,
     parse_antismash(list_bgcs_files),
     pattern = map(list_bgcs_files)
-    # ,
-    # deployment = "main"
   ),
   tar_target(
     bgcs_by_genome,
@@ -76,6 +75,53 @@ list(
   tar_target(
     bcgs_compare_means,
     bcgs_statistics(generalist_specialists_bgcs, bcgs_groups)
+  ),
+  tar_target(
+    genome_length2,
+    import_genome_length("cache/17-genome-lenght")
+  ),
+  tar_target(
+    paper_main_fig2,
+    plot_main_fig2_paper(generalist_specialists_bgcs, genomes2find_BGCs, cds_by_genome, genome_length, valid_amr)
+  ),
+  tar_target(
+    cds2permutate,
+    cds_to_plot(genomes2find_BGCs, cds_by_genome, genome_length)
+  ),
+  tar_target(
+    permutate_cds,
+    cds2permutate %>% 
+      filter(kingdom == "Bacteria") %>% 
+      permutation_test(y = "cds_n", x = "Group", n_iter = 10000)
+  ),
+  tar_target(
+    permutate_cds_norm,
+    cds2permutate %>% 
+      filter(kingdom == "Bacteria") %>% 
+      permutation_test(y = "cds_norm", x = "Group", n_iter = 10000)
+  ),
+  tar_target(
+    bcgs2permutate,
+    bgcs_df_paper(generalist_specialists_bgcs, genome_length)
+  ),
+  tar_target(
+    permutate_bcgs,
+    bcgs2permutate %>% 
+      filter(kingdom == "Bacteria") %>% 
+      permutation_test(y = "value", x = "Groups", n_iter = 10000)
+  ),
+  tar_target(
+    amr2permutate,
+    amr_df_paper(valid_amr, genomes2find_BGCs, genome_length)
+  ),
+  tar_target(
+    permutate_amr,
+    amr2permutate %>% 
+      filter(kingdom == "Bacteria") %>% 
+      permutation_test(y = "norm_amr", x = "Groups", n_iter = 10000)
+  ),
+  tar_target(
+    compare_bcgs_products,
+    compare_bcgs_products_bac_gen(generalist_specialists_bgcs, genome_length, bcgs_groups)
   )
-)
-
+)
\ No newline at end of file
diff --git a/src/sup/03-generalists-specialists-genome-with-gff/defaults.R b/src/sup/03-generalists-specialists-genome-with-gff/defaults.R
index 4203fad..fc63859 100644
--- a/src/sup/03-generalists-specialists-genome-with-gff/defaults.R
+++ b/src/sup/03-generalists-specialists-genome-with-gff/defaults.R
@@ -22,6 +22,9 @@ library(patchwork)
 library(future)
 library(future.callr)
 library(RColorBrewer)
+library(tidygraph)
+library(ggraph)
+library(igraph)
 
 setwd(here::here())
 
diff --git a/src/sup/03-generalists-specialists-genome-with-gff/functions.R b/src/sup/03-generalists-specialists-genome-with-gff/functions.R
index 3edd62a..3453af4 100644
--- a/src/sup/03-generalists-specialists-genome-with-gff/functions.R
+++ b/src/sup/03-generalists-specialists-genome-with-gff/functions.R
@@ -72,9 +72,6 @@ generate_annotation <- function(bgcs_by_genome, genomes2find_BGCs, generalist_ge
   metadata %>% 
     left_join(bgcs_by_genome, by = c("accession" = "genome_id")) %>% 
     mutate(n = replace_na(n, 0))
-  # %>% 
-  # Remove taxa with no antismash hits
-  #drop_na(products)
   
   
 }
@@ -121,7 +118,6 @@ plot_bgcs_gen_size_cds <- function(generalist_specialists_bgcs, genomes2find_BGC
     mutate(g_size = g_size/10^6)
   
   a <- generalist_specialists_bgcs %>% 
-    #unite(col = Categories, c("kingdom", "type"), sep = " ") %>% 
     rename(Groups = type) %>% 
     group_by(accession, Groups, kingdom) %>% 
     summarise(value = sum(n)) %>%
@@ -219,6 +215,13 @@ plot_bgcs_gen_size_cds <- function(generalist_specialists_bgcs, genomes2find_BGC
          height = 5, 
          dpi = 300)
   
+  ggsave(plot = b,
+         filename = "plots/02-generalist-specialist-bgcs/gene_spec_gen_size.pdf",
+         device = "pdf", 
+         width = 12, 
+         height = 5, 
+         dpi = 300)
+  
   return(list(a,b,c))
   
 }
@@ -353,7 +356,6 @@ plot_bcgs_gen_spec <- function(generalist_specialists_bgcs, bcgs_groups) {
     left_join(bcgs_groups) %>%
     group_by(Groups, kingdom, cluster, perc) %>% 
     summarise(sum_perc = sum(perc))
-  #In order to avoid horizontal white line on geom_col, summarize it first!
   
   p <- df %>% 
     ggplot(aes(Groups, perc, fill = cluster)) +
@@ -416,7 +418,6 @@ bcgs_statistics <- function(generalist_specialists_bgcs, bcgs_groups) {
     ggplot(aes(comp, count, fill = cluster)) +
     geom_boxplot() +
     stat_compare_means(
-      #symnum.args = symnum.args,
       method = "wilcox",
       comparisons = list(
         c("RiPP-Specialist", "RiPP-Generalist"),
@@ -435,4 +436,438 @@ bcgs_statistics <- function(generalist_specialists_bgcs, bcgs_groups) {
          device = "png", 
          width = 11, 
          height = 7)
-}
\ No newline at end of file
+}
+
+import_genome_length <- function(path) {
+  
+  list.files(path, full.names = T) %>% 
+    as_tibble() %>% 
+    mutate(gen_len = map(value, ~ read_lines(.x))) %>% 
+    unnest(gen_len) %>% 
+    mutate(accession = str_extract(value, "GC[A|F]_[0-9]+.[0-9]"))
+}
+
+plot_main_fig2_paper <- function(generalist_specialists_bgcs, genomes2find_BGCs, cds_by_genome, genome_length, valid_amr) {
+  
+  prevalence_group_colors <- c(
+    "generalist" = "#3a86ff",
+    "specialist" = "#ffbe0b"
+  )
+  
+  
+  scale_color_prevalence_group <- function(drop = FALSE, ...) {
+    scale_color_manual(
+      values =  purrr::simplify(prevalence_group_colors),
+      na.value = "#000000",
+      drop = drop,
+      ...
+    )
+  }
+  
+  symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), 
+                      symbols = c("****", "***", "**", "*", "ns"))
+  
+  a <- 
+    kingdoms_groups %>%
+    set_names(kingdoms_groups) %>%
+    purrr::map(function(current_kingdom) {
+      sub_abundances %>%
+        filter(subset_value == "all" & norm_method == "tss") %>%
+        pull(data) %>%
+        first() %>%
+        inner_join(selected_generalists_specialists) %>%
+        filter(kingdom == current_kingdom & !is.na(prevalence_group)) %>%
+        filter(abundance > 0.01e-2) %>%
+        ggplot(aes(x = prevalence_group, y = abundance, color = prevalence_group)) +
+        geom_boxplot(width = 0.5) +
+        facet_grid(~kingdom) +
+        scale_x_discrete(labels = str_to_sentence) +
+        scale_y_log10(expand = c(0, 1)) +
+        stat_compare_means(
+          method = "wilcox",
+          symnum.args = symnum.args,
+          comparisons = list(c("generalist", "specialist"))
+        ) +
+        annotation_logticks(sides = "l") +
+        scale_color_prevalence_group() +
+        theme_bw() +
+        theme(
+          strip.text = element_text(size = rel(1.1)),
+          axis.text = element_text(size = rel(1.1)), 
+          panel.grid.minor.x = element_blank(),
+          panel.grid.major.x = element_blank(),
+          strip.background = element_blank(),
+          axis.ticks.x=element_blank(),
+          panel.border = element_rect(colour = "black", fill = NA)) +
+        guides(color = FALSE) +
+        labs(
+          x = "",
+          y = "Abundance (TSS)"
+        )
+    })
+  
+  
+  
+  cds <- genomes2find_BGCs %>% 
+    distinct() %>% 
+    select(accession, genus, type, kingdom) %>% 
+    left_join(cds_by_genome, by = c("accession" = "genome_id")) %>% 
+    select(-value) %>% 
+    left_join(genome_length %>% 
+                rename(accession = genome_id) %>% 
+                unnest(g_size) %>% 
+                mutate(g_size = as.numeric(g_size)) %>% 
+                mutate(g_size = g_size/10^6) %>% 
+                distinct(accession, g_size)) %>% 
+    mutate(cds_norm = cds_n/g_size)
+  
+  b <- cds %>% 
+    rename(Group = type) %>% 
+    ggplot(aes(Group, cds_n)) +
+    geom_boxplot(aes(color = Group), width=0.5) +
+    facet_wrap(c("kingdom"), scales="free_y") +
+    stat_compare_means(
+      symnum.args = symnum.args,
+      method = "wilcox",
+      comparisons = list(c("Generalist", "Specialist"))) +
+    ylab("Number of CDS") +
+    guides(color = "none") +
+    scale_color_prevalence_type() +
+    scale_y_log10() +
+    theme_bw() +
+    annotation_logticks(sides = "l") +
+    theme(axis.title.x = element_blank(),
+          strip.text = element_text(size = rel(1.1)),
+          axis.text = element_text(size = rel(1.1)),
+          legend.position = "none",
+          panel.grid.minor.x = element_blank(),
+          panel.grid.major.x = element_blank(),
+          strip.background = element_blank(),
+          panel.border = element_rect(colour = "black", fill = NA))
+
+    
+  c <- generalist_specialists_bgcs %>% 
+    left_join(genome_length %>% 
+                rename(accession = genome_id) %>% 
+                unnest(g_size) %>% 
+                mutate(g_size = as.numeric(g_size)) %>% 
+                mutate(g_size = g_size/10^6) %>% 
+                distinct(accession, g_size)) %>% 
+    rename(Groups = type) %>% 
+    mutate(n = n/g_size) %>% 
+    group_by(accession, Groups, kingdom) %>% 
+    summarise(value = sum(n)) %>%
+    ggplot(aes(Groups, value, color = Groups)) +
+    geom_boxplot(width=0.5) +
+    scale_y_log10() +
+    stat_compare_means(
+      symnum.args = symnum.args,
+      method = "wilcox", 
+      comparisons = list(c("Specialist", "Generalist"))) +
+    facet_wrap(c("kingdom"), scales="free_y") +
+    scale_color_prevalence_type() +
+    annotation_logticks(sides = "l") +
+    ylab("Number of BCGs/Genome Length") +
+    guides(color = FALSE) +
+    theme_minimal() +
+    theme(axis.title.x = element_blank(),
+          strip.text = element_text(size = rel(1.1)),
+          axis.text = element_text(size = rel(1.1)),
+          legend.position = "none",
+          panel.grid.minor.x = element_blank(),
+          panel.grid.major.x = element_blank(),
+          strip.background = element_blank(),
+          panel.border = element_rect(colour = "black", fill = NA))
+  
+  amr <- valid_amr %>% 
+    rename(accession = genome_id) %>% 
+    left_join(genomes2find_BGCs %>% 
+                distinct(accession, genus, type, kingdom))
+  
+  amr2 <- amr %>% 
+    rename(Groups = type,
+           element_type = `Element type`,
+           element_sub = `Element subtype`,
+           class = Class) %>% 
+    select(accession, Groups, kingdom, element_type, element_sub, class) %>% 
+    mutate(count = 1) %>% 
+    group_by(accession, Groups, kingdom, element_type, element_sub, class) %>% 
+    summarise(n = sum(count)) %>% 
+    left_join(genome_length %>% 
+                rename(accession = genome_id) %>% 
+                unnest(g_size) %>% 
+                mutate(g_size = as.numeric(g_size)) %>% 
+                mutate(g_size = g_size/10^6) %>% 
+                distinct(accession, g_size)) %>% 
+    mutate(norm_amr = n/g_size)
+  
+  symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), 
+                      symbols = c("****", "***", "**", "*", "ns"))
+  
+  d <- amr2 %>% 
+    filter(element_type == "AMR") %>% 
+    ggplot(aes(Groups, norm_amr, color = Groups)) +
+    geom_point(size = 0.1) +
+    geom_boxplot(width=0.5) +
+    stat_compare_means(
+      method = "wilcox", 
+      symnum.args = symnum.args,
+      comparisons = list(c("Generalist", "Specialist")),
+      method.args = list(alternative = "less")) +
+    facet_wrap(c("element_type")) +
+    scale_y_log10() +
+    scale_y_continuous(trans='log10') +
+    scale_color_prevalence_type() +
+    annotation_logticks(sides = "l") +
+    ylab("Number of AMR genes/Genome length") +
+    guides(color = FALSE) +
+    theme_bw() +
+    theme(axis.title.x = element_blank(),
+          strip.text = element_text(size = rel(1.1)),
+          axis.text = element_text(size = rel(1.1)),
+          legend.position = "none",
+          panel.grid.minor.x = element_blank(),
+          panel.grid.major.x = element_blank(),
+          strip.background = element_blank(),
+          panel.border = element_rect(colour = "black", fill = NA)) 
+  
+  generalists_common_coabundance_graph <- readRDS("cache/15-microbiome-rserver-hki-rdata/generalists_common_coabundance_graph.rds")
+  
+  kingdoms_colors <- c(
+    "Bacteria" = "#3f78a9",
+    "Fungi" = "#A93F55",
+    "all" = "black"
+  )
+  
+  
+  scale_color_kingdom <- function(drop = FALSE, na.value = "#d9d9d9", ...) {
+    scale_color_manual(
+      values =  purrr::simplify(kingdoms_colors),
+      na.value = na.value,
+      drop = drop,
+      ...
+    )
+  }
+  
+  environment_group_colors <- c(
+    "soil" = "#8b786d",
+    "aquatic" = "#78a1bb",
+    "host" = "#e0aa5a",
+    "all" = "black"
+  )
+  
+  environment_group_sets_colors <- c(
+    "host-aquatic-soil" = "#000000",
+    "aquatic-soil" = "#155A82",
+    "host-aquatic" = "#83155F",
+    "host-soil" = "#826516"
+  ) %>%
+    append(environment_group_colors)
+  
+e <- 
+    generalists_common_coabundance_graph %>%
+    activate(edges) %>%
+    mutate(estimate_direction = estimate_direction %>% recode("positive" = "Positive", "negative" = "Negative")) %>%
+    ggraph(layout = layout_in_circle(generalists_common_coabundance_graph, order = order(V(generalists_common_coabundance_graph)$taxon_group))) +
+    geom_edge_link(aes(color = environment_group, alpha = involves_prevalence_group_taxon)) +
+    geom_node_point(aes(color = kingdom, size = is_generalist)) +
+    scale_edge_color_manual(values = environment_group_sets_colors, labels = str_to_sentence) +
+    scale_color_kingdom(na.value = "black") +
+    scale_size_manual(values = c(`TRUE` = 1.5, `FALSE` = 0.2), labels = c("No", "Yes")) +
+    scale_edge_alpha_manual(values = c(`TRUE` = 0.5, `FALSE` = 0.05), guide = "none") +
+    coord_fixed() +
+    facet_edges(~estimate_direction) +
+    guides(
+      color = guide_legend(override.aes = list(size = 6)),
+      size = guide_legend(override.aes = list(size = c(2,6)),
+      edge_color = guide_legend(override.aes = list(edge_width = 13)))
+           ) +
+    theme_void() +
+    labs(
+      color = "Kingdom",
+      edge_color = "Common in environments",
+      size = "Generalist") +
+    theme(
+      legend.text = element_text(size = rel(1.1)),
+      legend.title = element_text(size = rel(1.1)),
+      strip.text.x = element_text(size = 12, margin = margin(b = 5))
+    )
+  
+
+layout <- "
+AABB
+CCD#
+EEEF
+"
+  
+  f <-
+    wrap_plots(
+      wrap_plots(a$Bacteria + labs(tag = "A"), a$Fungi),
+      b + labs(tag = "B"),
+      c + labs(tag = "C"),
+      d + labs(tag = "D"),
+      e + labs(tag = "E"),
+    guides = "collect") +
+    guide_area() +
+    plot_layout(design = layout)
+  
+  ggsave(filename = "plots/04-manuscript-ap/fig2-v1.11.pdf",
+         plot = f,
+         device = "pdf",
+         width = 30,
+         height = 35,
+         units = "cm",
+         dpi = 300)
+  
+  ggsave(filename = "plots/04-manuscript-ap/fig2-v1.11.png",
+         plot = f,
+         width = 30,
+         height = 35,
+         units = "cm",
+         dpi = 300)
+  
+  return(f)
+ 
+}
+
+cds_to_plot <- function(genomes2find_BGCs, cds_by_genome, genome_length) {
+  
+  genomes2find_BGCs %>% 
+    distinct() %>% 
+    select(accession, genus, type, kingdom) %>% 
+    left_join(cds_by_genome, by = c("accession" = "genome_id")) %>% 
+    select(-value) %>% 
+    left_join(genome_length %>% 
+                rename(accession = genome_id) %>% 
+                unnest(g_size) %>% 
+                mutate(g_size = as.numeric(g_size)) %>% 
+                mutate(g_size = g_size/10^6) %>% 
+                distinct(accession, g_size)) %>% 
+    mutate(cds_norm = cds_n/g_size) %>% 
+    rename(Group = type)
+}
+
+permutation_test <- function(df, y = "cds_n", x = "Group", n_iter = 1000) {
+  
+  p_values <- vector("numeric", n_iter)
+  
+  # Loop through the iterations
+  for (i in 1:n_iter) {
+    
+    # Shuffle the labels randomly
+    shuffled_labels <- sample(df[[x]])
+    
+    # Combine the shuffled labels with the variable data
+    shuffled_data <- data.frame(values = df[[y]], group = shuffled_labels)
+
+    #Perform the Wilcoxon rank-sum test and store the p-value
+    test_result <- wilcox.test(values ~ group, data = shuffled_data)
+    p_values[i] <- test_result$p.value
+    
+  }
+  
+  observed_p_value <- wilcox.test(df[[y]] ~ df[[x]])$p.value
+
+  # Calculate the proportion of permutation p-values smaller than the observed p-value
+  proportion_smaller <- mean(p_values <= observed_p_value)
+
+  # If the proportion of permuted p-values smaller than or equal to the observed 
+  # p-value is 0, it suggests that none of the permuted datasets produced a result
+  # as extreme as the observed result. This implies that the observed difference 
+  # between the groups defined by x (as measured by the test statistic, typically 
+  # the Wilcoxon rank-sum test statistic in this case) is so extreme that it is 
+  # unlikely to have occurred by random chance alone.
+  
+  # Therefore, a proportion of 0 indicates strong evidence against the null 
+  # hypothesis of no difference between the groups, implying that there are 
+  # indeed differences between the groups.
+  
+  return(proportion_smaller)
+}
+
+bgcs_df_paper <- function(generalist_specialists_bgcs, genome_length) {
+  
+  generalist_specialists_bgcs %>% 
+    left_join(genome_length %>% 
+                rename(accession = genome_id) %>% 
+                unnest(g_size) %>% 
+                mutate(g_size = as.numeric(g_size)) %>% 
+                mutate(g_size = g_size/10^6) %>% 
+                distinct(accession, g_size)) %>% 
+    rename(Groups = type) %>% 
+    mutate(n = n/g_size) %>% 
+    group_by(accession, Groups, kingdom) %>% 
+    summarise(value = sum(n))
+}
+
+amr_df_paper <- function(valid_amr, genomes2find_BGCs, genome_length) {
+  
+valid_amr %>% 
+    rename(accession = genome_id) %>% 
+    left_join(genomes2find_BGCs %>% 
+                distinct(accession, genus, type, kingdom)) %>% 
+    rename(Groups = type,
+           element_type = `Element type`,
+           element_sub = `Element subtype`,
+           class = Class) %>% 
+    select(accession, Groups, kingdom, element_type, element_sub, class) %>% 
+    mutate(count = 1) %>% 
+    group_by(accession, Groups, kingdom, element_type, element_sub, class) %>% 
+    summarise(n = sum(count)) %>% 
+    left_join(genome_length %>% 
+                rename(accession = genome_id) %>% 
+                unnest(g_size) %>% 
+                mutate(g_size = as.numeric(g_size)) %>% 
+                mutate(g_size = g_size/10^6) %>% 
+                distinct(accession, g_size)) %>% 
+    mutate(norm_amr = n/g_size) %>% 
+    filter(element_type == "AMR")
+}
+
+
+compare_bcgs_products_bac_gen <- function(generalist_specialists_bgcs, genome_length, bcgs_groups) {
+  
+  bcgs_df <- 
+    generalist_specialists_bgcs %>% 
+    left_join(genome_length %>% 
+                rename(accession = genome_id) %>% 
+                unnest(g_size) %>% 
+                mutate(g_size = as.numeric(g_size)) %>% 
+                mutate(g_size = g_size/10^6) %>% 
+                distinct(accession, g_size)) %>% 
+    rename(Groups = type) %>% 
+    mutate(n = n/g_size) %>% 
+    mutate(products = replace_na(products, "No-BCGs")) %>% 
+    group_by(accession, Groups, kingdom, products) %>% 
+    summarise(value = sum(n)) %>% 
+    filter(kingdom == "Bacteria" & value > 0) %>% 
+    left_join(bcgs_groups)
+  
+# p_products <- 
+  bcgs_df %>% 
+  ggplot(aes(Groups, value)) +
+  geom_boxplot(aes(color=cluster)) +
+  stat_compare_means(
+    method = "wilcox",
+    symnum.args = symnum.args,
+    label = "..p.adj..",
+    comparisons = list(c("Specialist", "Generalist")),
+    method.args = list(alternative = "greater"),
+  ) +
+  #facet_wrap(~Groups, scales = "free_y") +
+  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
+  scale_y_log10()
+
+# If the p-value is less than a chosen significance level (commonly 0.05), it 
+# suggests that there is statistically significant evidence to reject the null 
+# hypothesis that the means are equal, in favor of the alternative hypothesis 
+# that the mean of the "Generalist" group is greater than the mean of the 
+# "Specialist" group.
+
+ggsave(filename = "plots/04-manuscript-ap/bcgs_products_bacteria.pdf", 
+       plot = p_products, width = 9, height = 7, dpi = 300)
+
+ggsave(filename = "plots/04-manuscript-ap/bcgs_products_bacteria.png", 
+       plot = p_products, width = 9, height = 7, dpi = 300)
+
+}
diff --git a/src/sup/05-manuscript-figs/01-sup-fig1.R b/src/sup/05-manuscript-figs/01-sup-fig1.R
deleted file mode 100644
index a8bf476..0000000
--- a/src/sup/05-manuscript-figs/01-sup-fig1.R
+++ /dev/null
@@ -1,161 +0,0 @@
-library(tidyverse)
-library(RColorBrewer)
-library(patchwork)
-
-# 16S
-
-raw_coverage <- list.files(path = "cache/08-coverage",
-                           full.names = T) %>%
-  as_tibble() %>%
-  #slice(1:3) %>% 
-  mutate(coverage = map(value, ~ read_tsv(.x)))
-
-cov <- raw_coverage %>%
-  mutate(sample_id = str_split(value, "/", simplify=T)[,3]) %>%
-  mutate(sample_id = str_remove(sample_id, ".txt")) %>%
-  select(-value) %>%
-  unnest(coverage)
-
-runs <- read_csv(file = "/home/ailtonpcf/draco/proj/04-global-microbiome/raw/runs.csv")
-
-# Color palete
-n <- 10
-qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
-col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
-
-# Merge dfs
-proj16s <- cov %>%
-  rename(rRNA = "#ID") %>%
-  left_join(runs %>% select(run_id, bioproject_id), by = c("sample_id" = "run_id")) %>%
-  relocate(sample_id, bioproject_id) %>%
-  mutate(rRNA = str_remove(rRNA, "_[:digit:]+")) %>%
-  mutate(rRNA = str_remove(rRNA, "_amp")) %>% 
-  mutate(bioproject_id = str_replace(bioproject_id, "_", "\n"))
-
-#Plot1
-p1 <- proj16s %>%
-  ggplot(aes(x=Covered_percent)) +
-  geom_boxplot(aes(color=rRNA)) +
-  facet_wrap(c("bioproject_id"), scales = "free_y") +
-  scale_color_manual(values = col_vector) +
-  guides(fill=guide_legend(ncol=1)) +
-  theme_bw() +
-  theme(axis.text = element_text(size = 10),
-        panel.grid.minor.x = element_blank(),
-        panel.grid.major.x = element_blank(),
-        strip.background = element_blank(),
-        panel.border = element_rect(colour = "black", fill = NA)) +
-  labs(x = "Coverage (%)",
-       y = "Count",
-       color = "rRNA", 
-       tag = "A")
-
-p2 <- proj16s %>%
-  ggplot(aes(x=Covered_percent)) +
-  geom_density(aes(color=rRNA), alpha = 0.75) +
-  facet_wrap(c("bioproject_id"), scales = "free_y") +
-  scale_color_manual(values = col_vector) +
-  guides(fill=guide_legend(ncol=1)) +
-  theme_bw() +
-  theme(axis.text = element_text(size = 10),
-        panel.grid.minor.x = element_blank(),
-        panel.grid.major.x = element_blank(),
-        strip.background = element_blank(),
-        panel.border = element_rect(colour = "black", fill = NA)) +
-  labs(x = "Coverage (%)",
-       y = "Count",
-       color = "rRNA", 
-       tag = "A")
-
-p3 <- proj16s %>%
-  ggplot(aes(x=Covered_percent)) +
-  geom_histogram(aes(fill=rRNA), alpha = 0.75) +
-  facet_wrap(c("bioproject_id"), scales = "free_y") +
-  scale_fill_manual(values = col_vector) +
-  guides(fill=guide_legend(ncol=1)) +
-  theme_bw() +
-  theme(axis.text = element_text(size = rel(1.1)),
-        strip.text = element_text(size = rel(1.1)),
-        panel.grid.minor.x = element_blank(),
-        panel.grid.major.x = element_blank(),
-        strip.background = element_blank(),
-        panel.border = element_rect(colour = "black", fill = NA)) +
-  labs(x = "Coverage (%)",
-       y = "Number of reads",
-       fill = "rRNA", 
-       tag = "A")
-
-# ITS
-
-its <- read_delim(file = "cache/00-tmp/extracted_its_seqs_NAs_primers.txt", 
-                  col_names = F)
-
-# Parse fasta headers from grep
-seqs <- its %>%
-  select(X1) %>% 
-  mutate(sample_id = str_split(X1, "\\.", simplify=T)[,1]) %>% 
-  mutate(ITS = str_extract(X1, "ITS[1-2]"))
-
-# Import bioprojects and run_ids
-runs <- read_csv(file = "raw/runs.csv")
-
-# Merge run_id and bioprojects
-sampleByITS <- seqs %>% 
-  drop_na(ITS) %>% 
-  group_by(sample_id, ITS) %>%
-  count() %>% 
-  #pivot_wider(names_from = its, values_from = n, values_fill = 0) %>% 
-  left_join(runs %>% select(run_id, bioproject_id), by = c("sample_id" = "run_id"))
-
-# Color palete
-n <- 2
-qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
-col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
-
-
-# Total counts by project and primer
-p4 <- sampleByITS %>% 
-  ungroup() %>% 
-  select(-sample_id) %>% 
-  group_by(bioproject_id, ITS) %>% 
-  summarise(total = sum(n)) %>% 
-  filter(!total < 100) %>% 
-  arrange(total) %>% 
-  ggplot(aes(fct_reorder(bioproject_id, total), total)) +
-  geom_col(aes(fill = ITS), position = "fill", width = 0.5) +
-  coord_flip() +
-  scale_fill_manual(values = col_vector) +
-  guides(fill=guide_legend(ncol=1)) +
-  theme_bw() +
-  theme(axis.title.y = element_blank(),
-        strip.text = element_text(size = rel(1.1)),
-        axis.text = element_text(size = rel(1.1)), 
-        panel.grid.minor.x = element_blank(),
-        panel.grid.major.x = element_blank(),
-        strip.background = element_blank(),
-        axis.ticks.x=element_blank(),
-        panel.border = element_rect(colour = "black", fill = NA)) +
-  labs(y = "Reads (%)",
-       fill = "ITS", 
-       tag = "B")
-
-
-wp3 <- p3 + p4 + plot_layout(widths = c(3,1), guides = "collect")
-
-ggsave(filename = "/home/ailtonpcf/draco/proj/04-global-microbiome/plots/04-manuscript-ap/sup-fig1.png",
-       plot = wp3,
-       device = "png",
-       width = 13,
-       height = 9,
-       dpi = 300)
-
-ggsave(filename = "/home/ailtonpcf/draco/proj/04-global-microbiome/plots/04-manuscript-ap/sup-fig1.pdf",
-       plot = wp3,
-       device = "pdf",
-       width = 13,
-       height = 9,
-       dpi = 300)
-
-
-
-
diff --git a/src/sup/05-manuscript-figs/02-main-fig2.R b/src/sup/05-manuscript-figs/02-main-fig2.R
deleted file mode 100644
index f8a84e7..0000000
--- a/src/sup/05-manuscript-figs/02-main-fig2.R
+++ /dev/null
@@ -1,207 +0,0 @@
-source("src/03-generalists-specialists-genome-with-gff/defaults.R")
-
-tar_load(figure1)
-tar_load(figure4)
-
-library(tidygraph)
-library(ggraph)
-library(igraph)
-
-generalists_common_coabundance_graph <- readRDS("cache/15-microbiome-rserver-hki-rdata/generalists_common_coabundance_graph.rds")
-
-kingdoms_colors <- c(
-  "Bacteria" = "#3f78a9",
-  "Fungi" = "#A93F55",
-  "all" = "black"
-)
-
-
-scale_color_kingdom <- function(drop = FALSE, na.value = "#d9d9d9", ...) {
-  scale_color_manual(
-    values =  purrr::simplify(kingdoms_colors),
-    na.value = na.value,
-    drop = drop,
-    ...
-  )
-}
-
-environment_group_colors <- c(
-  "soil" = "#8b786d",
-  "aquatic" = "#78a1bb",
-  "host" = "#e0aa5a",
-  "all" = "black"
-)
-
-environment_group_sets_colors <- c(
-  "host-aquatic-soil" = "#000000",
-  "aquatic-soil" = "#155A82",
-  "host-aquatic" = "#83155F",
-  "host-soil" = "#826516"
-) %>%
-  append(environment_group_colors)
-
-
-d <- 
-  generalists_common_coabundance_graph %>%
-  activate(edges) %>%
-  mutate(estimate_direction = estimate_direction %>% recode("positive" = "Positive", "negative" = "Negative")) %>%
-  ggraph(layout = layout_in_circle(generalists_common_coabundance_graph, order = order(V(generalists_common_coabundance_graph)$taxon_group))) +
-  geom_edge_link(aes(color = environment_group, alpha = involves_prevalence_group_taxon)) +
-  geom_node_point(aes(color = kingdom, size = is_generalist)) +
-  scale_edge_color_manual(values = environment_group_sets_colors, labels = str_to_sentence) +
-  scale_color_kingdom(na.value = "black") +
-  scale_size_manual(values = c(`TRUE` = 5, `FALSE` = 2), labels = c("No", "Yes")) +
-  scale_edge_alpha_manual(values = c(`TRUE` = 0.5, `FALSE` = 0.05), guide = "none") +
-  coord_fixed() +
-  facet_edges(~estimate_direction) +
-  guides(color = guide_legend(override.aes = list(size = 5))) +
-  theme_void() +
-  labs(
-    color = "Kingdom",
-    edge_color = "Common in environments",
-    size = "Generalist")
-################################################################################
-kingdoms_groups <- readRDS("cache/15-microbiome-rserver-hki-rdata/kingdoms_groups.rds")
-sub_abundances <- readRDS("cache/15-microbiome-rserver-hki-rdata/sub_abundances.rds")
-selected_generalists_specialists <- readRDS("cache/15-microbiome-rserver-hki-rdata/selected_generalists_specialists.rds")
-
-prevalence_group_colors <- c(
-  "generalist" = "#3a86ff",
-  "specialist" = "#ffbe0b"
-)
-
-
-scale_color_prevalence_group <- function(drop = FALSE, ...) {
-  scale_color_manual(
-    values =  purrr::simplify(prevalence_group_colors),
-    na.value = "#000000",
-    drop = drop,
-    ...
-  )
-}
-
-symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), 
-                    symbols = c("****", "***", "**", "*", "ns"))
-
-bc <- 
-  kingdoms_groups %>%
-  set_names(kingdoms_groups) %>%
-  purrr::map(function(current_kingdom) {
-    sub_abundances %>%
-      filter(subset_value == "all" & norm_method == "tss") %>%
-      pull(data) %>%
-      first() %>%
-      inner_join(selected_generalists_specialists) %>%
-      filter(kingdom == current_kingdom & !is.na(prevalence_group)) %>%
-      filter(abundance > 0.01e-2) %>%
-      ggplot(aes(x = prevalence_group, y = abundance, color = prevalence_group)) +
-      geom_boxplot(width = 0.5) +
-      facet_grid(~kingdom) +
-      scale_x_discrete(labels = str_to_sentence) +
-      scale_y_log10(expand = c(0, 1)) +
-      stat_compare_means(
-        method = "wilcox",
-        symnum.args = symnum.args,
-        comparisons = list(c("generalist", "specialist"))
-      ) +
-      annotation_logticks(sides = "l") +
-      scale_color_prevalence_group() +
-      theme_bw() +
-      theme(
-        strip.text = element_text(size = rel(1.1)),
-        axis.text = element_text(size = rel(1.1)), 
-        panel.grid.minor.x = element_blank(),
-        panel.grid.major.x = element_blank(),
-        strip.background = element_blank(),
-        axis.ticks.x=element_blank(),
-        panel.border = element_rect(colour = "black", fill = NA)) +
-      guides(color = FALSE) +
-      labs(
-        x = "",
-        y = "Abundance (TSS)"
-      )
-  })
-
-################################################################################
-
-layout <- "
-AABBCC
-DDEEEE
-"
-
-f <- wrap_plots(
-  wrap_plots(
-    bc[[1]] + labs(tag = "A"),
-    bc[[2]]
-  ),
-  figure1[[3]] + labs(tag = "B"),
-  figure1[[1]] + labs(tag = "C"),
-  figure4 + labs(tag = "D"),
-  d + labs(tag = "E"),
-  ncol = 3,
-  nrow = 2,
-  guides = "collect") + 
-  plot_layout(design = layout
-  # plot_layout(widths = c(1,1,2), heights = c(0.5, 2)
-  )
-
-ggsave(filename = "plots/04-manuscript-ap/fig2-v1.3.pdf",
-       plot = f,
-       device = "pdf",
-       width = 45,
-       height = 25,
-       units = "cm",
-       dpi = 300)
-
-ggsave(filename = "plots/04-manuscript-ap/fig2-v1.3.png",
-       plot = f,
-       device = "png",
-       width = 45,
-       height = 25,
-       units = "cm",
-       dpi = 300)
-
-# BIG CHANGES
-##############################################################################
-
-source("src/07-amr/defaults.R")
-tar_load(amr_comparisons_plot)
-
-amr_comparisons_plot[1]
-
-layout <- "
-AABBCC
-DDEEEE
-"
-
-f <- wrap_plots(
-  wrap_plots(
-    bc[[1]] + labs(tag = "A"),
-    bc[[2]]
-  ),
-  figure1[[3]] + labs(tag = "B"),
-  figure1[[1]] + labs(tag = "C"),
-  amr_comparisons_plot[[1]] + labs(tag = "D"),
-  d + labs(tag = "E"),
-  ncol = 3,
-  nrow = 2,
-  guides = "collect") + 
-  plot_layout(design = layout
-              # plot_layout(widths = c(1,1,2), heights = c(0.5, 2)
-  )
-
-ggsave(filename = "plots/04-manuscript-ap/fig2-v1.4.pdf",
-       plot = f,
-       device = "pdf",
-       width = 45,
-       height = 25,
-       units = "cm",
-       dpi = 300)
-
-ggsave(filename = "plots/04-manuscript-ap/fig2-v1.4.png",
-       plot = f,
-       device = "png",
-       width = 45,
-       height = 25,
-       units = "cm",
-       dpi = 300)
diff --git a/src/sup/05-manuscript-figs/02-sup-fig2.R b/src/sup/05-manuscript-figs/02-sup-fig2.R
deleted file mode 100644
index 07666a4..0000000
--- a/src/sup/05-manuscript-figs/02-sup-fig2.R
+++ /dev/null
@@ -1,303 +0,0 @@
-source("src/04-quantify-primers-effect/defaults.R")
-library(ggpubr)
-
-# Import results generated before
-tar_load(fungal_primers_heat_rds)
-tar_load(bacterial_primers_heat_rds)
-
-venn <- read_rds("cache/15-microbiome-rserver-hki-rdata/venn_no_aqua_fixed.rds")
-prevalent_taxa <- readRDS(file = "cache/15-microbiome-rserver-hki-rdata/02supfig-prevalent_taxa.rds")
-sub_abundances <- readRDS(file = "cache/15-microbiome-rserver-hki-rdata/02supfig-sub_abundances.rds")
-primers_by_sample <- readRDS(file = "cache/15-microbiome-rserver-hki-rdata/02supfig-primers_by_sample.rds")
-
-environment_group_colors <- c(
-  "soil" = "#8b786d",
-  "aquatic" = "#78a1bb",
-  "host" = "#e0aa5a",
-  "all" = "black"
-)
-
-
-scale_color_environment_group <- function(drop = FALSE, ...) {
-  scale_color_manual(
-    values = environment_group_colors,
-    na.value = "#d9d9d9",
-    drop = drop,
-    ...
-  )
-}
-
-symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), 
-                    symbols = c("****", "***", "**", "*", "ns"))
-
-bacv1_v4 <-
-  prevalent_taxa %>%
-  nest(-kingdom) %>%
-  filter(kingdom == "Bacteria") %>% 
-  mutate(
-    data = data %>% map(~ {
-      .x %>%
-        mutate(
-          group = case_when(
-            soil & host & aquatic ~ "Common\nin all",
-            soil & !host & !aquatic ~ "Unique to\nsoil",
-            !soil & !host & aquatic ~ "Unique to\naquatic",
-            !soil & host & !aquatic ~ "Unique to\nhost"
-          ),
-          environment_group = group %>% recode(
-            "Common\nin all" = "all", "Unique to\nsoil" = "soil", "Unique to\naquatic" = "aquatic",
-            "Unique to\nhost" = "host"
-          )
-        ) %>%
-        left_join(
-          sub_abundances$data[[2]] %>% select(sample_id, taxon, abundance)
-        ) %>%
-        filter(abundance > 0.01e-2) %>%
-        filter(!is.na(group)) %>%
-        left_join(sub_abundances$data[[2]]) %>%
-        left_join(primers_by_sample %>% select(sample_id, contains("bac"))) %>%
-        filter(!is.na(set1bac)) %>%
-        ggplot(aes(group, abundance, color = environment_group)) +
-        geom_boxplot(width = 0.5) +
-        stat_compare_means(
-          method = "wilcox",
-          symnum.args = symnum.args,
-          comparisons = list(
-            c("Common\nin all", "Unique to\naquatic"),
-            c("Common\nin all", "Unique to\nhost"),
-            c("Common\nin all", "Unique to\nsoil")
-          )
-        ) +
-        scale_y_log10(expand = c(0, 0.5)) +
-        scale_color_environment_group() +
-        annotation_logticks(sides = "l") +
-        guides(color = FALSE) +
-        theme_bw() +
-        theme(
-          strip.text = element_text(size = rel(1.1)),
-          axis.text = element_text(size = rel(1.1)), 
-          panel.grid.minor.x = element_blank(),
-          panel.grid.major.x = element_blank(),
-          strip.background = element_blank(),
-          axis.ticks.x=element_blank(),
-          panel.border = element_rect(colour = "black", fill = NA)) +
-        labs(x = "", y = "Abundance (TSS)") +
-        facet_wrap(c("set1bac"))
-        
-    })
-  ) %>%
-  deframe() %>% 
-  pluck(1)
-
-bacv3_v5 <-
-  prevalent_taxa %>%
-  nest(-kingdom) %>%
-  filter(kingdom == "Bacteria") %>% 
-  mutate(
-    data = data %>% map(~ {
-      .x %>%
-        mutate(
-          group = case_when(
-            soil & host & aquatic ~ "Common\nin all",
-            soil & !host & !aquatic ~ "Unique to\nsoil",
-            !soil & !host & aquatic ~ "Unique to\naquatic",
-            !soil & host & !aquatic ~ "Unique to\nhost"
-          ),
-          environment_group = group %>% recode(
-            "Common\nin all" = "all", "Unique to\nsoil" = "soil", "Unique to\naquatic" = "aquatic",
-            "Unique to\nhost" = "host"
-          )
-        ) %>%
-        left_join(
-          sub_abundances$data[[2]] %>% select(sample_id, taxon, abundance)
-        ) %>%
-        filter(abundance > 0.01e-2) %>%
-        filter(!is.na(group)) %>%
-        left_join(sub_abundances$data[[2]]) %>%
-        left_join(primers_by_sample %>% select(sample_id, contains("bac"))) %>%
-        filter(!is.na(set2bac)) %>% 
-        ggplot(aes(group, abundance, color = environment_group)) +
-        geom_boxplot(width = 0.5) +
-        stat_compare_means(
-          method = "wilcox",
-          symnum.args = symnum.args,
-          comparisons = list(
-            c("Common\nin all", "Unique to\naquatic"),
-            c("Common\nin all", "Unique to\nhost"),
-            c("Common\nin all", "Unique to\nsoil")
-          )
-        ) +
-        scale_y_log10(expand = c(0, 0.5)) +
-        scale_color_environment_group() +
-        annotation_logticks(sides = "l") +
-        guides(color = FALSE) +
-        theme_bw() +
-        theme(
-          strip.text = element_text(size = rel(1.1)),
-          axis.text = element_text(size = rel(1.1)), 
-          panel.grid.minor.x = element_blank(),
-          panel.grid.major.x = element_blank(),
-          strip.background = element_blank(),
-          axis.ticks.x=element_blank(),
-          panel.border = element_rect(colour = "black", fill = NA)) +
-        labs(x = "", y = "Abundance (TSS)") +
-        facet_wrap(c("set2bac"))
-      
-    })
-  ) %>%
-  deframe() %>% 
-  pluck(1)
-
-funits1 <-
-  prevalent_taxa %>%
-  nest(-kingdom) %>%
-  filter(kingdom == "Fungi") %>% 
-  mutate(
-    data = data %>% map(~ {
-      .x %>%
-        mutate(
-          group = case_when(
-            soil & host & aquatic ~ "Common\nin all",
-            soil & !host & !aquatic ~ "Unique to\nsoil",
-            !soil & !host & aquatic ~ "Unique to\naquatic",
-            !soil & host & !aquatic ~ "Unique to\nhost"
-          ),
-          environment_group = group %>% recode(
-            "Common\nin all" = "all", "Unique to\nsoil" = "soil", "Unique to\naquatic" = "aquatic",
-            "Unique to\nhost" = "host"
-          )
-        ) %>%
-        left_join(
-          sub_abundances$data[[2]] %>% 
-            drop_na(environment_group) %>% 
-            select(sample_id, taxon, abundance)
-        ) %>%
-        drop_na(environment_group) %>%
-        filter(abundance > 0.01e-2) %>%
-        filter(!is.na(group)) %>%
-        left_join(sub_abundances$data[[2]]) %>%
-        left_join(primers_by_sample %>% select(sample_id, contains("fun"))) %>%
-        filter(!is.na(set1fun)) %>% 
-        ggplot(aes(group, abundance, color = environment_group)) +
-        geom_boxplot(width = 0.5) +
-        stat_compare_means(
-          method = "wilcox",
-          symnum.args = symnum.args,
-          comparisons = list(
-            c("Common\nin all", "Unique to\naquatic"),
-            c("Common\nin all", "Unique to\nhost"),
-            c("Common\nin all", "Unique to\nsoil")
-          )
-        ) +
-        scale_y_log10(expand = c(0, 0.5)) +
-        scale_color_environment_group() +
-        annotation_logticks(sides = "l") +
-        guides(color = FALSE) +
-        theme_bw() +
-        theme(
-          strip.text = element_text(size = rel(1.1)),
-          axis.text = element_text(size = rel(1.1)), 
-          panel.grid.minor.x = element_blank(),
-          panel.grid.major.x = element_blank(),
-          strip.background = element_blank(),
-          axis.ticks.x=element_blank(),
-          panel.border = element_rect(colour = "black", fill = NA)) +
-        labs(x = "", y = "Abundance (TSS)") +
-        facet_wrap(c("set1fun"))
-      
-    })
-  ) %>%
-  deframe() %>% 
-  pluck(1)
-
-funits2 <-
-  prevalent_taxa %>%
-  nest(-kingdom) %>%
-  filter(kingdom == "Fungi") %>% 
-  mutate(
-    data = data %>% map(~ {
-      .x %>%
-        mutate(
-          group = case_when(
-            soil & host & aquatic ~ "Common\nin all",
-            soil & !host & !aquatic ~ "Unique to\nsoil",
-            !soil & !host & aquatic ~ "Unique to\naquatic",
-            !soil & host & !aquatic ~ "Unique to\nhost"
-          ),
-          environment_group = group %>% recode(
-            "Common\nin all" = "all", "Unique to\nsoil" = "soil", "Unique to\naquatic" = "aquatic",
-            "Unique to\nhost" = "host"
-          )
-        ) %>%
-        left_join(
-          sub_abundances$data[[2]] %>% 
-            drop_na(environment_group) %>% 
-            select(sample_id, taxon, abundance)
-        ) %>%
-        filter(abundance > 0.01e-2) %>%
-        filter(!is.na(group)) %>%
-        left_join(sub_abundances$data[[2]]) %>%
-        left_join(primers_by_sample %>% select(sample_id, contains("fun"), env_check)) %>%
-        filter(!is.na(set2fun)) %>%
-        #filter(!(is.na(env_check) & environment_group == "aquatic")) %>%
-        ggplot(aes(group, abundance, color = environment_group)) +
-        geom_boxplot(width = 0.5) +
-        stat_compare_means(
-          method = "wilcox",
-          symnum.args = symnum.args,
-          comparisons = list(
-            c("Common\nin all", "Unique to\nhost"),
-            c("Common\nin all", "Unique to\nsoil")
-          )
-        ) +
-        scale_y_log10(expand = c(0, 0.5)) +
-        scale_color_environment_group() +
-        annotation_logticks(sides = "l") +
-        guides(color = FALSE) +
-        theme_bw() +
-        theme(
-          strip.text = element_text(size = rel(1.1)),
-          axis.text = element_text(size = rel(1.1)), 
-          panel.grid.minor.x = element_blank(),
-          panel.grid.major.x = element_blank(),
-          strip.background = element_blank(),
-          axis.ticks.x=element_blank(),
-          panel.border = element_rect(colour = "black", fill = NA)) +
-        labs(x = "", y = "Abundance (TSS)") +
-        facet_wrap(c("set2fun"))
-      
-    })
-  ) %>%
-  deframe() %>% 
-  pluck(1)
-
-p <- wrap_plots(
-  venn[[1]],
-  venn[[2]],
-  venn[[3]],
-  venn[[4]],
-  (bacterial_primers_heat_rds + labs(tag = "B")),
-  (fungal_primers_heat_rds + labs(tag = "C")),
-  bacv1_v4 + labs(tag = "D"),
-  bacv3_v5,
-  funits1,
-  funits2,
-  ncol = 2,
-  widths = c(1,1)
-)
-
-ggsave(filename = "plots/04-manuscript-ap/sup-fig2.pdf",
-       plot = p,
-       device = "pdf",
-       width = 33,
-       height = 40,
-       units = "cm",
-       dpi = 300)
-
-ggsave(filename = "plots/04-manuscript-ap/sup-fig2.png",
-       plot = p,
-       device = "png",
-       width = 33,
-       height = 40,
-       units = "cm",
-       dpi = 300)
diff --git a/src/sup/05-manuscript-figs/03-sup-fig3.R b/src/sup/05-manuscript-figs/03-sup-fig3.R
deleted file mode 100644
index a63d677..0000000
--- a/src/sup/05-manuscript-figs/03-sup-fig3.R
+++ /dev/null
@@ -1,153 +0,0 @@
-# Most ecological papers use Levins' niche breadth index to define generalist taxon
-source("src/defaults.R")
-
-library(MicroNiche)
-
-loadd(sub_abundances)
-loadd(selected_generalists_specialists)
-loadd(samples)
-
-taxa_data <-
-  sub_abundances %>%
-  filter(norm_method == "tss" & subset_value == "all") %>%
-  unnest(data) %>%
-  # pseudo counts to normalize for seqdepth
-  mutate(abundance = round(abundance * 10e3)) %>%
-  select(sample_id, taxon, abundance) %>%
-  pivot_wider(names_from = sample_id, values_from = abundance, values_fill = list(abundance = 0)) %>%
-  as.data.frame()
-
-levins_indicies <-
-  tibble(sample_grouping = c("bioproject_id", "environment_group", "habitat")) %>%
-  mutate(
-    levins_index = sample_grouping %>% map(possibly(~ {
-      sample_data <-
-        taxa_data %>%
-        colnames() %>%
-        tibble(sample_id = .) %>%
-        left_join(samples) %>%
-        pluck(.x)
-
-      n_sample_groups <- sample_data %>%
-        unique() %>%
-        length()
-
-      levins.Bn(taxa_data, n_sample_groups, sample_data) %>%
-        as_tibble(rownames = "taxon")
-    }, NA))
-  )
-
-#saveRDS(object = levins_indicies, file = "rdata/levins_indicies.rds")
-levins_indicies <- readRDS("rdata/levins_indicies.rds")
-
-prevalence_group_colors_upper <- c(
-  "Generalist" = "#3a86ff",
-  "Specialist" = "#ffbe0b"
-)
-
-scale_color_prevalence_group_upper <- function(drop = FALSE, ...) {
-  scale_color_manual(
-    values =  purrr::simplify(prevalence_group_colors_upper),
-    na.value = "#000000",
-    drop = drop,
-    ...
-  )
-}
-
-p1 <- levins_indicies %>%
-  unnest(levins_index) %>%
-  left_join(selected_generalists_specialists) %>%
-  filter(!is.na(prevalence_group)) %>%
-  mutate(
-    sample_grouping = sample_grouping %>%
-      recode("bioproject_id" = "Project", "habitat" = "Habitat", "environment_group" = "Environment") %>%
-      factor(levels = c("Project", "Habitat", "Environment"))
-  ) %>%
-  mutate(prevalence_group = recode(prevalence_group, "generalist" = "Generalist", "specialist" = "Specialist")) %>% 
-  ggplot(aes(prevalence_group, Bn, color = prevalence_group)) +
-  geom_boxplot() +
-  scale_color_prevalence_group_upper() +
-  stat_compare_means(
-    method = "wilcox",
-    comparisons = list(c("Generalist", "Specialist"))
-  ) +
-  facet_wrap(~sample_grouping) +
-  guides(color = "none") +
-  theme_bw() +
-  theme(panel.grid.major.x = element_blank(),
-        strip.text = element_text(size = rel(1.2)),
-        legend.text = element_text(size=rel(1.2)),
-        axis.text = element_text(size=rel(1.2)),
-        axis.title = element_text(size=rel(1.2)),
-        panel.grid.minor.x = element_blank(),
-        strip.background = element_blank(),
-        panel.border = element_rect(colour = "black", fill = NA)) +
-  labs(
-    x = "",
-    y = TeX("Levins' niche breadth index B_n"),
-    tag = "A"
-  )
-
-# SNB
-################################################################################
-library(ggbeeswarm)
-
-generalists <- read_tsv(here::here("raw/snb/generalist_edit.txt"), skip = 1,
-                        col_names = "taxa")  %>% 
-  mutate(gen_spec = "generalist")
-
-specialists <- read_tsv(here::here("raw/snb/specialists_edit.txt"), skip = 1,
-                        col_names = "taxa") %>% 
-  mutate(gen_spec = "specialist")
-
-gen_spec <- bind_rows(generalists, specialists) %>% 
-  mutate(taxa = str_replace_all(taxa, " ", "_"))
-
-snb <- read_tsv(here::here("raw/snb/bas_snb.txt")) %>% 
-  filter(rank == "genus") %>% 
-  mutate(taxa = str_extract(`taxonomic lineage`, "genus\\.(.+)"),
-         taxa = str_remove(taxa, "genus\\.")
-  ) %>% 
-  relocate(taxa, .after = rank)
-
-plot_df <- gen_spec %>% 
-  left_join(snb)
-
-p2 <- snb %>% 
-  ggplot(aes(x = "1", y = SNB)) +
-  geom_violin() +
-  geom_quasirandom(data = plot_df, aes(x = "1", y = SNB, color = gen_spec), 
-                   width = 0.07, size = 3) +
-  scale_color_manual(values = c("#3a86ff", "#ffbe0b"), name = "", 
-                     labels = c("Generalist", "Specialist")) +
-  labs(x = "", tag = "B") +
-  guides(color=guide_legend(override.aes=list(fill=NA))) +
-  theme_bw() +
-  theme(axis.text.x = element_blank(),
-        legend.position=c(.7, .9),
-        legend.key = element_blank(),
-        legend.background=element_blank(),
-        strip.text = element_text(size = rel(1.2)),
-        legend.text = element_text(size=rel(1.2)),
-        axis.text = element_text(size=rel(1.2)),
-        axis.title = element_text(size=rel(1.2)),
-        panel.grid.minor.x = element_blank(),
-        panel.grid.major.x = element_blank(),
-        strip.background = element_blank(),
-        panel.border = element_rect(colour = "black", fill = NA))
-
-p <- p1 + p2 + plot_layout(widths = c(2,1))
-
-ggsave(filename = "results/sup-fig3.png", 
-       plot = p, 
-       device = "png",
-       width = 11, 
-       height = 7, 
-       dpi = 300)
-
-ggsave(filename = "results/sup-fig3.pdf", 
-       plot = p, 
-       device = "pdf",
-       width = 11, 
-       height = 7, 
-       dpi = 300)
diff --git a/src/sup/05-manuscript-figs/README b/src/sup/05-manuscript-figs/README
deleted file mode 100644
index b943407..0000000
--- a/src/sup/05-manuscript-figs/README
+++ /dev/null
@@ -1,3 +0,0 @@
-- Figures not present here are generated by drake pipeline.
-
-- load() R function make drake results available to be used.