diff --git a/authentication.qmd b/authentication.qmd index b789610..bb6fe2c 100644 --- a/authentication.qmd +++ b/authentication.qmd @@ -708,7 +708,7 @@ conda activate authentication We navigate to the working directory: ```bash -cd metadmg/figures +cd metadmg/ ``` We load R by running `R` in your terminal @@ -738,7 +738,7 @@ We will use the function `get_dmg_decay_fit` to visualise damage pattern (@fig-a The function is saved in `metadmg/script/`, so we only need to run the following command to recall it: ```{r eval=F} -source("metadmg/script/get_dmg_decay_fit.R") +source("script/get_dmg_decay_fit.R") ``` But if you are curious and want to know how it works, here is the function itself: @@ -848,10 +848,10 @@ get_dmg_decay_fit <- function(df, orient = "fwd", pos = 30, p_breaks = c(0, 0.1, } ``` -We load our metaDMG output data (tsv file) and generate the damage plots as seen in @fig-authentication-fagusovisdmg using the function `get-damage`. +We load our metaDMG output data (tsv file) and the metadata with information on the age of each sample. We generate the damage plots as seen in @fig-authentication-fagusovisdmg using the function `get-damage`. ```{r eval=F} -df <- read.csv("metadmg/concatenated_metaDMGfinal.tsv", sep = "\t") +df <- read.csv("concatenated_metaDMGfinal.tsv", sep = "\t") #Rename sample column colnames(df)[colnames(df) == 'filename'] <- 'sample' @@ -864,6 +864,17 @@ df$sample[df$sample == "VM-17_aggregated_results.stat"] <- "VM-17" df$sample[df$sample == "VM-19_aggregated_results.stat"] <- "VM-19" df$sample[df$sample == "VM-3_aggregated_results.stat"] <- "VM-3" +#Import the metadata with dates BP +depth_data <- read.csv ("figures/depth_data.csv", header = TRUE) +View (depth_data) + +#Merge context_data and depth_data with dataframe (adding new column for dates BP) +df$new <- depth_data$Date_BP[match(df$sample, depth_data$Sample_ID)] +names(df)[names(df) == 'new'] <- 'Date_BP' + +# Convert Date_BP columns to factors (categorical variable) +df$Date_BP <- as.factor(df$Date_BP) + #Setting filtering theshold for ancient reads minDMG = 0.02 # filter criteria, plot only taxa above set value zfit = 2 # minimum significance, the higher the better, 2 would mean that we estimante the damage with 95% confidence. @@ -881,7 +892,6 @@ X <- tax_g_list purrr::map(tax_g_list, function(X, nrank) { sel_tax <- dt1 %>% rename(label = sample) %>% - #filter(Genome_Id %in% (tax_superg %>% filter(Taxa_Super_Groups == X) %>% pull(Genome_Id))) %>% filter(name == X) %>% filter(rank == rank) %>% select(name, label) %>% @@ -899,7 +909,7 @@ purrr::map(tax_g_list, function(X, nrank) { get_dmg_decay_fit(df = dt1 %>% rename(label = sample) %>% inner_join(sel_tax) %>% filter(rank == rank), orient = "rev", y_max = 0.70) + ggtitle(paste0(X, " nreads=", n_readsa, " Reverse")) ), align = "hv") - ggsave(paste0(X, "-dmg.pdf"), plot = last_plot(), width = 8, height = 4) + ggsave(paste0("figures/", X, "-dmg.pdf"), plot = last_plot(), width = 8, height = 4) } }) ``` @@ -912,61 +922,35 @@ We provide an R script to investigate the main statistics. Here we visualise the amplitude of damage (A) and its significance (Zfit), for the full dataset but filtering it to a minimum of 100 reads and at the genus level (@fig-authentication-fig8). ```{r eval=F} -#Subset dataset at genus level -dt2 <- df %>% filter(grepl("\\bgenus\\b", rank)) - -#plotting amplitude of damage vs its significance -p1 <- ggplot(dt2, aes(y=A, x=Zfit)) + - geom_point(aes(size=nreads), col ="dark green") + - theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust =1)) + - scale_size_continuous(labels = function(x) format(x, scientific = FALSE)) + - xlab("significance") + ylab("damage") + theme_minimal() -p1 - -#Plotting only animals and plants at genus level -#subset dataset -dt3 <- dt2 %>% filter(nreads > 100, grepl("\\bgenus\\b", rank), grepl("Metazoa", taxa_path) | grepl("Viridiplant", taxa_path)) +#Subset dataset animal and plants at the genus level +dt2 <- df %>% filter(nreads > 100, grepl("\\bgenus\\b", rank), grepl("Metazoa", taxa_path) | grepl("Viridiplant", taxa_path)) #Adding factor column for Kingdom -dt3 <- dt3 %>% +dt2 <- dt2 %>% mutate(Kingdom = # creating our new column case_when(grepl("Viridiplant", taxa_path) ~ "Viridiplantae", grepl("Metazoa",taxa_path) ~ "Metazoa")) -#Plotting amplitude of damage vs its significance -p2 <- ggplot(dt3, aes(y=A, x=Zfit)) + +#Plotting amplitude of damage vs its significance and saving as pdf file +pdf(file = "figures/p1.pdf", width = 8, height = 6) +ggplot(dt2, aes(y=A, x=Zfit)) + geom_point(aes(size=nreads, col=Kingdom)) + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust =1)) + scale_color_manual(values = c("#8B1A1A", "#458B00"))+ scale_size_continuous(labels = function(x) format(x, scientific = FALSE)) + xlab("significance") + ylab("damage") + theme_minimal() -p2 - -#Save the plots as a PDF file -ggsave("p1.pdf", plot = p1, width = 8, height = 6) -ggsave("p2.pdf", plot = p2, width = 8, height = 6) +dev.off() ``` ![Amplitude of damage (A) vs significance (Zfit) for animals and plants.](assets/images/chapters/authentication/p2.png){#fig-authentication-fig8} ### Amplitude of damage and mean fragment length through time -Here we visualise the amplitude of damage (A) and the mean length of the fragments (mean_rlen) by depth and by date (BP) for the full dataset but filtering it to a minimum of 100 reads and at the genus level (@fig-authentication-fig9). +Here we visualise the amplitude of damage (A) and the mean length of the fragments (mean_rlen) by date (BP) for the filtered dataset with a minimum of 100 reads and at the genus level (@fig-authentication-fig9). ```{r eval=F} -#Import the metadata -depth_data <- read.csv ("metadmg/figures/depth_data.csv", sep = ",") -View (depth_data) - -#Merge context_data and depth_data with dataframe (adding new column for dates BP) -df$new <- depth_data$Date_BP[match(df$sample, depth_data$Sample_ID)] -names(df)[names(df) == 'new'] <- 'Date_BP' - -# Convert Date_BP columns to factors (categorical variable) -df$Date_BP <- as.factor(df$Date_BP) - #Plotting damage (A) by period (dates BP) -p3a<- dt4 %>% +p2a<- dt2 %>% mutate(Date_BP = fct_relevel(Date_BP, "6100","5300","4100","3900","3000", "800")) %>% ggplot(aes(x=A, y=Date_BP))+ @@ -974,10 +958,10 @@ p3a<- dt4 %>% geom_point(aes(fill = sample), size = 3, shape = 21, color = "black", stroke = .5) + scale_x_continuous(limits = c(0, 0.20), breaks = seq(0, 0.20, by = 0.05)) + theme_minimal() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) -p3a +p2a #Plotting mean length (mean_rlen) by period (dates BP) -p3b<- dt4 %>% +p2b<- dt2 %>% mutate(Date_BP = fct_relevel(Date_BP, "6100","5300","4100","3900","3000", "800")) %>% ggplot(aes(x=mean_rlen, y=Date_BP))+ @@ -985,15 +969,13 @@ p3b<- dt4 %>% geom_point(aes(fill = sample), size = 3, shape = 21, color = "black", stroke = .5) + scale_x_continuous(limits = c(30, 80), breaks = seq(30, 80, by = 10)) + theme_minimal() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) -p3b +p2b -#Combining the plots -p3 <- grid.arrange(p3a, p3b, +#Combining the plots and saving as pdf file +pdf(file = "figures/p2.pdf", width = 8, height = 6) +p2 <- grid.arrange(p2a, p2b, ncol = 2, nrow = 1) -p3 - -#Save plot -ggsave("p3.pdf", plot = p4, width = 10, height = 8) +dev.off() ``` ![Amplitude of damage (A) and mean fragment length (mean_rlen) through time.](assets/images/chapters/authentication/p3.png){#fig-authentication-fig9}